@@ -184,7 +184,8 @@ def udf(*args: Any, **kwargs: Any): # noqa: D417
184184 This class can be used both as either a function or a decorator.
185185
186186 Usage:
187- - As a function: ``udf(func, input_fields, return_field, volatility, name)``.
187+ - As a function: ``udf(func, input_fields, return_field,
188+ volatility, name)``.
188189 - As a decorator: ``@udf(input_fields, return_field, volatility, name)``.
189190 When used a decorator, do **not** pass ``func`` explicitly.
190191
@@ -209,19 +210,33 @@ def udf(*args: Any, **kwargs: Any): # noqa: D417
209210 A user-defined function that can be used in SQL expressions,
210211 data aggregation, or window function calls.
211212
212- Example: Using ``udf`` as a function::
213+ Examples:
214+ Using ``udf`` as a function:
213215
214- def double_func(x):
215- return x * 2
216- double_udf = udf(double_func, [pa.int32()], pa.int32(),
217- "volatile", "double_it")
216+ >>> import pyarrow as pa
217+ >>> import pyarrow.compute as pc
218+ >>> from datafusion.user_defined import ScalarUDF
219+ >>> def double_func(x):
220+ ... return pc.multiply(x, 2)
221+ >>> double_udf = ScalarUDF.udf(
222+ ... double_func, [pa.int64()], pa.int64(),
223+ ... "volatile", "double_it")
218224
219- Example: Using ``udf`` as a decorator: :
225+ Using ``udf`` as a decorator:
220226
221- @udf([pa.int32()], pa.int32(), "volatile", "double_it")
222- def double_udf(x):
223- return x * 2
224- """ # noqa: W505 E501
227+ >>> @ScalarUDF.udf([pa.int64()], pa.int64(), "volatile")
228+ ... def decorator_double_udf(x):
229+ ... return pc.multiply(x, 3)
230+
231+ Apply to a dataframe:
232+
233+ >>> ctx = dfn.SessionContext()
234+ >>> df = ctx.from_pydict({"x": [1, 2, 3]})
235+ >>> df.select(double_udf(col("x")).alias("result")).to_pydict()
236+ {'result': [2, 4, 6]}
237+ >>> df.select(decorator_double_udf(col("x")).alias("result")).to_pydict()
238+ {'result': [3, 6, 9]}
239+ """
225240
226241 def _function (
227242 func : Callable [..., _R ],
@@ -458,48 +473,72 @@ def udaf(*args: Any, **kwargs: Any): # noqa: D417, C901
458473 - As a decorator: ``@udaf(input_types, return_type, state_type, volatility, name)``.
459474 When using ``udaf`` as a decorator, do not pass ``accum`` explicitly.
460475
461- Function example:
462-
463476 If your :py:class:`Accumulator` can be instantiated with no arguments, you
464- can simply pass it's type as `accum`. If you need to pass additional
465- arguments to it's constructor, you can define a lambda or a factory method.
477+ can simply pass its type as `` accum` `. If you need to pass additional
478+ arguments to its constructor, you can define a lambda or a factory method.
466479 During runtime the :py:class:`Accumulator` will be constructed for every
467- instance in which this UDAF is used. The following examples are all valid::
468-
469- import pyarrow as pa
470- import pyarrow.compute as pc
471-
472- class Summarize(Accumulator):
473- def __init__(self, bias: float = 0.0):
474- self._sum = pa.scalar(bias)
475-
476- def state(self) -> list[pa.Scalar]:
477- return [self._sum]
478-
479- def update(self, values: pa.Array) -> None:
480- self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py())
481-
482- def merge(self, states: list[pa.Array]) -> None:
483- self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py())
484-
485- def evaluate(self) -> pa.Scalar:
486- return self._sum
487-
488- def sum_bias_10() -> Summarize:
489- return Summarize(10.0)
490-
491- udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()],
492- "immutable")
493- udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()],
494- "immutable")
495- udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(),
496- [pa.float64()], "immutable")
497-
498- Decorator example:::
499-
500- @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable")
501- def udf4() -> Summarize:
502- return Summarize(10.0)
480+ instance in which this UDAF is used.
481+
482+ Examples:
483+ >>> import pyarrow as pa
484+ >>> import pyarrow.compute as pc
485+ >>> from datafusion.user_defined import AggregateUDF, Accumulator, udaf
486+ >>> class Summarize(Accumulator):
487+ ... def __init__(self, bias: float = 0.0):
488+ ... self._sum = pa.scalar(bias)
489+ ... def state(self):
490+ ... return [self._sum]
491+ ... def update(self, values):
492+ ... self._sum = pa.scalar(
493+ ... self._sum.as_py() + pc.sum(values).as_py())
494+ ... def merge(self, states):
495+ ... self._sum = pa.scalar(
496+ ... self._sum.as_py() + pc.sum(states[0]).as_py())
497+ ... def evaluate(self):
498+ ... return self._sum
499+
500+ Using ``udaf`` as a function:
501+
502+ >>> udaf1 = AggregateUDF.udaf(
503+ ... Summarize, pa.float64(), pa.float64(),
504+ ... [pa.float64()], "immutable")
505+
506+ Wrapping ``udaf`` with a function:
507+
508+ >>> def sum_bias_10() -> Summarize:
509+ ... return Summarize(10.0)
510+ >>> udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()],
511+ ... "immutable")
512+
513+ Using ``udaf`` with lambda:
514+
515+ >>> udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(),
516+ ... [pa.float64()], "immutable")
517+
518+ Using ``udaf`` as a decorator:
519+
520+ >>> @AggregateUDF.udaf(
521+ ... pa.float64(), pa.float64(),
522+ ... [pa.float64()], "immutable")
523+ ... def udaf4():
524+ ... return Summarize(10.0)
525+
526+ Apply to a dataframe:
527+
528+ >>> ctx = dfn.SessionContext()
529+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
530+ >>> df.aggregate([], [udaf1(col("a")).alias("total")]).collect_column(
531+ ... "total")[0].as_py()
532+ 6.0
533+ >>> df.aggregate([], [udaf2(col("a")).alias("total")]).collect_column(
534+ ... "total")[0].as_py()
535+ 16.0
536+ >>> df.aggregate([], [udaf3(col("a")).alias("total")]).collect_column(
537+ ... "total")[0].as_py()
538+ 26.0
539+ >>> df.aggregate([], [udaf4(col("a")).alias("total")]).collect_column(
540+ ... "total")[0].as_py()
541+ 16.0
503542
504543 Args:
505544 accum: The accumulator python function. Only needed when calling as a
@@ -834,31 +873,45 @@ def udwf(*args: Any, **kwargs: Any): # noqa: D417
834873 - As a decorator: ``@udwf(input_types, return_type, volatility, name)``.
835874 When using ``udwf`` as a decorator, do not pass ``func`` explicitly.
836875
837- Function example::
838-
839- import pyarrow as pa
840-
841- class BiasedNumbers(WindowEvaluator):
842- def __init__(self, start: int = 0) -> None:
843- self.start = start
844-
845- def evaluate_all(self, values: list[pa.Array],
846- num_rows: int) -> pa.Array:
847- return pa.array([self.start + i for i in range(num_rows)])
848-
849- def bias_10() -> BiasedNumbers:
850- return BiasedNumbers(10)
851-
852- udwf1 = udwf(BiasedNumbers, pa.int64(), pa.int64(), "immutable")
853- udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable")
854- udwf3 = udwf(lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable")
855-
856-
857- Decorator example::
858-
859- @udwf(pa.int64(), pa.int64(), "immutable")
860- def biased_numbers() -> BiasedNumbers:
861- return BiasedNumbers(10)
876+ Examples:
877+ >>> import pyarrow as pa
878+ >>> from datafusion.user_defined import WindowUDF, WindowEvaluator, udwf
879+ >>> class BiasedNumbers(WindowEvaluator):
880+ ... def __init__(self, start: int = 0):
881+ ... self.start = start
882+ ... def evaluate_all(self, values, num_rows):
883+ ... return pa.array(
884+ ... [self.start + i for i in range(num_rows)])
885+
886+ Using ``udwf`` as a function:
887+
888+ >>> udwf1 = WindowUDF.udwf(
889+ ... BiasedNumbers, pa.int64(), pa.int64(), "immutable")
890+ >>> def bias_10() -> BiasedNumbers:
891+ ... return BiasedNumbers(10)
892+ >>> udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable")
893+ >>> udwf3 = udwf(
894+ ... lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable"
895+ ... )
896+
897+ Using ``udwf`` as a decorator:
898+
899+ >>> @WindowUDF.udwf(pa.int64(), pa.int64(), "immutable")
900+ ... def biased_numbers():
901+ ... return BiasedNumbers(10)
902+
903+ Apply to a dataframe:
904+
905+ >>> ctx = dfn.SessionContext()
906+ >>> df = ctx.from_pydict({"a": [10, 20, 30]})
907+ >>> df.select(udwf1(col("a")).alias("result")).to_pydict()
908+ {'result': [0, 1, 2]}
909+ >>> df.select(udwf2(col("a")).alias("result")).to_pydict()
910+ {'result': [10, 11, 12]}
911+ >>> df.select(udwf3(col("a")).alias("result")).to_pydict()
912+ {'result': [20, 21, 22]}
913+ >>> df.select(biased_numbers(col("a")).alias("result")).to_pydict()
914+ {'result': [10, 11, 12]}
862915
863916 Args:
864917 func: Only needed when calling as a function. Skip this argument when
0 commit comments