Skip to content

Commit 0e746a5

Browse files
committed
Move user_defined.py to doctestable examples
1 parent 0bcfbce commit 0e746a5

File tree

1 file changed

+129
-76
lines changed

1 file changed

+129
-76
lines changed

python/datafusion/user_defined.py

Lines changed: 129 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,8 @@ def udf(*args: Any, **kwargs: Any): # noqa: D417
184184
This class can be used both as either a function or a decorator.
185185
186186
Usage:
187-
- As a function: ``udf(func, input_fields, return_field, volatility, name)``.
187+
- As a function: ``udf(func, input_fields, return_field,
188+
volatility, name)``.
188189
- As a decorator: ``@udf(input_fields, return_field, volatility, name)``.
189190
When used a decorator, do **not** pass ``func`` explicitly.
190191
@@ -209,19 +210,33 @@ def udf(*args: Any, **kwargs: Any): # noqa: D417
209210
A user-defined function that can be used in SQL expressions,
210211
data aggregation, or window function calls.
211212
212-
Example: Using ``udf`` as a function::
213+
Examples:
214+
Using ``udf`` as a function:
213215
214-
def double_func(x):
215-
return x * 2
216-
double_udf = udf(double_func, [pa.int32()], pa.int32(),
217-
"volatile", "double_it")
216+
>>> import pyarrow as pa
217+
>>> import pyarrow.compute as pc
218+
>>> from datafusion.user_defined import ScalarUDF
219+
>>> def double_func(x):
220+
... return pc.multiply(x, 2)
221+
>>> double_udf = ScalarUDF.udf(
222+
... double_func, [pa.int64()], pa.int64(),
223+
... "volatile", "double_it")
218224
219-
Example: Using ``udf`` as a decorator::
225+
Using ``udf`` as a decorator:
220226
221-
@udf([pa.int32()], pa.int32(), "volatile", "double_it")
222-
def double_udf(x):
223-
return x * 2
224-
""" # noqa: W505 E501
227+
>>> @ScalarUDF.udf([pa.int64()], pa.int64(), "volatile")
228+
... def decorator_double_udf(x):
229+
... return pc.multiply(x, 3)
230+
231+
Apply to a dataframe:
232+
233+
>>> ctx = dfn.SessionContext()
234+
>>> df = ctx.from_pydict({"x": [1, 2, 3]})
235+
>>> df.select(double_udf(col("x")).alias("result")).to_pydict()
236+
{'result': [2, 4, 6]}
237+
>>> df.select(decorator_double_udf(col("x")).alias("result")).to_pydict()
238+
{'result': [3, 6, 9]}
239+
"""
225240

226241
def _function(
227242
func: Callable[..., _R],
@@ -458,48 +473,72 @@ def udaf(*args: Any, **kwargs: Any): # noqa: D417, C901
458473
- As a decorator: ``@udaf(input_types, return_type, state_type, volatility, name)``.
459474
When using ``udaf`` as a decorator, do not pass ``accum`` explicitly.
460475
461-
Function example:
462-
463476
If your :py:class:`Accumulator` can be instantiated with no arguments, you
464-
can simply pass it's type as `accum`. If you need to pass additional
465-
arguments to it's constructor, you can define a lambda or a factory method.
477+
can simply pass its type as ``accum``. If you need to pass additional
478+
arguments to its constructor, you can define a lambda or a factory method.
466479
During runtime the :py:class:`Accumulator` will be constructed for every
467-
instance in which this UDAF is used. The following examples are all valid::
468-
469-
import pyarrow as pa
470-
import pyarrow.compute as pc
471-
472-
class Summarize(Accumulator):
473-
def __init__(self, bias: float = 0.0):
474-
self._sum = pa.scalar(bias)
475-
476-
def state(self) -> list[pa.Scalar]:
477-
return [self._sum]
478-
479-
def update(self, values: pa.Array) -> None:
480-
self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py())
481-
482-
def merge(self, states: list[pa.Array]) -> None:
483-
self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py())
484-
485-
def evaluate(self) -> pa.Scalar:
486-
return self._sum
487-
488-
def sum_bias_10() -> Summarize:
489-
return Summarize(10.0)
490-
491-
udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()],
492-
"immutable")
493-
udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()],
494-
"immutable")
495-
udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(),
496-
[pa.float64()], "immutable")
497-
498-
Decorator example:::
499-
500-
@udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable")
501-
def udf4() -> Summarize:
502-
return Summarize(10.0)
480+
instance in which this UDAF is used.
481+
482+
Examples:
483+
>>> import pyarrow as pa
484+
>>> import pyarrow.compute as pc
485+
>>> from datafusion.user_defined import AggregateUDF, Accumulator, udaf
486+
>>> class Summarize(Accumulator):
487+
... def __init__(self, bias: float = 0.0):
488+
... self._sum = pa.scalar(bias)
489+
... def state(self):
490+
... return [self._sum]
491+
... def update(self, values):
492+
... self._sum = pa.scalar(
493+
... self._sum.as_py() + pc.sum(values).as_py())
494+
... def merge(self, states):
495+
... self._sum = pa.scalar(
496+
... self._sum.as_py() + pc.sum(states[0]).as_py())
497+
... def evaluate(self):
498+
... return self._sum
499+
500+
Using ``udaf`` as a function:
501+
502+
>>> udaf1 = AggregateUDF.udaf(
503+
... Summarize, pa.float64(), pa.float64(),
504+
... [pa.float64()], "immutable")
505+
506+
Wrapping ``udaf`` with a function:
507+
508+
>>> def sum_bias_10() -> Summarize:
509+
... return Summarize(10.0)
510+
>>> udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()],
511+
... "immutable")
512+
513+
Using ``udaf`` with lambda:
514+
515+
>>> udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(),
516+
... [pa.float64()], "immutable")
517+
518+
Using ``udaf`` as a decorator:
519+
520+
>>> @AggregateUDF.udaf(
521+
... pa.float64(), pa.float64(),
522+
... [pa.float64()], "immutable")
523+
... def udaf4():
524+
... return Summarize(10.0)
525+
526+
Apply to a dataframe:
527+
528+
>>> ctx = dfn.SessionContext()
529+
>>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
530+
>>> df.aggregate([], [udaf1(col("a")).alias("total")]).collect_column(
531+
... "total")[0].as_py()
532+
6.0
533+
>>> df.aggregate([], [udaf2(col("a")).alias("total")]).collect_column(
534+
... "total")[0].as_py()
535+
16.0
536+
>>> df.aggregate([], [udaf3(col("a")).alias("total")]).collect_column(
537+
... "total")[0].as_py()
538+
26.0
539+
>>> df.aggregate([], [udaf4(col("a")).alias("total")]).collect_column(
540+
... "total")[0].as_py()
541+
16.0
503542
504543
Args:
505544
accum: The accumulator python function. Only needed when calling as a
@@ -834,31 +873,45 @@ def udwf(*args: Any, **kwargs: Any): # noqa: D417
834873
- As a decorator: ``@udwf(input_types, return_type, volatility, name)``.
835874
When using ``udwf`` as a decorator, do not pass ``func`` explicitly.
836875
837-
Function example::
838-
839-
import pyarrow as pa
840-
841-
class BiasedNumbers(WindowEvaluator):
842-
def __init__(self, start: int = 0) -> None:
843-
self.start = start
844-
845-
def evaluate_all(self, values: list[pa.Array],
846-
num_rows: int) -> pa.Array:
847-
return pa.array([self.start + i for i in range(num_rows)])
848-
849-
def bias_10() -> BiasedNumbers:
850-
return BiasedNumbers(10)
851-
852-
udwf1 = udwf(BiasedNumbers, pa.int64(), pa.int64(), "immutable")
853-
udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable")
854-
udwf3 = udwf(lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable")
855-
856-
857-
Decorator example::
858-
859-
@udwf(pa.int64(), pa.int64(), "immutable")
860-
def biased_numbers() -> BiasedNumbers:
861-
return BiasedNumbers(10)
876+
Examples:
877+
>>> import pyarrow as pa
878+
>>> from datafusion.user_defined import WindowUDF, WindowEvaluator, udwf
879+
>>> class BiasedNumbers(WindowEvaluator):
880+
... def __init__(self, start: int = 0):
881+
... self.start = start
882+
... def evaluate_all(self, values, num_rows):
883+
... return pa.array(
884+
... [self.start + i for i in range(num_rows)])
885+
886+
Using ``udwf`` as a function:
887+
888+
>>> udwf1 = WindowUDF.udwf(
889+
... BiasedNumbers, pa.int64(), pa.int64(), "immutable")
890+
>>> def bias_10() -> BiasedNumbers:
891+
... return BiasedNumbers(10)
892+
>>> udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable")
893+
>>> udwf3 = udwf(
894+
... lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable"
895+
... )
896+
897+
Using ``udwf`` as a decorator:
898+
899+
>>> @WindowUDF.udwf(pa.int64(), pa.int64(), "immutable")
900+
... def biased_numbers():
901+
... return BiasedNumbers(10)
902+
903+
Apply to a dataframe:
904+
905+
>>> ctx = dfn.SessionContext()
906+
>>> df = ctx.from_pydict({"a": [10, 20, 30]})
907+
>>> df.select(udwf1(col("a")).alias("result")).to_pydict()
908+
{'result': [0, 1, 2]}
909+
>>> df.select(udwf2(col("a")).alias("result")).to_pydict()
910+
{'result': [10, 11, 12]}
911+
>>> df.select(udwf3(col("a")).alias("result")).to_pydict()
912+
{'result': [20, 21, 22]}
913+
>>> df.select(biased_numbers(col("a")).alias("result")).to_pydict()
914+
{'result': [10, 11, 12]}
862915
863916
Args:
864917
func: Only needed when calling as a function. Skip this argument when

0 commit comments

Comments
 (0)