Skip to content

Commit a322d96

Browse files
committed
Convert dataframe examples to doctestable. Found bug in dropping A
1 parent 2ed355b commit a322d96

File tree

1 file changed

+51
-33
lines changed

1 file changed

+51
-33
lines changed

python/datafusion/dataframe.py

Lines changed: 51 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ def into_view(self, temporary: bool = False) -> Table:
331331
>>> result[0].column("value").to_pylist()
332332
[1]
333333
"""
334-
from datafusion.catalog import Table as _Table
334+
from datafusion.catalog import Table as _Table # noqa: PLC0415
335335

336336
return _Table(self.df.into_view(temporary))
337337

@@ -451,9 +451,20 @@ def drop(self, *columns: str) -> DataFrame:
451451
Returns:
452452
DataFrame with those columns removed in the projection.
453453
454-
Example Usage::
455-
df.drop('a') # To drop a lower-cased column 'a'
456-
df.drop('"a"') # To drop an upper-cased column 'A'
454+
Examples:
455+
To drop a lower-cased column 'a'
456+
457+
>>> ctx = dfn.SessionContext()
458+
>>> df = ctx.from_pydict({"a": [1, 2], "b": [3, 4]})
459+
>>> df.drop("a").schema().names
460+
['b']
461+
462+
Or to drop an upper-cased column 'A'
463+
464+
>>> ctx = dfn.SessionContext()
465+
>>> df = ctx.from_pydict({"A": [1, 2], "b": [3, 4]})
466+
>>> df.drop('"A"').schema().names
467+
['b']
457468
"""
458469
return DataFrame(self.df.drop(*columns))
459470

@@ -468,11 +479,13 @@ def filter(self, *predicates: Expr | str) -> DataFrame:
468479
that will be parsed against the DataFrame schema. If more complex logic is
469480
required, see the logical operations in :py:mod:`~datafusion.functions`.
470481
471-
Example::
472-
473-
from datafusion import col, lit
474-
df.filter(col("a") > lit(1))
475-
df.filter("a > 1")
482+
Examples:
483+
>>> ctx = dfn.SessionContext()
484+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
485+
>>> df.filter(col("a") > lit(1)).to_pydict()
486+
{'a': [2, 3]}
487+
>>> df.filter("a > 1").to_pydict()
488+
{'a': [2, 3]}
476489
477490
Args:
478491
predicates: Predicate expression(s) or SQL strings to filter the DataFrame.
@@ -495,14 +508,12 @@ def parse_sql_expr(self, expr: str) -> Expr:
495508
496509
The expression is created and processed against the current schema.
497510
498-
Example::
499-
500-
from datafusion import col, lit
501-
df.parse_sql_expr("a > 1")
502-
503-
should produce:
504-
505-
col("a") > lit(1)
511+
Examples:
512+
>>> ctx = dfn.SessionContext()
513+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
514+
>>> expr = df.parse_sql_expr("a > 1")
515+
>>> df.filter(expr).to_pydict()
516+
{'a': [2, 3]}
506517
507518
Args:
508519
expr: Expression string to be converted to datafusion expression
@@ -519,10 +530,11 @@ def with_column(self, name: str, expr: Expr | str) -> DataFrame:
519530
:func:`datafusion.col` or :func:`datafusion.lit`, or a SQL expression
520531
string that will be parsed against the DataFrame schema.
521532
522-
Example::
523-
524-
from datafusion import col, lit
525-
df.with_column("b", col("a") + lit(1))
533+
Examples:
534+
>>> ctx = dfn.SessionContext()
535+
>>> df = ctx.from_pydict({"a": [1, 2]})
536+
>>> df.with_column("b", col("a") + lit(10)).to_pydict()
537+
{'a': [1, 2], 'b': [11, 12]}
526538
527539
Args:
528540
name: Name of the column to add.
@@ -885,10 +897,14 @@ def join_on(
885897
built with :func:`datafusion.col`. On expressions are used to support
886898
inequality predicates. Equality predicates are correctly optimized.
887899
888-
Example::
889-
890-
from datafusion import col
891-
df.join_on(other_df, col("id") == col("other_id"))
900+
Examples:
901+
>>> ctx = dfn.SessionContext()
902+
>>> left = ctx.from_pydict({"a": [1, 2], "x": ["a", "b"]})
903+
>>> right = ctx.from_pydict({"b": [1, 2], "y": ["c", "d"]})
904+
>>> left.join_on(
905+
... right, col("a") == col("b")
906+
... ).sort(col("x")).to_pydict()
907+
{'a': [1, 2], 'x': ['a', 'b'], 'b': [1, 2], 'y': ['c', 'd']}
892908
893909
Args:
894910
right: Other DataFrame to join with.
@@ -1350,15 +1366,17 @@ def __aiter__(self) -> AsyncIterator[RecordBatch]:
13501366
def transform(self, func: Callable[..., DataFrame], *args: Any) -> DataFrame:
13511367
"""Apply a function to the current DataFrame which returns another DataFrame.
13521368
1353-
This is useful for chaining together multiple functions. For example::
1354-
1355-
def add_3(df: DataFrame) -> DataFrame:
1356-
return df.with_column("modified", lit(3))
1369+
This is useful for chaining together multiple functions.
13571370
1358-
def within_limit(df: DataFrame, limit: int) -> DataFrame:
1359-
return df.filter(col("a") < lit(limit)).distinct()
1360-
1361-
df = df.transform(modify_df).transform(within_limit, 4)
1371+
Examples:
1372+
>>> ctx = dfn.SessionContext()
1373+
>>> df = ctx.from_pydict({"a": [1, 2, 3]})
1374+
>>> def add_3(df):
1375+
... return df.with_column("modified", dfn.lit(3))
1376+
>>> def within_limit(df: DataFrame, limit: int) -> DataFrame:
1377+
... return df.filter(col("a") < lit(limit)).distinct()
1378+
>>> df.transform(add_3).transform(within_limit, 4).sort("a").to_pydict()
1379+
{'a': [1, 2, 3], 'modified': [3, 3, 3]}
13621380
13631381
Args:
13641382
func: A callable function that takes a DataFrame as its first argument

0 commit comments

Comments
 (0)