Convert dataframe examples to doctestable. Found bug in dropping A

ntjohnson1 · ntjohnson1 · commit a322d9622e46 · 2026-03-19T10:38:11.000-04:00
diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
@@ -331,7 +331,7 @@ def into_view(self, temporary: bool = False) -> Table:
             >>> result[0].column("value").to_pylist()
             [1]
         """
-        from datafusion.catalog import Table as _Table
+        from datafusion.catalog import Table as _Table  # noqa: PLC0415
 
         return _Table(self.df.into_view(temporary))
 
@@ -451,9 +451,20 @@ def drop(self, *columns: str) -> DataFrame:
         Returns:
             DataFrame with those columns removed in the projection.
 
-        Example Usage::
-            df.drop('a')                    # To drop a lower-cased column 'a'
-            df.drop('"a"')                  # To drop an upper-cased column 'A'
+        Examples:
+            To drop a lower-cased column 'a'
+
+            >>> ctx = dfn.SessionContext()
+            >>> df = ctx.from_pydict({"a": [1, 2], "b": [3, 4]})
+            >>> df.drop("a").schema().names
+            ['b']
+
+            Or to drop an upper-cased column 'A'
+
+            >>> ctx = dfn.SessionContext()
+            >>> df = ctx.from_pydict({"A": [1, 2], "b": [3, 4]})
+            >>> df.drop('"A"').schema().names
+            ['b']
         """
         return DataFrame(self.df.drop(*columns))
 
@@ -468,11 +479,13 @@ def filter(self, *predicates: Expr | str) -> DataFrame:
         that will be parsed against the DataFrame schema. If more complex logic is
         required, see the logical operations in :py:mod:`~datafusion.functions`.
 
-        Example::
-
-            from datafusion import col, lit
-            df.filter(col("a") > lit(1))
-            df.filter("a > 1")
+        Examples:
+            >>> ctx = dfn.SessionContext()
+            >>> df = ctx.from_pydict({"a": [1, 2, 3]})
+            >>> df.filter(col("a") > lit(1)).to_pydict()
+            {'a': [2, 3]}
+            >>> df.filter("a > 1").to_pydict()
+            {'a': [2, 3]}
 
         Args:
             predicates: Predicate expression(s) or SQL strings to filter the DataFrame.
@@ -495,14 +508,12 @@ def parse_sql_expr(self, expr: str) -> Expr:
 
         The expression is created and processed against the current schema.
 
-        Example::
-
-            from datafusion import col, lit
-            df.parse_sql_expr("a > 1")
-
-            should produce:
-
-            col("a") > lit(1)
+        Examples:
+            >>> ctx = dfn.SessionContext()
+            >>> df = ctx.from_pydict({"a": [1, 2, 3]})
+            >>> expr = df.parse_sql_expr("a > 1")
+            >>> df.filter(expr).to_pydict()
+            {'a': [2, 3]}
 
         Args:
             expr: Expression string to be converted to datafusion expression
@@ -519,10 +530,11 @@ def with_column(self, name: str, expr: Expr | str) -> DataFrame:
         :func:`datafusion.col` or :func:`datafusion.lit`, or a SQL expression
         string that will be parsed against the DataFrame schema.
 
-        Example::
-
-            from datafusion import col, lit
-            df.with_column("b", col("a") + lit(1))
+        Examples:
+            >>> ctx = dfn.SessionContext()
+            >>> df = ctx.from_pydict({"a": [1, 2]})
+            >>> df.with_column("b", col("a") + lit(10)).to_pydict()
+            {'a': [1, 2], 'b': [11, 12]}
 
         Args:
             name: Name of the column to add.
@@ -885,10 +897,14 @@ def join_on(
         built with :func:`datafusion.col`. On expressions are used to support
         in-equality predicates. Equality predicates are correctly optimized.
 
-        Example::
-
-            from datafusion import col
-            df.join_on(other_df, col("id") == col("other_id"))
+        Examples:
+            >>> ctx = dfn.SessionContext()
+            >>> left = ctx.from_pydict({"a": [1, 2], "x": ["a", "b"]})
+            >>> right = ctx.from_pydict({"b": [1, 2], "y": ["c", "d"]})
+            >>> left.join_on(
+            ...     right, col("a") == col("b")
+            ... ).sort(col("x")).to_pydict()
+            {'a': [1, 2], 'x': ['a', 'b'], 'b': [1, 2], 'y': ['c', 'd']}
 
         Args:
             right: Other DataFrame to join with.
@@ -1350,15 +1366,17 @@ def __aiter__(self) -> AsyncIterator[RecordBatch]:
     def transform(self, func: Callable[..., DataFrame], *args: Any) -> DataFrame:
         """Apply a function to the current DataFrame which returns another DataFrame.
 
-        This is useful for chaining together multiple functions. For example::
-
-            def add_3(df: DataFrame) -> DataFrame:
-                return df.with_column("modified", lit(3))
+        This is useful for chaining together multiple functions.
 
-            def within_limit(df: DataFrame, limit: int) -> DataFrame:
-                return df.filter(col("a") < lit(limit)).distinct()
-
-            df = df.transform(modify_df).transform(within_limit, 4)
+        Examples:
+            >>> ctx = dfn.SessionContext()
+            >>> df = ctx.from_pydict({"a": [1, 2, 3]})
+            >>> def add_3(df):
+            ...     return df.with_column("modified", dfn.lit(3))
+            >>> def within_limit(df: DataFrame, limit: int) -> DataFrame:
+            ...     return df.filter(col("a") < lit(limit)).distinct()
+            >>> df.transform(add_3).transform(within_limit, 4).sort("a").to_pydict()
+            {'a': [1, 2, 3], 'modified': [3, 3, 3]}
 
         Args:
             func: A callable function that takes a DataFrame as it's first argument