|
22 | 22 | from __future__ import annotations |
23 | 23 |
|
24 | 24 | import warnings |
| 25 | +from enum import Enum |
25 | 26 | from typing import ( |
26 | 27 | TYPE_CHECKING, |
27 | 28 | Any, |
|
33 | 34 | overload, |
34 | 35 | ) |
35 | 36 |
|
| 37 | +import pyarrow as pa |
36 | 38 | from typing_extensions import deprecated |
37 | 39 |
|
| 40 | +from datafusion import functions as f |
| 41 | +from datafusion._internal import DataFrame as DataFrameInternal |
| 42 | +from datafusion.expr import Expr, SortExpr, sort_or_default |
38 | 43 | from datafusion.plan import ExecutionPlan, LogicalPlan |
39 | 44 | from datafusion.record_batch import RecordBatchStream |
40 | 45 |
|
41 | | -import pyarrow as pa |
42 | | -from datafusion import functions as f |
43 | | - |
44 | 46 | if TYPE_CHECKING: |
45 | 47 | import pathlib |
46 | 48 | from typing import Callable, Sequence |
47 | 49 |
|
48 | 50 | import pandas as pd |
49 | 51 | import polars as pl |
50 | 52 |
|
51 | | -from enum import Enum |
52 | | - |
53 | | -from datafusion._internal import DataFrame as DataFrameInternal |
54 | | -from datafusion.expr import Expr, SortExpr, sort_or_default |
55 | | - |
56 | 53 |
|
57 | 54 | # excerpt from deltalake |
58 | 55 | # https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163 |
@@ -868,14 +865,14 @@ def fill_null(self, value: Any, subset: list[str] | None = None) -> "DataFrame": |
868 | 865 |
|
869 | 866 | Examples: |
870 | 867 | >>> df = df.fill_null(0) # Fill all nulls with 0 where possible |
871 | | - >>> df = df.fill_null("missing", subset=["name", "category"]) # Fill string columns |
| 868 | + >>> # Fill nulls in specific string columns |
| 869 | + >>> df = df.fill_null("missing", subset=["name", "category"]) |
872 | 870 |
|
873 | 871 | Notes: |
874 | 872 | - Only fills nulls in columns where the value can be cast to the column type |
875 | 873 | - For columns where casting fails, the original column is kept unchanged |
876 | 874 | - For columns not in subset, the original column is kept unchanged |
877 | 875 | """ |
878 | | - |
879 | 876 | # Get columns to process |
880 | 877 | if subset is None: |
881 | 878 | subset = self.schema().names |
@@ -916,23 +913,24 @@ def fill_nan( |
916 | 913 | """Fill NaN values in specified numeric columns with a value. |
917 | 914 |
|
918 | 915 | Args: |
919 | | - value: Numeric value to replace NaN values with |
920 | | - subset: Optional list of column names to fill. If None, fills all numeric columns. |
| 916 | + value: Numeric value to replace NaN values with. |
| 917 | + subset: Optional list of column names to fill. If None, fills all numeric |
| 918 | + columns. |
921 | 919 |
|
922 | 920 | Returns: |
923 | | - DataFrame with NaN values replaced in numeric columns |
| 921 | + DataFrame with NaN values replaced in numeric columns. |
924 | 922 |
|
925 | 923 | Examples: |
926 | 924 | >>> df = df.fill_nan(0) # Fill all NaNs with 0 in numeric columns |
927 | | - >>> df = df.fill_nan(99.9, subset=["price", "score"]) # Fill specific columns |
| 925 | + >>> # Fill NaNs in specific numeric columns |
| 926 | + >>> df = df.fill_nan(99.9, subset=["price", "score"]) |
928 | 927 |
|
929 | 928 | Notes: |
930 | 929 | - Only fills NaN values in numeric columns (float32, float64) |
931 | 930 | - Non-numeric columns are kept unchanged |
932 | 931 | - For columns not in subset, the original column is kept unchanged |
933 | 932 | - Value must be numeric (int or float) |
934 | 933 | """ |
935 | | - |
936 | 934 | if not isinstance(value, (int, float)): |
937 | 935 | raise ValueError("Value must be numeric (int or float)") |
938 | 936 |
|
|
0 commit comments