|
22 | 22 | from __future__ import annotations
|
23 | 23 |
|
24 | 24 | import warnings
|
| 25 | +from enum import Enum |
25 | 26 | from typing import (
|
26 | 27 | TYPE_CHECKING,
|
27 | 28 | Any,
|
|
33 | 34 | overload,
|
34 | 35 | )
|
35 | 36 |
|
| 37 | +import pyarrow as pa |
36 | 38 | from typing_extensions import deprecated
|
37 | 39 |
|
| 40 | +from datafusion import functions as f |
| 41 | +from datafusion._internal import DataFrame as DataFrameInternal |
| 42 | +from datafusion.expr import Expr, SortExpr, sort_or_default |
38 | 43 | from datafusion.plan import ExecutionPlan, LogicalPlan
|
39 | 44 | from datafusion.record_batch import RecordBatchStream
|
40 | 45 |
|
41 |
| -import pyarrow as pa |
42 |
| -from datafusion import functions as f |
43 |
| - |
44 | 46 | if TYPE_CHECKING:
|
45 | 47 | import pathlib
|
46 | 48 | from typing import Callable, Sequence
|
47 | 49 |
|
48 | 50 | import pandas as pd
|
49 | 51 | import polars as pl
|
50 | 52 |
|
51 |
| -from enum import Enum |
52 |
| - |
53 |
| -from datafusion._internal import DataFrame as DataFrameInternal |
54 |
| -from datafusion.expr import Expr, SortExpr, sort_or_default |
55 |
| - |
56 | 53 |
|
57 | 54 | # excerpt from deltalake
|
58 | 55 | # https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163
|
@@ -868,14 +865,14 @@ def fill_null(self, value: Any, subset: list[str] | None = None) -> "DataFrame":
|
868 | 865 |
|
869 | 866 | Examples:
|
870 | 867 | >>> df = df.fill_null(0) # Fill all nulls with 0 where possible
|
871 |
| - >>> df = df.fill_null("missing", subset=["name", "category"]) # Fill string columns |
| 868 | + >>> # Fill nulls in specific string columns |
| 869 | + >>> df = df.fill_null("missing", subset=["name", "category"]) |
872 | 870 |
|
873 | 871 | Notes:
|
874 | 872 | - Only fills nulls in columns where the value can be cast to the column type
|
875 | 873 | - For columns where casting fails, the original column is kept unchanged
|
876 | 874 | - For columns not in subset, the original column is kept unchanged
|
877 | 875 | """
|
878 |
| - |
879 | 876 | # Get columns to process
|
880 | 877 | if subset is None:
|
881 | 878 | subset = self.schema().names
|
@@ -916,23 +913,24 @@ def fill_nan(
|
916 | 913 | """Fill NaN values in specified numeric columns with a value.
|
917 | 914 |
|
918 | 915 | Args:
|
919 |
| - value: Numeric value to replace NaN values with |
920 |
| - subset: Optional list of column names to fill. If None, fills all numeric columns. |
| 916 | + value: Numeric value to replace NaN values with. |
| 917 | + subset: Optional list of column names to fill. If None, fills all numeric |
| 918 | + columns. |
921 | 919 |
|
922 | 920 | Returns:
|
923 |
| - DataFrame with NaN values replaced in numeric columns |
| 921 | + DataFrame with NaN values replaced in numeric columns. |
924 | 922 |
|
925 | 923 | Examples:
|
926 | 924 | >>> df = df.fill_nan(0) # Fill all NaNs with 0 in numeric columns
|
927 |
| - >>> df = df.fill_nan(99.9, subset=["price", "score"]) # Fill specific columns |
| 925 | + >>> # Fill NaNs in specific numeric columns |
| 926 | + >>> df = df.fill_nan(99.9, subset=["price", "score"]) |
928 | 927 |
|
929 | 928 | Notes:
|
930 | 929 | - Only fills NaN values in numeric columns (float32, float64)
|
931 | 930 | - Non-numeric columns are kept unchanged
|
932 | 931 | - For columns not in subset, the original column is kept unchanged
|
933 | 932 | - Value must be numeric (int or float)
|
934 | 933 | """
|
935 |
| - |
936 | 934 | if not isinstance(value, (int, float)):
|
937 | 935 | raise ValueError("Value must be numeric (int or float)")
|
938 | 936 |
|
|
0 commit comments