38
38
from datafusion .plan import ExecutionPlan , LogicalPlan
39
39
from datafusion .record_batch import RecordBatchStream
40
40
41
+ import pyarrow as pa
42
+ from datafusion import functions as f
41
43
42
44
if TYPE_CHECKING :
43
45
import pathlib
44
46
from typing import Callable , Sequence
45
47
46
48
import pandas as pd
47
49
import polars as pl
48
- import pyarrow as pa
49
50
50
51
from enum import Enum
51
52
@@ -874,8 +875,6 @@ def fill_null(self, value: Any, subset: list[str] | None = None) -> "DataFrame":
874
875
- For columns where casting fails, the original column is kept unchanged
875
876
- For columns not in subset, the original column is kept unchanged
876
877
"""
877
- import pyarrow as pa
878
- from datafusion import functions as f
879
878
880
879
# Get columns to process
881
880
if subset is None :
@@ -910,38 +909,39 @@ def fill_null(self, value: Any, subset: list[str] | None = None) -> "DataFrame":
910
909
exprs .append (f .col (col_name ))
911
910
912
911
return self .select (* exprs )
913
-
914
- def fill_nan (self , value : float | int , subset : list [str ] | None = None ) -> "DataFrame" :
912
+
913
+ def fill_nan (
914
+ self , value : float | int , subset : list [str ] | None = None
915
+ ) -> "DataFrame" :
915
916
"""Fill NaN values in specified numeric columns with a value.
916
-
917
+
917
918
Args:
918
919
value: Numeric value to replace NaN values with
919
920
subset: Optional list of column names to fill. If None, fills all numeric columns.
920
-
921
+
921
922
Returns:
922
923
DataFrame with NaN values replaced in numeric columns
923
-
924
+
924
925
Examples:
925
926
>>> df = df.fill_nan(0) # Fill all NaNs with 0 in numeric columns
926
927
>>> df = df.fill_nan(99.9, subset=["price", "score"]) # Fill specific columns
927
-
928
+
928
929
Notes:
929
930
- Only fills NaN values in numeric columns (float32, float64)
930
931
- Non-numeric columns are kept unchanged
931
932
- For columns not in subset, the original column is kept unchanged
932
933
- Value must be numeric (int or float)
933
934
"""
934
- import pyarrow as pa
935
- from datafusion import functions as f
936
-
935
+
937
936
if not isinstance (value , (int , float )):
938
937
raise ValueError ("Value must be numeric (int or float)" )
939
938
940
939
# Get columns to process
941
940
if subset is None :
942
941
# Only get numeric columns if no subset specified
943
942
subset = [
944
- field .name for field in self .schema ()
943
+ field .name
944
+ for field in self .schema ()
945
945
if pa .types .is_floating (field .type )
946
946
]
947
947
else :
@@ -962,5 +962,5 @@ def fill_nan(self, value: float | int, subset: list[str] | None = None) -> "Data
962
962
else :
963
963
# Keep columns not in subset unchanged
964
964
exprs .append (f .col (col_name ))
965
-
965
+
966
966
return self .select (* exprs )
0 commit comments