3838from datafusion .plan import ExecutionPlan , LogicalPlan
3939from datafusion .record_batch import RecordBatchStream
4040
41+ import pyarrow as pa
42+ from datafusion import functions as f
4143
4244if TYPE_CHECKING :
4345 import pathlib
4446 from typing import Callable , Sequence
4547
4648 import pandas as pd
4749 import polars as pl
48- import pyarrow as pa
4950
5051from enum import Enum
5152
@@ -874,8 +875,6 @@ def fill_null(self, value: Any, subset: list[str] | None = None) -> "DataFrame":
874875 - For columns where casting fails, the original column is kept unchanged
875876 - For columns not in subset, the original column is kept unchanged
876877 """
877- import pyarrow as pa
878- from datafusion import functions as f
879878
880879 # Get columns to process
881880 if subset is None :
@@ -910,38 +909,39 @@ def fill_null(self, value: Any, subset: list[str] | None = None) -> "DataFrame":
910909 exprs .append (f .col (col_name ))
911910
912911 return self .select (* exprs )
913-
914- def fill_nan (self , value : float | int , subset : list [str ] | None = None ) -> "DataFrame" :
912+
913+ def fill_nan (
914+ self , value : float | int , subset : list [str ] | None = None
915+ ) -> "DataFrame" :
915916 """Fill NaN values in specified numeric columns with a value.
916-
917+
917918 Args:
918919 value: Numeric value to replace NaN values with
919920 subset: Optional list of column names to fill. If None, fills all numeric columns.
920-
921+
921922 Returns:
922923 DataFrame with NaN values replaced in numeric columns
923-
924+
924925 Examples:
925926 >>> df = df.fill_nan(0) # Fill all NaNs with 0 in numeric columns
926927 >>> df = df.fill_nan(99.9, subset=["price", "score"]) # Fill specific columns
927-
928+
928929 Notes:
929930 - Only fills NaN values in numeric columns (float32, float64)
930931 - Non-numeric columns are kept unchanged
931932 - For columns not in subset, the original column is kept unchanged
932933 - Value must be numeric (int or float)
933934 """
934- import pyarrow as pa
935- from datafusion import functions as f
936-
935+
937936 if not isinstance (value , (int , float )):
938937 raise ValueError ("Value must be numeric (int or float)" )
939938
940939 # Get columns to process
941940 if subset is None :
942941 # Only get numeric columns if no subset specified
943942 subset = [
944- field .name for field in self .schema ()
943+ field .name
944+ for field in self .schema ()
945945 if pa .types .is_floating (field .type )
946946 ]
947947 else :
@@ -962,5 +962,5 @@ def fill_nan(self, value: float | int, subset: list[str] | None = None) -> "Data
962962 else :
963963 # Keep columns not in subset unchanged
964964 exprs .append (f .col (col_name ))
965-
965+
966966 return self .select (* exprs )
0 commit comments