Skip to content

Commit 93314e7

Browse files
samukweku and samuel.oranyeli authored
make complete as lazy as possible for polars (#1378)
Co-authored-by: samuel.oranyeli <[email protected]>
1 parent 5d13c62 commit 93314e7

File tree

1 file changed

+8
-13
lines changed

1 file changed

+8
-13
lines changed

janitor/polars/complete.py

+8-13
Original file line number | Diff line number | Diff line change
@@ -81,45 +81,40 @@ def _complete(
8181
for column in _columns:
8282
uniques = uniques.unnest(columns=column)
8383

84-
if fill_value is None:
84+
no_columns_to_fill = set(df.columns) == set(uniques.columns)
85+
if fill_value is None or no_columns_to_fill:
8586
return uniques.join(df, on=uniques.columns, how="full", coalesce=True)
8687
idx = None
8788
columns_to_select = df.columns
8889
if not explicit:
8990
idx = "".join(df.columns)
91+
idx = f"{idx}_"
9092
df = df.with_row_index(name=idx)
9193
df = uniques.join(df, on=uniques.columns, how="full", coalesce=True)
9294
# exclude columns that were not used
9395
# to generate the combinations
9496
exclude_columns = uniques.columns
9597
if idx:
9698
exclude_columns.append(idx)
97-
expression = pl.exclude(exclude_columns).is_null().any()
98-
booleans = df.select(expression)
99-
if isinstance(booleans, pl.LazyFrame):
100-
booleans = booleans.collect()
10199
_columns = [
102-
column
103-
for column in booleans.columns
104-
if booleans.get_column(column).item()
100+
column for column in columns_to_select if column not in exclude_columns
105101
]
106-
if _columns and isinstance(fill_value, dict):
102+
if isinstance(fill_value, dict):
107103
fill_value = [
108104
pl.col(column_name).fill_null(value=value)
109105
for column_name, value in fill_value.items()
110106
if column_name in _columns
111107
]
112-
elif _columns:
108+
else:
113109
fill_value = [
114110
pl.col(column).fill_null(value=fill_value) for column in _columns
115111
]
116-
if _columns and not explicit:
112+
if not explicit:
117113
condition = pl.col(idx).is_null()
118114
fill_value = [
119115
pl.when(condition).then(_fill_value).otherwise(pl.col(column_name))
120116
for column_name, _fill_value in zip(_columns, fill_value)
121117
]
122-
if _columns:
123-
df = df.with_columns(fill_value)
118+
df = df.with_columns(fill_value)
124119

125120
return df.select(columns_to_select)

0 commit comments

Comments
 (0)