@@ -81,45 +81,40 @@ def _complete(
81
81
for column in _columns :
82
82
uniques = uniques .unnest (columns = column )
83
83
84
- if fill_value is None :
84
+ no_columns_to_fill = set (df .columns ) == set (uniques .columns )
85
+ if fill_value is None or no_columns_to_fill :
85
86
return uniques .join (df , on = uniques .columns , how = "full" , coalesce = True )
86
87
idx = None
87
88
columns_to_select = df .columns
88
89
if not explicit :
89
90
idx = "" .join (df .columns )
91
+ idx = f"{ idx } _"
90
92
df = df .with_row_index (name = idx )
91
93
df = uniques .join (df , on = uniques .columns , how = "full" , coalesce = True )
92
94
# exclude columns that were not used
93
95
# to generate the combinations
94
96
exclude_columns = uniques .columns
95
97
if idx :
96
98
exclude_columns .append (idx )
97
- expression = pl .exclude (exclude_columns ).is_null ().any ()
98
- booleans = df .select (expression )
99
- if isinstance (booleans , pl .LazyFrame ):
100
- booleans = booleans .collect ()
101
99
_columns = [
102
- column
103
- for column in booleans .columns
104
- if booleans .get_column (column ).item ()
100
+ column for column in columns_to_select if column not in exclude_columns
105
101
]
106
- if _columns and isinstance (fill_value , dict ):
102
+ if isinstance (fill_value , dict ):
107
103
fill_value = [
108
104
pl .col (column_name ).fill_null (value = value )
109
105
for column_name , value in fill_value .items ()
110
106
if column_name in _columns
111
107
]
112
- elif _columns :
108
+ else :
113
109
fill_value = [
114
110
pl .col (column ).fill_null (value = fill_value ) for column in _columns
115
111
]
116
- if _columns and not explicit :
112
+ if not explicit :
117
113
condition = pl .col (idx ).is_null ()
118
114
fill_value = [
119
115
pl .when (condition ).then (_fill_value ).otherwise (pl .col (column_name ))
120
116
for column_name , _fill_value in zip (_columns , fill_value )
121
117
]
122
- if _columns :
123
- df = df .with_columns (fill_value )
118
+ df = df .with_columns (fill_value )
124
119
125
120
return df .select (columns_to_select )
0 commit comments