2
2
3
3
from __future__ import annotations
4
4
5
+ from functools import singledispatch
6
+
5
7
from janitor .utils import check , import_message
6
8
7
9
from .polars_flavor import register_dataframe_method
@@ -28,8 +30,6 @@ def row_to_names(
28
30
"""
29
31
Elevates a row, or rows, to be the column names of a DataFrame.
30
32
31
- For a LazyFrame, the user should materialize into a DataFrame before using `row_to_names`..
32
-
33
33
Examples:
34
34
Replace column names with the first row.
35
35
@@ -103,8 +103,7 @@ def row_to_names(
103
103
104
104
Args:
105
105
row_numbers: Position of the row(s) containing the variable names.
106
- Note that indexing starts from 0. It can also be a list/slice.
107
- Defaults to 0 (first row).
106
+ It can be an integer, list or a slice.
108
107
remove_rows: Whether the row(s) should be removed from the DataFrame.
109
108
remove_rows_above: Whether the row(s) above the selected row should
110
109
be removed from the DataFrame.
@@ -115,85 +114,93 @@ def row_to_names(
115
114
A polars DataFrame.
116
115
""" # noqa: E501
117
116
return _row_to_names (
117
+ row_numbers ,
118
118
df = df ,
119
- row_numbers = row_numbers ,
120
119
remove_rows = remove_rows ,
121
120
remove_rows_above = remove_rows_above ,
122
121
separator = separator ,
123
122
)
124
123
125
124
125
+ @singledispatch
126
126
def _row_to_names (
127
- df : pl .DataFrame ,
128
- row_numbers : int | list | slice ,
129
- remove_rows : bool ,
130
- remove_rows_above : bool ,
131
- separator : str ,
127
+ row_numbers , df , remove_rows , remove_rows_above , separator
132
128
) -> pl .DataFrame :
133
129
"""
134
- Function to convert rows in the DataFrame to column names .
130
+ Base function for row_to_names .
135
131
"""
136
- check ("separator" , separator , [str ])
137
- if isinstance (row_numbers , int ):
138
- row_numbers = slice (row_numbers , row_numbers + 1 )
139
- elif isinstance (row_numbers , slice ):
140
- if row_numbers .step is not None :
141
- raise ValueError (
142
- "The step argument for slice is not supported in row_to_names."
143
- )
144
- elif isinstance (row_numbers , list ):
145
- for entry in row_numbers :
146
- check ("entry in the row_numbers argument" , entry , [int ])
147
- else :
148
- raise TypeError (
149
- "row_numbers should be either an integer, "
150
- "a slice or a list; "
151
- f"instead got type { type (row_numbers ).__name__ } "
132
+ raise TypeError (
133
+ "row_numbers should be either an integer, "
134
+ "a slice or a list; "
135
+ f"instead got type { type (row_numbers ).__name__ } "
136
+ )
137
+
138
+
139
@_row_to_names.register(int)  # noqa: F811
def _row_to_names_dispatch(  # noqa: F811
    row_numbers, df, remove_rows, remove_rows_above, separator
):
    """
    Elevate a single row, selected by integer position, to column names.

    Args:
        row_numbers: Position of the row containing the variable names.
            A negative value is resolved against the number of rows in `df`.
        df: The DataFrame whose columns are to be renamed.
        remove_rows: Whether the selected row should be removed.
        remove_rows_above: Whether the rows above the selected row should
            be removed.
        separator: Not used by this implementation (a single row needs no
            joining); accepted so all implementations share one signature.

    Returns:
        The DataFrame with renamed columns, minus any rows requested for
        removal.
    """
    if row_numbers < 0:
        # Resolve to an absolute position so that the slice offsets and the
        # row-position comparison below target the same row that `gather`
        # selects for the headers.
        row_numbers += df.height
    expression = pl.col("*").cast(pl.String).gather(row_numbers)
    expression = pl.struct(expression)
    # The single struct row is used as the old-name -> new-name mapping.
    headers = df.select(expression).to_series(0).to_list()[0]
    df = df.rename(mapping=headers)
    if remove_rows_above and remove_rows:
        # Keep only what comes after the header row.
        return df.slice(row_numbers + 1)
    if remove_rows_above:
        # Keep the header row and everything after it.
        return df.slice(row_numbers)
    if remove_rows:
        # Drop just the header row, keeping rows on both sides of it.
        expression = pl.int_range(pl.len()).ne(row_numbers)
        return df.filter(expression)
    return df
155
+
156
+
157
@_row_to_names.register(slice)  # noqa: F811
def _row_to_names_dispatch(  # noqa: F811
    row_numbers, df, remove_rows, remove_rows_above, separator
):
    """
    Elevate a contiguous range of rows to column names.

    The values of the selected rows are cast to strings and joined, per
    column, with `separator` to form each new column name.

    Args:
        row_numbers: Slice selecting the rows containing the variable
            names. `start` and `stop` may be omitted or negative; they are
            resolved against the number of rows in `df`.
        df: The DataFrame whose columns are to be renamed.
        remove_rows: Whether the selected rows should be removed.
        remove_rows_above: Whether the rows above the selected rows should
            be removed.
        separator: Text placed between the row values of each column.

    Returns:
        The DataFrame with renamed columns, minus any rows requested for
        removal.

    Raises:
        TypeError: If `separator` is not a string.
        ValueError: If the slice has a step.
    """
    check("separator", separator, [str])
    if row_numbers.step is not None:
        raise ValueError(
            "The step argument for slice is not supported in row_to_names."
        )
    # Resolve omitted (None) and negative bounds to absolute positions;
    # the arithmetic and comparisons below need concrete integers.
    start, stop, _ = row_numbers.indices(df.height)
    headers = df.slice(start, stop - start)
    headers = headers.cast(pl.String)
    expression = pl.all().str.concat(delimiter=separator)
    expression = pl.struct(expression)
    # The single struct row is used as the old-name -> new-name mapping.
    headers = headers.select(expression).to_series(0).to_list()[0]
    df = df.rename(mapping=headers)
    if remove_rows_above and remove_rows:
        # Keep only what comes after the header rows.
        return df.slice(stop)
    if remove_rows_above:
        # Keep the header rows and everything after them.
        return df.slice(start)
    if remove_rows:
        # Drop the half-open range [start, stop) of header rows.
        expression = pl.int_range(pl.len()).is_between(
            start, stop, closed="left"
        )
        return df.filter(~expression)
    return df
182
181
183
- if remove_rows :
184
- if is_a_slice :
185
- df = [
186
- df .slice (offset = 0 , length = row_numbers .start ),
187
- df .slice (offset = row_numbers .stop ),
188
- ]
189
- return pl .concat (df , rechunk = True )
190
- name = "" .join (df .columns )
191
- name = f"{ name } _"
192
- df = (
193
- df .with_row_index (name = name )
194
- .filter (pl .col (name = name ).is_in (row_numbers ).not_ ())
195
- .select (pl .exclude (name ))
182
+
183
@_row_to_names.register(list)  # noqa: F811
def _row_to_names_dispatch(  # noqa: F811
    row_numbers, df, remove_rows, remove_rows_above, separator
):
    """
    Elevate an arbitrary list of rows to column names.

    The values of the selected rows are cast to strings and joined, per
    column, with `separator` to form each new column name.

    Args:
        row_numbers: List of integer positions of the rows containing the
            variable names. Negative entries are resolved against the
            number of rows in `df`.
        df: The DataFrame whose columns are to be renamed.
        remove_rows: Whether the selected rows should be removed.
        remove_rows_above: Must be falsy; not supported for a list.
        separator: Text placed between the row values of each column.

    Returns:
        The DataFrame with renamed columns, minus the selected rows if
        `remove_rows` is set.

    Raises:
        ValueError: If `remove_rows_above` is set.
        TypeError: If `separator` is not a string, or an entry of
            `row_numbers` is not an integer.
    """
    if remove_rows_above:
        raise ValueError(
            "The remove_rows_above argument is applicable "
            "only if the row_numbers argument is an integer "
            "or a slice."
        )
    check("separator", separator, [str])
    for entry in row_numbers:
        check("entry in the row_numbers argument", entry, [int])

    # Resolve negative entries to absolute positions so that header
    # selection and row removal refer to the same rows.
    height = df.height
    positions = [
        entry + height if entry < 0 else entry for entry in row_numbers
    ]
    expression = pl.col("*").gather(positions)
    headers = df.select(expression).cast(pl.String)
    expression = pl.all().str.concat(delimiter=separator)
    expression = pl.struct(expression)
    # The single struct row is used as the old-name -> new-name mapping.
    headers = headers.select(expression).to_series(0).to_list()[0]
    df = df.rename(mapping=headers)
    if remove_rows:
        expression = pl.int_range(pl.len()).is_in(positions)
        return df.filter(~expression)
    return df
0 commit comments