2
2
3
3
from __future__ import annotations
4
4
5
- import warnings
5
+ from functools import singledispatch
6
6
7
7
import numpy as np
8
8
import pandas as pd
15
15
@deprecated_alias (row_number = "row_numbers" , remove_row = "remove_rows" )
16
16
def row_to_names (
17
17
df : pd .DataFrame ,
18
- row_numbers : int | list = 0 ,
18
+ row_numbers : int | list | slice = 0 ,
19
19
remove_rows : bool = False ,
20
20
remove_rows_above : bool = False ,
21
21
reset_index : bool = False ,
@@ -47,7 +47,7 @@ def row_to_names(
47
47
1 9 y
48
48
>>> df.row_to_names([0,1], remove_rows=True, reset_index=True)
49
49
nums chars
50
- 6 x
50
+ 6 x
51
51
0 9 y
52
52
53
53
Remove rows above the elevated row and the elevated row itself.
@@ -72,8 +72,7 @@ def row_to_names(
72
72
Args:
73
73
df: A pandas DataFrame.
74
74
row_numbers: Position of the row(s) containing the variable names.
75
- Note that indexing starts from 0. It can also be a list,
76
- in which case, a MultiIndex column is created.
75
+ It can be an integer, a list or a slice.
77
76
Defaults to 0 (first row).
78
77
remove_rows: Whether the row(s) should be removed from the DataFrame.
79
78
remove_rows_above: Whether the row(s) above the selected row should
@@ -83,53 +82,149 @@ def row_to_names(
83
82
Returns:
84
83
A pandas DataFrame with set column names.
85
84
""" # noqa: E501
86
- if not pd .options .mode .copy_on_write :
87
- df = df .copy ()
88
-
89
- check ("row_numbers" , row_numbers , [int , list ])
90
- if isinstance (row_numbers , list ):
91
- for entry in row_numbers :
92
- check ("entry in the row_numbers argument" , entry , [int ])
93
-
94
- warnings .warn (
95
- "The function row_to_names will, in the official 1.0 release, "
96
- "change its behaviour to reset the dataframe's index by default. "
97
- "You can prepare for this change right now by explicitly setting "
98
- "`reset_index=True` when calling on `row_to_names`."
85
+
86
+ return _row_to_names (
87
+ row_numbers ,
88
+ df = df ,
89
+ remove_rows = remove_rows ,
90
+ remove_rows_above = remove_rows_above ,
91
+ reset_index = reset_index ,
92
+ )
93
+
94
+
95
@singledispatch
def _row_to_names(
    row_numbers, df, remove_rows, remove_rows_above, reset_index
) -> pd.DataFrame:
    """
    Fallback for row_to_names when `row_numbers` has an unsupported type.

    Dispatch happens on the type of `row_numbers`; this base
    implementation is reached only when no registered implementation
    matches, and it always raises.

    Args:
        row_numbers: The value whose type selects the implementation.
        df: A pandas DataFrame (unused here).
        remove_rows: Unused here.
        remove_rows_above: Unused here.
        reset_index: Unused here.

    Raises:
        TypeError: Always, naming the offending type.
    """
    type_name = type(row_numbers).__name__
    raise TypeError(
        "row_numbers should be either an integer, "
        "a slice or a list; "
        f"instead got type {type_name}"
    )
100
- # should raise if positional indexers are missing
101
- # IndexError: positional indexers are out-of-bounds
102
- headers = df .iloc [row_numbers ]
107
+
108
+
109
@_row_to_names.register(int)  # noqa: F811
def _row_to_names_dispatch(  # noqa: F811
    row_numbers, df, remove_rows, remove_rows_above, reset_index
):
    """
    Promote a single row, selected by position, to the column names.

    Args:
        row_numbers: Integer position of the row holding the names.
            Negative positions count from the end, as with `iloc`.
        df: A pandas DataFrame.
        remove_rows: Whether the promoted row should be dropped.
        remove_rows_above: Whether the rows above the promoted row
            should be dropped.
        reset_index: Whether the index should be replaced by a fresh
            RangeIndex.

    Returns:
        A pandas DataFrame with set column names.
    """
    # Work on a new DataFrame object so that assigning columns
    # does not touch the caller's frame.
    df_ = df[:]
    # iloc raises for an out-of-bounds position, so it doubles as validation.
    headers = df_.iloc[row_numbers]
    df_.columns = headers
    df_.columns.name = None
    if not remove_rows and not remove_rows_above:
        if reset_index:
            return df_.reset_index(drop=True)
        return df_

    len_df = len(df_)
    # Normalise a negative position: np.arange does not wrap around,
    # so e.g. -1 would otherwise be treated as "start from row 0".
    position = row_numbers + len_df if row_numbers < 0 else row_numbers
    arrays = [arr._values for _, arr in df_.items()]
    if remove_rows_above:
        # Keep everything after the promoted row, plus the row itself
        # unless remove_rows is also requested.
        first_kept = position + 1 if remove_rows else position
        indexer = np.arange(first_kept, len_df)
    else:
        # Only remove_rows is set: drop just the promoted row.
        indexer = np.arange(len_df)
        mask = np.ones(len_df, dtype=np.bool_)
        mask[position] = False
        indexer = indexer[mask]
    # Positional keys avoid clashes between duplicate column labels;
    # the real labels are restored after construction.
    arrays = {num: arr[indexer] for num, arr in enumerate(arrays)}
    if reset_index:
        df_index = pd.RangeIndex(start=0, stop=indexer.size)
    else:
        df_index = df_.index[indexer]
    _df = pd.DataFrame(data=arrays, index=df_index, copy=False)
    _df.columns = df_.columns
    return _df
141
+
142
+
143
@_row_to_names.register(slice)  # noqa: F811
def _row_to_names_dispatch(  # noqa: F811
    row_numbers, df, remove_rows, remove_rows_above, reset_index
):
    """
    Promote the row(s) selected by a slice to the column names.

    A slice covering one row yields plain column labels; a slice
    covering several rows yields MultiIndex columns.

    Args:
        row_numbers: A slice of row positions. `start`/`stop` may be
            None or negative; `step` must be None.
        df: A pandas DataFrame.
        remove_rows: Whether the promoted row(s) should be dropped.
        remove_rows_above: Whether the rows above the promoted row(s)
            should be dropped.
        reset_index: Whether the index should be replaced by a fresh
            RangeIndex.

    Returns:
        A pandas DataFrame with set column names.

    Raises:
        ValueError: If the slice has a step.
    """
    if row_numbers.step is not None:
        raise ValueError(
            "The step argument for slice is not supported in row_to_names."
        )
    # Work on a new DataFrame object so that assigning columns
    # does not touch the caller's frame.
    df_ = df[:]
    headers = df_.iloc[row_numbers]
    if len(headers) == 1:
        # iloc[0] always gives a Series; squeeze() would collapse a
        # single-column frame to a scalar, which cannot become columns.
        df_.columns = headers.iloc[0]
        df_.columns.name = None
    else:
        headers = [array._values for _, array in headers.items()]
        df_.columns = pd.MultiIndex.from_tuples(headers)
    if not remove_rows and not remove_rows_above:
        if reset_index:
            return df_.reset_index(drop=True)
        return df_

    len_df = len(df_)
    # Resolve None / negative bounds to concrete positions; np.arange
    # understands neither.
    start, stop, _ = row_numbers.indices(len_df)
    arrays = [arr._values for _, arr in df_.items()]
    if remove_rows_above:
        # Keep everything after the promoted rows, plus the promoted
        # rows themselves unless remove_rows is also requested.
        first_kept = stop if remove_rows else start
        indexer = np.arange(first_kept, len_df)
    else:
        # Only remove_rows is set: drop just the promoted rows.
        indexer = np.arange(len_df)
        mask = np.ones(len_df, dtype=np.bool_)
        mask[row_numbers] = False
        indexer = indexer[mask]
    # Positional keys avoid clashes between duplicate column labels;
    # the real labels are restored after construction.
    arrays = {num: arr[indexer] for num, arr in enumerate(arrays)}
    if reset_index:
        df_index = pd.RangeIndex(start=0, stop=indexer.size)
    else:
        df_index = df_.index[indexer]
    _df = pd.DataFrame(data=arrays, index=df_index, copy=False)
    _df.columns = df_.columns
    return _df
110
184
111
- df .columns = headers
112
- df .columns .name = None
113
185
114
- df_index = df .index
186
@_row_to_names.register(list)  # noqa: F811
def _row_to_names_dispatch(  # noqa: F811
    row_numbers, df, remove_rows, remove_rows_above, reset_index
):
    """
    Promote the row(s) at the listed positions to the column names.

    A one-element list yields plain column labels; a longer list
    yields MultiIndex columns.

    Args:
        row_numbers: A list of integer row positions.
        df: A pandas DataFrame.
        remove_rows: Whether the promoted row(s) should be dropped.
        remove_rows_above: Not supported for lists; must be falsy.
        reset_index: Whether the index should be replaced by a fresh
            RangeIndex.

    Returns:
        A pandas DataFrame with set column names.

    Raises:
        ValueError: If `remove_rows_above` is truthy.
    """
    if remove_rows_above:
        raise ValueError(
            "The remove_rows_above argument is applicable "
            "only if the row_numbers argument is an integer "
            "or a slice."
        )

    # `check` is defined outside this block; presumably it raises when
    # an entry is not an int -- verify against its definition.
    for entry in row_numbers:
        check("entry in the row_numbers argument", entry, [int])

    # Work on a new DataFrame object so that assigning columns
    # does not touch the caller's frame.
    df_ = df[:]
    headers = df_.iloc[row_numbers]
    if len(headers) == 1:
        # iloc[0] always gives a Series; squeeze() would collapse a
        # single-column frame to a scalar, which cannot become columns.
        df_.columns = headers.iloc[0]
        df_.columns.name = None
    else:
        headers = [array._values for _, array in headers.items()]
        df_.columns = pd.MultiIndex.from_tuples(headers)

    if not remove_rows:
        if reset_index:
            return df_.reset_index(drop=True)
        return df_

    len_df = len(df_)
    arrays = [arr._values for _, arr in df_.items()]
    # Boolean mask drops exactly the promoted rows; negative positions
    # are handled by normal numpy indexing.
    mask = np.ones(len_df, dtype=np.bool_)
    mask[row_numbers] = False
    indexer = np.arange(len_df)[mask]

    # Positional keys avoid clashes between duplicate column labels;
    # the real labels are restored after construction.
    arrays = {num: arr[indexer] for num, arr in enumerate(arrays)}
    if reset_index:
        df_index = pd.RangeIndex(start=0, stop=indexer.size)
    else:
        df_index = df_.index[indexer]
    _df = pd.DataFrame(data=arrays, index=df_index, copy=False)
    _df.columns = df_.columns
    return _df
0 commit comments