Skip to content

Commit a672fef

Browse files
authored
Polars pivot_longer implementation (#1355)
Implemented `pivot_longer` for Polars.
1 parent 70fe127 commit a672fef

File tree

8 files changed

+1831
-313
lines changed

8 files changed

+1831
-313
lines changed

CHANGELOG.md

+278 -277
Large diffs are not rendered by default.

janitor/functions/clean_names.py

+19 -22
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
from __future__ import annotations
44

55
import unicodedata
6-
from typing import Optional, Union
76

87
import pandas as pd
98
import pandas_flavor as pf
@@ -18,9 +17,9 @@
1817
@deprecated_alias(preserve_original_columns="preserve_original_labels")
1918
def clean_names(
2019
df: pd.DataFrame,
21-
axis: Union[str, None] = "columns",
22-
column_names: Union[str, list] = None,
23-
strip_underscores: Optional[Union[str, bool]] = None,
20+
axis: str = "columns",
21+
column_names: str | list = None,
22+
strip_underscores: str | bool = None,
2423
case_type: str = "lower",
2524
remove_special: bool = False,
2625
strip_accents: bool = True,
@@ -170,14 +169,14 @@ def clean_names(
170169

171170

172171
def _clean_names(
173-
obj: Union[pd.Index, pd.Series],
174-
strip_underscores: Optional[Union[str, bool]] = None,
175-
case_type: str = "lower",
176-
remove_special: bool = False,
177-
strip_accents: bool = False,
178-
enforce_string: bool = False,
179-
truncate_limit: int = None,
180-
) -> Union[pd.Index, pd.Series]:
172+
obj: pd.Index | pd.Series,
173+
strip_underscores: str | bool,
174+
case_type: str,
175+
remove_special: bool,
176+
strip_accents: bool,
177+
enforce_string: bool,
178+
truncate_limit: int,
179+
) -> pd.Index | pd.Series:
181180
"""
182181
Generic function to clean labels in a pandas object.
183182
"""
@@ -202,9 +201,9 @@ def _clean_names(
202201

203202

204203
def _change_case(
205-
obj: Union[pd.Index, pd.Series],
204+
obj: pd.Index | pd.Series,
206205
case_type: str,
207-
) -> Union[pd.Index, pd.Series]:
206+
) -> pd.Index | pd.Series:
208207
"""Change case of labels in obj."""
209208
case_types = {"preserve", "upper", "lower", "snake"}
210209
case_type = case_type.lower()
@@ -226,9 +225,7 @@ def _change_case(
226225
)
227226

228227

229-
def _normalize_1(
230-
obj: Union[pd.Index, pd.Series]
231-
) -> Union[pd.Index, pd.Series]:
228+
def _normalize_1(obj: pd.Index | pd.Series) -> pd.Index | pd.Series:
232229
"""Perform normalization of labels in obj."""
233230
FIXES = [(r"[ /:,?()\.-]", "_"), (r"['’]", ""), (r"[\xa0]", "_")]
234231
for search, replace in FIXES:
@@ -238,8 +235,8 @@ def _normalize_1(
238235

239236

240237
def _strip_accents(
241-
obj: Union[pd.Index, pd.Series],
242-
) -> Union[pd.Index, pd.Series]:
238+
obj: pd.Index | pd.Series,
239+
) -> pd.Index | pd.Series:
243240
"""Remove accents from a label.
244241
245242
Inspired from [StackOverflow][so].
@@ -258,9 +255,9 @@ def _strip_accents(
258255

259256

260257
def _strip_underscores_func(
261-
obj: Union[pd.Index, pd.Series],
262-
strip_underscores: Union[str, bool] = None,
263-
) -> Union[pd.Index, pd.Series]:
258+
obj: pd.Index | pd.Series,
259+
strip_underscores: str | bool = None,
260+
) -> pd.Index | pd.Series:
264261
"""Strip underscores."""
265262
underscore_options = {None, "left", "right", "both", "l", "r", True}
266263
if strip_underscores not in underscore_options:

janitor/functions/pivot.py

+2 -12
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ def pivot_longer(
9898
6 setosa Petal.Width 0.2
9999
7 virginica Petal.Width 1.8
100100
101-
Split the column labels into parts:
101+
Split the column labels into individual columns:
102102
>>> df.pivot_longer(
103103
... index = 'Species',
104104
... names_to = ('part', 'dimension'),
@@ -167,7 +167,7 @@ def pivot_longer(
167167
value int64
168168
dtype: object
169169
170-
Use multiple `.value` to reshape dataframe:
170+
Use multiple `.value` to reshape the dataframe:
171171
>>> df = pd.DataFrame(
172172
... [
173173
... {
@@ -265,16 +265,6 @@ def pivot_longer(
265265
... "Gin": [16, 200, 34],
266266
... "Vodka": [20, 33, 18],
267267
... },
268-
... columns=[
269-
... "City",
270-
... "State",
271-
... "Name",
272-
... "Mango",
273-
... "Orange",
274-
... "Watermelon",
275-
... "Gin",
276-
... "Vodka",
277-
... ],
278268
... )
279269
>>> df
280270
City State Name Mango Orange Watermelon Gin Vodka

0 commit comments

Comments
 (0)