Skip to content

Commit 7e501bc

Browse files
samukwekusamuel.oranyeliericmjl
authored
Create polars equivalent of pandas_flavor (#1374)
This PR creates the polars equivalent of `pandas_flavor`. h/t Samuel Oranyeli for his work here! Co-authored-by: samuel.oranyeli <[email protected]> Co-authored-by: Eric Ma <[email protected]>
1 parent 9d1ab35 commit 7e501bc

14 files changed

+992
-1330
lines changed

janitor/polars/__init__.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1-
from .dataframe import PolarsDataFrame
2-
from .expressions import PolarsExpr
3-
from .lazyframe import PolarsLazyFrame
4-
from .pivot_longer import pivot_longer_spec
1+
from .clean_names import clean_names, make_clean_names
2+
from .complete import complete
3+
from .pivot_longer import pivot_longer, pivot_longer_spec
4+
from .row_to_names import row_to_names
55

66
__all__ = [
77
"pivot_longer_spec",
8+
"pivot_longer",
89
"clean_names",
9-
"PolarsDataFrame",
10-
"PolarsLazyFrame",
11-
"PolarsExpr",
10+
"make_clean_names",
11+
"row_to_names",
12+
"complete",
1213
]

janitor/polars/clean_names.py

+162
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@
1515
)
1616
from janitor.utils import import_message
1717

18+
from .polars_flavor import (
19+
register_dataframe_method,
20+
register_expr_method,
21+
register_lazyframe_method,
22+
)
23+
1824
try:
1925
import polars as pl
2026
except ImportError:
@@ -26,6 +32,162 @@
2632
)
2733

2834

35+
@register_lazyframe_method
@register_dataframe_method
def clean_names(
    df: pl.DataFrame | pl.LazyFrame,
    strip_underscores: str | bool | None = None,
    case_type: str = "lower",
    remove_special: bool = False,
    strip_accents: bool = False,
    truncate_limit: int | None = None,
) -> pl.DataFrame | pl.LazyFrame:
    """
    Clean the column names in a polars DataFrame.

    `clean_names` can also be applied to a LazyFrame.

    Examples:
        >>> import polars as pl
        >>> import janitor.polars
        >>> df = pl.DataFrame(
        ...     {
        ...         "Aloha": range(3),
        ...         "Bell Chart": range(3),
        ...         "Animals@#$%^": range(3)
        ...     }
        ... )
        >>> df
        shape: (3, 3)
        ┌───────┬────────────┬──────────────┐
        │ Aloha ┆ Bell Chart ┆ Animals@#$%^ │
        │ ---   ┆ ---        ┆ ---          │
        │ i64   ┆ i64        ┆ i64          │
        ╞═══════╪════════════╪══════════════╡
        │ 0     ┆ 0          ┆ 0            │
        │ 1     ┆ 1          ┆ 1            │
        │ 2     ┆ 2          ┆ 2            │
        └───────┴────────────┴──────────────┘
        >>> df.clean_names(remove_special=True)
        shape: (3, 3)
        ┌───────┬────────────┬─────────┐
        │ aloha ┆ bell_chart ┆ animals │
        │ ---   ┆ ---        ┆ ---     │
        │ i64   ┆ i64        ┆ i64     │
        ╞═══════╪════════════╪═════════╡
        │ 0     ┆ 0          ┆ 0       │
        │ 1     ┆ 1          ┆ 1       │
        │ 2     ┆ 2          ┆ 2       │
        └───────┴────────────┴─────────┘

    !!! info "New in version 0.28.0"

    Args:
        df: The polars DataFrame or LazyFrame whose columns
            are renamed.
        strip_underscores: Removes the outer underscores from all
            column names. Default None keeps outer underscores. Values can be
            either 'left', 'right' or 'both' or the respective shorthand 'l',
            'r' and True.
        case_type: Whether to make the column names lower or uppercase.
            Current case may be preserved with 'preserve',
            while snake case conversion (from CamelCase or camelCase only)
            can be turned on using "snake".
            Default 'lower' makes all characters lowercase.
        remove_special: Remove special characters from the column names.
            Only letters, numbers and underscores are preserved.
        strip_accents: Whether or not to remove accents from
            the labels.
        truncate_limit: Truncates formatted column names to
            the specified length. Default None does not truncate.

    Returns:
        A polars DataFrame/LazyFrame.
    """  # noqa: E501
    # `rename` is handed a callable, so each column label is cleaned
    # individually by the shared helper.
    return df.rename(
        lambda col: _clean_column_names(
            obj=col,
            strip_accents=strip_accents,
            strip_underscores=strip_underscores,
            case_type=case_type,
            remove_special=remove_special,
            truncate_limit=truncate_limit,
        )
    )
115+
116+
117+
@register_expr_method
def make_clean_names(
    expression: pl.Expr,
    strip_underscores: str | bool | None = None,
    case_type: str = "lower",
    remove_special: bool = False,
    strip_accents: bool = False,
    enforce_string: bool = False,
    truncate_limit: int | None = None,
) -> pl.Expr:
    """
    Clean the labels in a polars Expression.

    Examples:
        >>> import polars as pl
        >>> import janitor.polars
        >>> df = pl.DataFrame({"raw": ["Abçdê fgí j"]})
        >>> df
        shape: (1, 1)
        ┌─────────────┐
        │ raw         │
        │ ---         │
        │ str         │
        ╞═════════════╡
        │ Abçdê fgí j │
        └─────────────┘

        Clean the column values:
        >>> df.with_columns(pl.col("raw").make_clean_names(strip_accents=True))
        shape: (1, 1)
        ┌─────────────┐
        │ raw         │
        │ ---         │
        │ str         │
        ╞═════════════╡
        │ abcde_fgi_j │
        └─────────────┘

    !!! info "New in version 0.28.0"

    Args:
        expression: The polars Expression whose labels are cleaned.
        strip_underscores: Removes the outer underscores
            from all labels in the expression.
            Default None keeps outer underscores.
            Values can be either 'left', 'right'
            or 'both' or the respective shorthand 'l',
            'r' and True.
        case_type: Whether to make the labels in the expression
            lower or uppercase.
            Current case may be preserved with 'preserve',
            while snake case conversion (from CamelCase or camelCase only)
            can be turned on using "snake".
            Default 'lower' makes all characters lowercase.
        remove_special: Remove special characters from the values
            in the expression.
            Only letters, numbers and underscores are preserved.
        strip_accents: Whether or not to remove accents from
            the expression.
        enforce_string: Whether or not to cast the expression
            to a string type.
        truncate_limit: Truncates formatted labels in the expression to
            the specified length. Default None does not truncate.

    Returns:
        A polars Expression.
    """
    # All cleaning options are forwarded unchanged to the shared
    # expression-level helper.
    return _clean_expr_names(
        obj=expression,
        strip_accents=strip_accents,
        strip_underscores=strip_underscores,
        case_type=case_type,
        remove_special=remove_special,
        enforce_string=enforce_string,
        truncate_limit=truncate_limit,
    )
189+
190+
29191
def _change_case_expr(
30192
obj: pl.Expr,
31193
case_type: str,

0 commit comments

Comments
 (0)