Skip to content

Commit 5c281b2

Browse files
authored
[ENH] row_to_names for polars (#1363)
Added `row_to_names` for polars DataFrames.
1 parent 2dff4f6 commit 5c281b2

12 files changed

+1236
-748
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Changelog
22

33
## [Unreleased]
4+
- [ENH] Added a `row_to_names` method for polars. Issue #1352
45
- [ENH] `read_commandline` function now supports polars - Issue #1352
56

67
- [ENH] Improved performance for non-equi joins when using numba - @samukweku PR #1341

janitor/functions/row_to_names.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Implementation of the `row_to_names` function."""
22

3+
from __future__ import annotations
4+
35
import warnings
46

57
import numpy as np
@@ -13,7 +15,7 @@
1315
@deprecated_alias(row_number="row_numbers", remove_row="remove_rows")
1416
def row_to_names(
1517
df: pd.DataFrame,
16-
row_numbers: int = 0,
18+
row_numbers: int | list = 0,
1719
remove_rows: bool = False,
1820
remove_rows_above: bool = False,
1921
reset_index: bool = False,
@@ -73,7 +75,7 @@ def row_to_names(
7375
Note that indexing starts from 0. It can also be a list,
7476
in which case, a MultiIndex column is created.
7577
Defaults to 0 (first row).
76-
remove_row: Whether the row(s) should be removed from the DataFrame.
78+
remove_rows: Whether the row(s) should be removed from the DataFrame.
7779
remove_rows_above: Whether the row(s) above the selected row should
7880
be removed from the DataFrame.
7981
reset_index: Whether the index should be reset on the returning DataFrame.
@@ -84,10 +86,10 @@ def row_to_names(
8486
if not pd.options.mode.copy_on_write:
8587
df = df.copy()
8688

87-
check("row_number", row_numbers, [int, list])
89+
check("row_numbers", row_numbers, [int, list])
8890
if isinstance(row_numbers, list):
8991
for entry in row_numbers:
90-
check("entry in the row_number argument", entry, [int])
92+
check("entry in the row_numbers argument", entry, [int])
9193

9294
warnings.warn(
9395
"The function row_to_names will, in the official 1.0 release, "

janitor/polars/__init__.py

+12-736
Large diffs are not rendered by default.

janitor/polars/clean_names.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,11 @@ def _strip_underscores_func_expr(
115115

116116
def _clean_column_names(
117117
obj: str,
118-
strip_underscores: str | bool = None,
119-
case_type: str = "lower",
120-
remove_special: bool = False,
121-
strip_accents: bool = False,
122-
truncate_limit: int = None,
118+
strip_underscores: str | bool,
119+
case_type: str,
120+
remove_special: bool,
121+
strip_accents: bool,
122+
truncate_limit: int,
123123
) -> str:
124124
"""
125125
Function to clean the column names of a polars DataFrame.

janitor/polars/dataframe.py

+434
Large diffs are not rendered by default.

janitor/polars/expressions.py

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
from __future__ import annotations
2+
3+
from janitor.utils import import_message
4+
5+
from .clean_names import _clean_expr_names
6+
7+
try:
8+
import polars as pl
9+
except ImportError:
10+
import_message(
11+
submodule="polars",
12+
package="polars",
13+
conda_channel="conda-forge",
14+
pip_install=True,
15+
)
16+
17+
18+
@pl.api.register_expr_namespace("janitor")
class PolarsExpr:
    """Methods registered under the `janitor` namespace of a polars Expression."""

    def __init__(self, expr: pl.Expr) -> None:
        # The expression that the namespace methods below operate on.
        self._expr = expr

    def clean_names(
        self,
        strip_underscores: str | bool | None = None,
        case_type: str = "lower",
        remove_special: bool = False,
        strip_accents: bool = False,
        enforce_string: bool = False,
        truncate_limit: int | None = None,
    ) -> pl.Expr:
        """
        Clean the labels in a polars Expression.

        Examples:
            >>> import polars as pl
            >>> import janitor.polars
            >>> df = pl.DataFrame({"raw": ["Abçdê fgí j"]})
            >>> df
            shape: (1, 1)
            ┌─────────────┐
            │ raw         │
            │ ---         │
            │ str         │
            ╞═════════════╡
            │ Abçdê fgí j │
            └─────────────┘

            Clean the column values:
            >>> df.with_columns(pl.col("raw").janitor.clean_names(strip_accents=True))
            shape: (1, 1)
            ┌─────────────┐
            │ raw         │
            │ ---         │
            │ str         │
            ╞═════════════╡
            │ abcde_fgi_j │
            └─────────────┘

        !!! info "New in version 0.28.0"

        Args:
            strip_underscores: Removes the outer underscores
                from all labels in the expression.
                Default None keeps outer underscores.
                Values can be either 'left', 'right'
                or 'both' or the respective shorthand 'l',
                'r' and True.
            case_type: Whether to make the labels in the expression lower or uppercase.
                Current case may be preserved with 'preserve',
                while snake case conversion (from CamelCase or camelCase only)
                can be turned on using "snake".
                Default 'lower' makes all characters lowercase.
            remove_special: Remove special characters from the values in the expression.
                Only letters, numbers and underscores are preserved.
            strip_accents: Whether or not to remove accents from
                the expression.
            enforce_string: Whether or not to cast the expression to a string type.
            truncate_limit: Truncates formatted labels in the expression to
                the specified length. Default None does not truncate.

        Returns:
            A polars Expression.
        """
        # All cleaning logic lives in the shared helper; this method only
        # forwards the wrapped expression and the caller's options.
        return _clean_expr_names(
            obj=self._expr,
            strip_accents=strip_accents,
            strip_underscores=strip_underscores,
            case_type=case_type,
            remove_special=remove_special,
            enforce_string=enforce_string,
            truncate_limit=truncate_limit,
        )

0 commit comments

Comments
 (0)