Skip to content

Commit bd0d913

Browse files
author
samuel.oranyeli
committed
cleanup docs
1 parent 09e7ae3 commit bd0d913

File tree

8 files changed

+995
-195
lines changed

8 files changed

+995
-195
lines changed

janitor/polars/__init__.py

+12 -187
Original file line number | Diff line number | Diff line change
@@ -1,187 +1,12 @@
1-
from __future__ import annotations
2-
3-
from polars.type_aliases import ColumnNameOrSelector
4-
5-
from janitor.utils import check, import_message
6-
7-
from .clean_names import _clean_column_names, _clean_expr_names
8-
from .row_to_names import _row_to_names
9-
from .pivot_longer import _pivot_longer, _pivot_longer_dot_value
10-
11-
try:
12-
import polars as pl
13-
except ImportError:
14-
import_message(
15-
submodule="polars",
16-
package="polars",
17-
conda_channel="conda-forge",
18-
pip_install=True,
19-
)
20-
21-
22-
@pl.api.register_dataframe_namespace("janitor")
23-
class PolarsFrame:
24-
def __init__(self, df: pl.DataFrame) -> pl.DataFrame:
25-
self._df = df
26-
27-
def clean_names(
28-
self,
29-
strip_underscores: str | bool = None,
30-
case_type: str = "lower",
31-
remove_special: bool = False,
32-
strip_accents: bool = False,
33-
truncate_limit: int = None,
34-
) -> pl.DataFrame:
35-
"""
36-
Clean the column names in a polars DataFrame.
37-
38-
Examples:
39-
>>> import polars as pl
40-
>>> import janitor.polars
41-
>>> df = pl.DataFrame(
42-
... {
43-
... "Aloha": range(3),
44-
... "Bell Chart": range(3),
45-
... "Animals@#$%^": range(3)
46-
... }
47-
... )
48-
>>> df
49-
shape: (3, 3)
50-
┌───────┬────────────┬──────────────┐
51-
│ Aloha ┆ Bell Chart ┆ Animals@#$%^ │
52-
│ --- ┆ --- ┆ --- │
53-
│ i64 ┆ i64 ┆ i64 │
54-
╞═══════╪════════════╪══════════════╡
55-
│ 0 ┆ 0 ┆ 0 │
56-
│ 1 ┆ 1 ┆ 1 │
57-
│ 2 ┆ 2 ┆ 2 │
58-
└───────┴────────────┴──────────────┘
59-
>>> df.janitor.clean_names(remove_special=True)
60-
shape: (3, 3)
61-
┌───────┬────────────┬─────────┐
62-
│ aloha ┆ bell_chart ┆ animals │
63-
│ --- ┆ --- ┆ --- │
64-
│ i64 ┆ i64 ┆ i64 │
65-
╞═══════╪════════════╪═════════╡
66-
│ 0 ┆ 0 ┆ 0 │
67-
│ 1 ┆ 1 ┆ 1 │
68-
│ 2 ┆ 2 ┆ 2 │
69-
└───────┴────────────┴─────────┘
70-
71-
!!! info "New in version 0.28.0"
72-
73-
Args:
74-
strip_underscores: Removes the outer underscores from all
75-
column names. Default None keeps outer underscores. Values can be
76-
either 'left', 'right' or 'both' or the respective shorthand 'l',
77-
'r' and True.
78-
case_type: Whether to make the column names lower or uppercase.
79-
Current case may be preserved with 'preserve',
80-
while snake case conversion (from CamelCase or camelCase only)
81-
can be turned on using "snake".
82-
Default 'lower' makes all characters lowercase.
83-
remove_special: Remove special characters from the column names.
84-
Only letters, numbers and underscores are preserved.
85-
strip_accents: Whether or not to remove accents from
86-
the labels.
87-
truncate_limit: Truncates formatted column names to
88-
the specified length. Default None does not truncate.
89-
90-
Returns:
91-
A polars DataFrame.
92-
""" # noqa: E501
93-
return self._df.rename(
94-
lambda col: _clean_column_names(
95-
obj=col,
96-
strip_accents=strip_accents,
97-
strip_underscores=strip_underscores,
98-
case_type=case_type,
99-
remove_special=remove_special,
100-
truncate_limit=truncate_limit,
101-
)
102-
)
103-
104-
)
105-
106-
107-
@pl.api.register_lazyframe_namespace("janitor")
108-
class PolarsLazyFrame:
109-
def __init__(self, df: pl.LazyFrame) -> pl.LazyFrame:
110-
self._df = df
111-
112-
def clean_names(
113-
self,
114-
strip_underscores: str | bool = None,
115-
case_type: str = "lower",
116-
remove_special: bool = False,
117-
strip_accents: bool = False,
118-
truncate_limit: int = None,
119-
) -> pl.LazyFrame:
120-
"""
121-
Clean the column names in a polars LazyFrame.
122-
123-
Examples:
124-
>>> import polars as pl
125-
>>> import janitor.polars
126-
>>> df = pl.LazyFrame(
127-
... {
128-
... "Aloha": range(3),
129-
... "Bell Chart": range(3),
130-
... "Animals@#$%^": range(3)
131-
... }
132-
... )
133-
>>> df.collect()
134-
shape: (3, 3)
135-
┌───────┬────────────┬──────────────┐
136-
│ Aloha ┆ Bell Chart ┆ Animals@#$%^ │
137-
│ --- ┆ --- ┆ --- │
138-
│ i64 ┆ i64 ┆ i64 │
139-
╞═══════╪════════════╪══════════════╡
140-
│ 0 ┆ 0 ┆ 0 │
141-
│ 1 ┆ 1 ┆ 1 │
142-
│ 2 ┆ 2 ┆ 2 │
143-
└───────┴────────────┴──────────────┘
144-
>>> df.janitor.clean_names(remove_special=True).collect()
145-
shape: (3, 3)
146-
┌───────┬────────────┬─────────┐
147-
│ aloha ┆ bell_chart ┆ animals │
148-
│ --- ┆ --- ┆ --- │
149-
│ i64 ┆ i64 ┆ i64 │
150-
╞═══════╪════════════╪═════════╡
151-
│ 0 ┆ 0 ┆ 0 │
152-
│ 1 ┆ 1 ┆ 1 │
153-
│ 2 ┆ 2 ┆ 2 │
154-
└───────┴────────────┴─────────┘
155-
156-
!!! info "New in version 0.28.0"
157-
158-
Args:
159-
strip_underscores: Removes the outer underscores from all
160-
column names. Default None keeps outer underscores. Values can be
161-
either 'left', 'right' or 'both' or the respective shorthand 'l',
162-
'r' and True.
163-
case_type: Whether to make the column names lower or uppercase.
164-
Current case may be preserved with 'preserve',
165-
while snake case conversion (from CamelCase or camelCase only)
166-
can be turned on using "snake".
167-
Default 'lower' makes all characters lowercase.
168-
remove_special: Remove special characters from the column names.
169-
Only letters, numbers and underscores are preserved.
170-
strip_accents: Whether or not to remove accents from
171-
the labels.
172-
truncate_limit: Truncates formatted column names to
173-
the specified length. Default None does not truncate.
174-
175-
Returns:
176-
A polars LazyFrame.
177-
""" # noqa: E501
178-
return self._df.rename(
179-
lambda col: _clean_column_names(
180-
obj=col,
181-
strip_accents=strip_accents,
182-
strip_underscores=strip_underscores,
183-
case_type=case_type,
184-
remove_special=remove_special,
185-
truncate_limit=truncate_limit,
186-
)
187-
)
1+
from .dataframe import PolarsDataFrame
2+
from .expressions import PolarsExpr
3+
from .lazyframe import PolarsLazyFrame
4+
from .pivot_longer import pivot_longer_spec
5+
6+
__all__ = [
7+
"pivot_longer_spec",
8+
"clean_names",
9+
"PolarsDataFrame",
10+
"PolarsLazyFrame",
11+
"PolarsExpr",
12+
]

janitor/polars/clean_names.py

+5 -5
Original file line number | Diff line number | Diff line change
@@ -115,11 +115,11 @@ def _strip_underscores_func_expr(
115 115

116 116
def _clean_column_names(
117 117
obj: str,
118-
strip_underscores: str | bool = None,
119-
case_type: str = "lower",
120-
remove_special: bool = False,
121-
strip_accents: bool = False,
122-
truncate_limit: int = None,
118+
strip_underscores: str | bool,
119+
case_type: str,
120+
remove_special: bool,
121+
strip_accents: bool,
122+
truncate_limit: int,
123 123
) -> str:
124 124
"""
125 125
Function to clean the column names of a polars DataFrame.

0 commit comments

Comments
 (0)