|
1 |
| -from __future__ import annotations |
2 |
| - |
3 |
| -from polars.type_aliases import ColumnNameOrSelector |
4 |
| - |
5 |
| -from janitor.utils import check, import_message |
6 |
| - |
7 |
| -from .clean_names import _clean_column_names, _clean_expr_names |
8 |
| -from .row_to_names import _row_to_names |
9 |
| -from .pivot_longer import _pivot_longer, _pivot_longer_dot_value |
10 |
| - |
11 |
| -try: |
12 |
| - import polars as pl |
13 |
| -except ImportError: |
14 |
| - import_message( |
15 |
| - submodule="polars", |
16 |
| - package="polars", |
17 |
| - conda_channel="conda-forge", |
18 |
| - pip_install=True, |
19 |
| - ) |
20 |
| - |
21 |
| - |
22 |
@pl.api.register_dataframe_namespace("janitor")
class PolarsFrame:
    """Methods exposed on polars DataFrames under the `janitor` namespace.

    The class is registered with `pl.api.register_dataframe_namespace`,
    so its methods are called as `df.janitor.<method>(...)`.
    """

    def __init__(self, df: pl.DataFrame) -> None:
        # Keep a handle on the wrapped frame; every method works on it.
        self._df = df

    def clean_names(
        self,
        strip_underscores: str | bool | None = None,
        case_type: str = "lower",
        remove_special: bool = False,
        strip_accents: bool = False,
        truncate_limit: int | None = None,
    ) -> pl.DataFrame:
        """
        Clean the column names in a polars DataFrame.

        Examples:
            >>> import polars as pl
            >>> import janitor.polars
            >>> df = pl.DataFrame(
            ...     {
            ...         "Aloha": range(3),
            ...         "Bell Chart": range(3),
            ...         "Animals@#$%^": range(3)
            ...     }
            ... )
            >>> df
            shape: (3, 3)
            ┌───────┬────────────┬──────────────┐
            │ Aloha ┆ Bell Chart ┆ Animals@#$%^ │
            │ ---   ┆ ---        ┆ ---          │
            │ i64   ┆ i64        ┆ i64          │
            ╞═══════╪════════════╪══════════════╡
            │ 0     ┆ 0          ┆ 0            │
            │ 1     ┆ 1          ┆ 1            │
            │ 2     ┆ 2          ┆ 2            │
            └───────┴────────────┴──────────────┘
            >>> df.janitor.clean_names(remove_special=True)
            shape: (3, 3)
            ┌───────┬────────────┬─────────┐
            │ aloha ┆ bell_chart ┆ animals │
            │ ---   ┆ ---        ┆ ---     │
            │ i64   ┆ i64        ┆ i64     │
            ╞═══════╪════════════╪═════════╡
            │ 0     ┆ 0          ┆ 0       │
            │ 1     ┆ 1          ┆ 1       │
            │ 2     ┆ 2          ┆ 2       │
            └───────┴────────────┴─────────┘

        !!! info "New in version 0.28.0"

        Args:
            strip_underscores: Removes the outer underscores from all
                column names. Default None keeps outer underscores. Values can be
                either 'left', 'right' or 'both' or the respective shorthand 'l',
                'r' and True.
            case_type: Whether to make the column names lower or uppercase.
                Current case may be preserved with 'preserve',
                while snake case conversion (from CamelCase or camelCase only)
                can be turned on using "snake".
                Default 'lower' makes all characters lowercase.
            remove_special: Remove special characters from the column names.
                Only letters, numbers and underscores are preserved.
            strip_accents: Whether or not to remove accents from
                the labels.
            truncate_limit: Truncates formatted column names to
                the specified length. Default None does not truncate.

        Returns:
            A polars DataFrame.
        """  # noqa: E501
        # `rename` is given a callable, so each existing column name is
        # passed through `_clean_column_names` with the caller's options.
        return self._df.rename(
            lambda col: _clean_column_names(
                obj=col,
                strip_accents=strip_accents,
                strip_underscores=strip_underscores,
                case_type=case_type,
                remove_special=remove_special,
                truncate_limit=truncate_limit,
            )
        )
105 |
| - |
106 |
| - |
107 |
@pl.api.register_lazyframe_namespace("janitor")
class PolarsLazyFrame:
    """Methods exposed on polars LazyFrames under the `janitor` namespace.

    The class is registered with `pl.api.register_lazyframe_namespace`,
    so its methods are called as `lf.janitor.<method>(...)`.
    """

    def __init__(self, df: pl.LazyFrame) -> None:
        # Keep a handle on the wrapped lazy frame; every method works on it.
        self._df = df

    def clean_names(
        self,
        strip_underscores: str | bool | None = None,
        case_type: str = "lower",
        remove_special: bool = False,
        strip_accents: bool = False,
        truncate_limit: int | None = None,
    ) -> pl.LazyFrame:
        """
        Clean the column names in a polars LazyFrame.

        Examples:
            >>> import polars as pl
            >>> import janitor.polars
            >>> df = pl.LazyFrame(
            ...     {
            ...         "Aloha": range(3),
            ...         "Bell Chart": range(3),
            ...         "Animals@#$%^": range(3)
            ...     }
            ... )
            >>> df.collect()
            shape: (3, 3)
            ┌───────┬────────────┬──────────────┐
            │ Aloha ┆ Bell Chart ┆ Animals@#$%^ │
            │ ---   ┆ ---        ┆ ---          │
            │ i64   ┆ i64        ┆ i64          │
            ╞═══════╪════════════╪══════════════╡
            │ 0     ┆ 0          ┆ 0            │
            │ 1     ┆ 1          ┆ 1            │
            │ 2     ┆ 2          ┆ 2            │
            └───────┴────────────┴──────────────┘
            >>> df.janitor.clean_names(remove_special=True).collect()
            shape: (3, 3)
            ┌───────┬────────────┬─────────┐
            │ aloha ┆ bell_chart ┆ animals │
            │ ---   ┆ ---        ┆ ---     │
            │ i64   ┆ i64        ┆ i64     │
            ╞═══════╪════════════╪═════════╡
            │ 0     ┆ 0          ┆ 0       │
            │ 1     ┆ 1          ┆ 1       │
            │ 2     ┆ 2          ┆ 2       │
            └───────┴────────────┴─────────┘

        !!! info "New in version 0.28.0"

        Args:
            strip_underscores: Removes the outer underscores from all
                column names. Default None keeps outer underscores. Values can be
                either 'left', 'right' or 'both' or the respective shorthand 'l',
                'r' and True.
            case_type: Whether to make the column names lower or uppercase.
                Current case may be preserved with 'preserve',
                while snake case conversion (from CamelCase or camelCase only)
                can be turned on using "snake".
                Default 'lower' makes all characters lowercase.
            remove_special: Remove special characters from the column names.
                Only letters, numbers and underscores are preserved.
            strip_accents: Whether or not to remove accents from
                the labels.
            truncate_limit: Truncates formatted column names to
                the specified length. Default None does not truncate.

        Returns:
            A polars LazyFrame.
        """  # noqa: E501
        # `rename` is given a callable, so each existing column name is
        # passed through `_clean_column_names` with the caller's options.
        return self._df.rename(
            lambda col: _clean_column_names(
                obj=col,
                strip_accents=strip_accents,
                strip_underscores=strip_underscores,
                case_type=case_type,
                remove_special=remove_special,
                truncate_limit=truncate_limit,
            )
        )
# Re-export the names that make up this package's public interface.
from .dataframe import PolarsDataFrame
from .expressions import PolarsExpr
from .lazyframe import PolarsLazyFrame
from .pivot_longer import pivot_longer_spec

# Names made available by `from <package> import *`.
# NOTE(review): "clean_names" is not imported by name above; presumably it
# refers to the `clean_names` submodule of this package - confirm intended.
__all__ = [
    "pivot_longer_spec",
    "clean_names",
    "PolarsDataFrame",
    "PolarsLazyFrame",
    "PolarsExpr",
]
0 commit comments