Skip to content

Commit 6c5c4d6

Browse files
committed
Add MWE for label_encode and factorize_columns
1 parent 531fa98 commit 6c5c4d6

File tree

2 files changed

+69
-49
lines changed

2 files changed

+69
-49
lines changed

janitor/functions/factorize_columns.py

+30-30
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
"""Implementation of the `factorize_columns` function"""
12
from typing import Hashable, Iterable, Union
23
import pandas_flavor as pf
34
import pandas as pd
@@ -13,50 +14,49 @@ def factorize_columns(
1314
**kwargs,
1415
) -> pd.DataFrame:
1516
"""
16-
Converts labels into numerical data
17+
Converts labels into numerical data.
1718
1819
This method will create a new column with the string `_enc` appended
1920
after the original column's name.
2021
This can be overridden with the suffix parameter.
2122
22-
Internally this method uses pandas `factorize` method.
23+
Internally, this method uses pandas `factorize` method.
2324
It also takes an optional suffix and keyword arguments.
2425
An empty string as suffix will override the existing column.
2526
2627
This method mutates the original DataFrame.
2728
28-
Functional usage syntax:
29+
Example:
2930
30-
```python
31-
df = factorize_columns(
32-
df,
33-
column_names="my_categorical_column",
34-
suffix="_enc"
35-
) # one way
36-
```
37-
38-
Method chaining syntax:
39-
40-
```python
41-
import pandas as pd
42-
import janitor
43-
categorical_cols = ['col1', 'col2', 'col4']
44-
df = (
45-
pd.DataFrame(...)
46-
.factorize_columns(
47-
column_names=categorical_cols,
48-
suffix="_enc"
49-
)
50-
)
51-
```
31+
>>> import pandas as pd
32+
>>> import janitor
33+
>>> df = pd.DataFrame({
34+
... "foo": ["b", "b", "a", "c", "b"],
35+
... "bar": range(4, 9),
36+
... })
37+
>>> df
38+
foo bar
39+
0 b 4
40+
1 b 5
41+
2 a 6
42+
3 c 7
43+
4 b 8
44+
>>> df.factorize_columns(column_names="foo")
45+
foo bar foo_enc
46+
0 b 4 0
47+
1 b 5 0
48+
2 a 6 1
49+
3 c 7 2
50+
4 b 8 0
5251
5352
:param df: The pandas DataFrame object.
54-
:param column_names: A column name or an iterable (list
55-
or tuple) of column names.
56-
:param suffix: Suffix to be used for the new column. Default value is _enc.
57-
An empty string suffix means, it will override the existing column
53+
:param column_names: A column name or an iterable (list or tuple) of
54+
column names.
55+
:param suffix: Suffix to be used for the new column.
56+
An empty string suffix means it will override the existing column.
5857
:param **kwargs: Keyword arguments. It takes any of the keyword arguments,
59-
which the pandas factorize method takes like sort,na_sentinel,size_hint
58+
which the pandas factorize method takes like `sort`, `na_sentinel`,
59+
`size_hint`.
6060
6161
:returns: A pandas DataFrame.
6262
"""

janitor/functions/label_encode.py

+39-19
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
"""Implementation of the `label_encode` function"""
12
from typing import Hashable, Iterable, Union
23
import warnings
34
import pandas_flavor as pf
@@ -10,43 +11,62 @@
1011
@pf.register_dataframe_method
1112
@deprecated_alias(columns="column_names")
1213
def label_encode(
13-
df: pd.DataFrame, column_names: Union[str, Iterable[str], Hashable]
14+
df: pd.DataFrame,
15+
column_names: Union[str, Iterable[str], Hashable],
1416
) -> pd.DataFrame:
1517
"""
1618
Convert labels into numerical data.
1719
1820
This method will create a new column with the string `_enc` appended
19-
after the original column's name. Consider this to be syntactic sugar.
21+
after the original column's name.
22+
Consider this to be syntactic sugar.
23+
This function uses the `factorize` pandas function under the hood.
2024
21-
This method behaves differently from `encode_categorical`. This method
22-
creates a new column of numeric data. `encode_categorical` replaces the
23-
dtype of the original column with a *categorical* dtype.
25+
This method behaves differently from
26+
[`encode_categorical`][janitor.functions.encode_categorical.encode_categorical].
27+
This method creates a new column of numeric data.
28+
[`encode_categorical`][janitor.functions.encode_categorical.encode_categorical]
29+
replaces the dtype of the original column with a *categorical* dtype.
2430
2531
This method mutates the original DataFrame.
2632
27-
Functional usage syntax:
33+
Example:
2834
29-
```python
30-
df = label_encode(df, column_names="my_categorical_column") # one way
31-
```
35+
>>> import pandas as pd
36+
>>> import janitor
37+
>>> df = pd.DataFrame({
38+
... "foo": ["b", "b", "a", "c", "b"],
39+
... "bar": range(4, 9),
40+
... })
41+
>>> df
42+
foo bar
43+
0 b 4
44+
1 b 5
45+
2 a 6
46+
3 c 7
47+
4 b 8
48+
>>> df.label_encode(column_names="foo")
49+
foo bar foo_enc
50+
0 b 4 0
51+
1 b 5 0
52+
2 a 6 1
53+
3 c 7 2
54+
4 b 8 0
3255
33-
Method chaining syntax:
56+
!!!note
3457
35-
```python
36-
import pandas as pd
37-
import janitor
38-
categorical_cols = ['col1', 'col2', 'col4']
39-
df = pd.DataFrame(...).label_encode(column_names=categorical_cols)
40-
```
58+
This function will be deprecated in a 1.x release.
59+
Please use [`factorize_columns`][janitor.functions.factorize_columns.factorize_columns]
60+
instead.
4161
4262
:param df: The pandas DataFrame object.
4363
:param column_names: A column name or an iterable (list
4464
or tuple) of column names.
4565
:returns: A pandas DataFrame.
46-
"""
66+
""" # noqa: E501
4767
warnings.warn(
48-
"label_encode will be deprecated in a 1.x release. \
49-
Please use factorize_columns instead"
68+
"`label_encode` will be deprecated in a 1.x release. "
69+
"Please use `factorize_columns` instead."
5070
)
5171
df = _factorize(df, column_names, "_enc")
5272
return df

0 commit comments

Comments
 (0)