|
| 1 | +"""Implementation of `label_encode` function""" |
1 | 2 | from typing import Hashable, Iterable, Union
|
2 | 3 | import warnings
|
3 | 4 | import pandas_flavor as pf
|
|
@pf.register_dataframe_method
@deprecated_alias(columns="column_names")
def label_encode(
    df: pd.DataFrame,
    column_names: Union[str, Iterable[str], Hashable],
) -> pd.DataFrame:
    """
    Convert labels into numerical data.

    This method will create a new column with the string `_enc` appended
    after the original column's name.
    Consider this to be syntactic sugar.
    This function uses the `factorize` pandas function under the hood.

    This method behaves differently from
    [`encode_categorical`][janitor.functions.encode_categorical.encode_categorical].
    This method creates a new column of numeric data.
    [`encode_categorical`][janitor.functions.encode_categorical.encode_categorical]
    replaces the dtype of the original column with a *categorical* dtype.

    This method mutates the original DataFrame.

    Example:

        >>> import pandas as pd
        >>> import janitor
        >>> df = pd.DataFrame({
        ...     "foo": ["b", "b", "a", "c", "b"],
        ...     "bar": range(4, 9),
        ... })
        >>> df
          foo  bar
        0   b    4
        1   b    5
        2   a    6
        3   c    7
        4   b    8
        >>> df.label_encode(column_names="foo")
          foo  bar  foo_enc
        0   b    4        0
        1   b    5        0
        2   a    6        1
        3   c    7        2
        4   b    8        0

    !!!note

        This function will be deprecated in a 1.x release.
        Please use [`factorize_columns`][janitor.functions.factorize_columns.factorize_columns]
        instead.

    :param df: The pandas DataFrame object.
    :param column_names: A column name or an iterable (list
        or tuple) of column names.
    :returns: A pandas DataFrame.
    """  # noqa: E501
    # Warn on every call so users are steered towards `factorize_columns`
    # before this function is removed.
    warnings.warn(
        "`label_encode` will be deprecated in a 1.x release. "
        "Please use `factorize_columns` instead."
    )
    # The "_enc" suffix names the new encoded column(s); the encoding itself
    # is delegated to the shared `_factorize` helper.
    return _factorize(df, column_names, "_enc")
|
0 commit comments