pyjanitor-devs
diff --git a/janitor/functions/__init__.py
+1-1 b/janitor/functions/__init__.py
+1-1
diff --git a/janitor/functions/change_type.py
+50-50 b/janitor/functions/change_type.py
+50-50
diff --git a/janitor/functions/coalesce.py
+2-3 b/janitor/functions/coalesce.py
+2-3
diff --git a/janitor/functions/collapse_levels.py
+3-1 b/janitor/functions/collapse_levels.py
+3-1
diff --git a/janitor/functions/drop_constant_columns.py
+1-8 b/janitor/functions/drop_constant_columns.py
+1-8
diff --git a/janitor/functions/encode_categorical.py
+3-5 b/janitor/functions/encode_categorical.py
+3-5
diff --git a/janitor/functions/get_dupes.py
+1-2 b/janitor/functions/get_dupes.py
+1-2
diff --git a/janitor/functions/impute.py
+47-40 b/janitor/functions/impute.py
+47-40
@@ -64,7 +64,7 @@
 from .reorder_columns import reorder_columns
 from .round_to_fraction import round_to_fraction
 from .row_to_names import row_to_names
-from .select import select_columns, select_rows
+from .select import select_columns, select_rows, select
 from .shuffle import shuffle
 from .sort_column_value_order import sort_column_value_order
 from .sort_naturally import sort_naturally
 
@@ -24,65 +24,65 @@ def change_type(
 ) -> pd.DataFrame:
     """Change the type of a column.
 
-     This method does not mutate the original DataFrame.
+    This method does not mutate the original DataFrame.
 
-     Exceptions that are raised can be ignored. For example, if one has a mixed
-     dtype column that has non-integer strings and integers, and you want to
-     coerce everything to integers, you can optionally ignore the non-integer
-     strings and replace them with `NaN` or keep the original value.
+    Exceptions that are raised can be ignored. For example, if one has a mixed
+    dtype column that has non-integer strings and integers, and you want to
+    coerce everything to integers, you can optionally ignore the non-integer
+    strings and replace them with `NaN` or keep the original value.
 
-     Intended to be the method-chaining alternative to:
+    Intended to be the method-chaining alternative to:
 
-     ```python
-     df[col] = df[col].astype(dtype)
+    ```python
+    df[col] = df[col].astype(dtype)
     ```
 
-     !!!note
-
-         This function will be deprecated in a 1.x release.
-         Please use `pd.DataFrame.astype` instead.
-
-     Example: Change the type of a column.
-
-         >>> import pandas as pd
-         >>> import janitor
-         >>> df = pd.DataFrame({"col1": range(3), "col2": ["m", 5, True]})
-         >>> df
-            col1  col2
-         0     0     m
-         1     1     5
-         2     2  True
-         >>> df.change_type(
-         ...     "col1", dtype=str,
-         ... ).change_type(
-         ...     "col2", dtype=float, ignore_exception="fillna",
-         ... )
-           col1  col2
-         0    0   NaN
-         1    1   5.0
-         2    2   1.0
+    !!!note
 
-     Example: Change the type of multiple columns.
+        This function will be deprecated in a 1.x release.
+        Please use `pd.DataFrame.astype` instead.
 
-     Change the type of all columns, please use `DataFrame.astype` instead.
+    Example: Change the type of a column.
 
-         >>> import pandas as pd
-         >>> import janitor
-         >>> df = pd.DataFrame({"col1": range(3), "col2": ["m", 5, True]})
-         >>> df.change_type(['col1', 'col2'], str)
+        >>> import pandas as pd
+        >>> import janitor
+        >>> df = pd.DataFrame({"col1": range(3), "col2": ["m", 5, True]})
+        >>> df
            col1  col2
-         0    0     m
-         1    1     5
-         2    2  True
-
-     :param df: A pandas DataFrame.
-     :param column_name: The column(s) in the dataframe.
-     :param dtype: The datatype to convert to. Should be one of the standard
-         Python types, or a numpy datatype.
-     :param ignore_exception: one of `{False, "fillna", "keep_values"}`.
-     :returns: A pandas DataFrame with changed column types.
-     :raises ValueError: If unknown option provided for
-         `ignore_exception`.
+        0     0     m
+        1     1     5
+        2     2  True
+        >>> df.change_type(
+        ...     "col1", dtype=str,
+        ... ).change_type(
+        ...     "col2", dtype=float, ignore_exception="fillna",
+        ... )
+          col1  col2
+        0    0   NaN
+        1    1   5.0
+        2    2   1.0
+
+    Example: Change the type of multiple columns.
+
+    To change the type of all columns, please use `DataFrame.astype` instead.
+
+        >>> import pandas as pd
+        >>> import janitor
+        >>> df = pd.DataFrame({"col1": range(3), "col2": ["m", 5, True]})
+        >>> df.change_type(['col1', 'col2'], str)
+          col1  col2
+        0    0     m
+        1    1     5
+        2    2  True
+
+    :param df: A pandas DataFrame.
+    :param column_name: The column(s) in the dataframe.
+    :param dtype: The datatype to convert to. Should be one of the standard
+        Python types, or a numpy datatype.
+    :param ignore_exception: one of `{False, "fillna", "keep_values"}`.
+    :returns: A pandas DataFrame with changed column types.
+    :raises ValueError: If unknown option provided for
+        `ignore_exception`.
     """
 
     df = df.copy()  # avoid mutating the original DataFrame
 
@@ -4,7 +4,7 @@
 import pandas_flavor as pf
 
 from janitor.utils import check, deprecated_alias
-from janitor.functions.utils import _select_index
+from janitor.functions.utils import get_index_labels
 
 
 @pf.register_dataframe_method
@@ -95,8 +95,7 @@ def coalesce(
             "The number of columns to coalesce should be a minimum of 2."
         )
 
-    indices = _select_index([*column_names], df, axis="columns")
-    column_names = df.columns[indices]
+    column_names = get_index_labels([*column_names], df, axis="columns")
 
     if target_column_name:
         check("target_column_name", target_column_name, [str])
 
@@ -9,7 +9,7 @@
 def collapse_levels(df: pd.DataFrame, sep: str = "_") -> pd.DataFrame:
     """Flatten multi-level column dataframe to a single level.
 
-    This method mutates the original DataFrame.
+    This method does not mutate the original DataFrame.
 
     Given a DataFrame containing multi-level columns, flatten to single-level
     by string-joining the column labels in each level.
@@ -72,6 +72,8 @@ class  max_speed    type
     if not isinstance(df.columns, pd.MultiIndex):
         return df
 
+    df = df[:]
+
     df.columns = [
         sep.join(str(el) for el in tup if str(el) != "")
         for tup in df  # noqa: PD011
 
@@ -34,11 +34,4 @@ def drop_constant_columns(df: pd.DataFrame) -> pd.DataFrame:
     :param df: Input Pandas DataFrame
     :returns: The Pandas DataFrame with the constant columns dropped.
     """
-    # Find the constant columns
-    constant_columns = []
-    for col in df.columns:
-        if len(df[col].unique()) == 1:
-            constant_columns.append(col)
-
-    # Drop constant columns from df and return it
-    return df.drop(labels=constant_columns, axis=1)
+    return df.loc[:, df.nunique().ne(1)]
@@ -7,7 +7,8 @@
 import pandas_flavor as pf
 from pandas.api.types import is_list_like
 
-from janitor.utils import check, check_column, deprecated_alias
+from janitor.utils import check_column, deprecated_alias
+from janitor.functions.utils import get_index_labels
 
 
 @pf.register_dataframe_method
@@ -112,10 +113,7 @@ def encode_categorical(
     # kwargs takes care of scenarios where user wants an ordered category
     # or user supplies specific categories to create the categorical
     if column_names is not None:
-        check("column_names", column_names, [list, tuple, Hashable])
-        if isinstance(column_names, Hashable):
-            column_names = [column_names]
-        check_column(df, column_names)
+        column_names = get_index_labels([column_names], df, axis="columns")
         dtypes = {col: "category" for col in column_names}
         return df.astype(dtypes)
 
 
@@ -75,5 +75,4 @@ def get_dupes(
         all columns.
     :returns: The duplicate rows, as a pandas DataFrame.
     """
-    dupes = df.duplicated(subset=column_names, keep=False)
-    return df[dupes == True]  # noqa: E712
+    return df.loc[df.duplicated(subset=column_names, keep=False)]
@@ -1,29 +1,29 @@
 """Implementation of `impute` function"""
-from typing import Any, Hashable, Optional
+from typing import Any, Optional
+
 
-import lazy_loader as lazy
-import numpy as np
 import pandas_flavor as pf
 import pandas as pd
 
 from janitor.utils import deprecated_alias
-
-ss = lazy.load("scipy.stats")
+from janitor.functions.utils import get_index_labels
+from itertools import product
 
 
 @pf.register_dataframe_method
 @deprecated_alias(column="column_name")
+@deprecated_alias(column_name="column_names")
 @deprecated_alias(statistic="statistic_column_name")
 def impute(
     df: pd.DataFrame,
-    column_name: Hashable,
+    column_names: Any,
     value: Optional[Any] = None,
     statistic_column_name: Optional[str] = None,
 ) -> pd.DataFrame:
     """
     Method-chainable imputation of values in a column.
 
-    This method mutates the original DataFrame.
+    This method does not mutate the original DataFrame.
 
     Underneath the hood, this function calls the `.fillna()` method available
     to every `pandas.Series` object.
@@ -34,8 +34,11 @@ def impute(
     take on the value provided.
 
     If `statistic_column_name` is provided, then all null values in the
-    selected column will take on the summary statistic value of other non-null
-    values.
+    selected column(s) will take on the summary statistic value
+    of other non-null values.
+
+    Column selection in `column_names` is possible using the
+    [`select_columns`][janitor.functions.select.select_columns] syntax.
 
     Currently supported statistics include:
 
@@ -63,7 +66,7 @@ def impute(
 
     Imputing null values with 0 (using the `value` parameter):
 
-        >>> df.impute(column_name="sales", value=0.0)
+        >>> df.impute(column_names="sales", value=0.0)
            a  sales  score
         0  1    0.0    NaN
         1  2    0.0    3.0
@@ -72,14 +75,14 @@ def impute(
     Imputing null values with median (using the `statistic_column_name`
     parameter):
 
-        >>> df.impute(column_name="score", statistic_column_name="median")
+        >>> df.impute(column_names="score", statistic_column_name="median")
            a  sales  score
-        0  1    0.0    2.5
-        1  2    0.0    3.0
-        2  3    0.0    2.0
+        0  1    NaN    2.5
+        1  2    NaN    3.0
+        2  3    NaN    2.0
 
     :param df: A pandas DataFrame.
-    :param column_name: The name of the column on which to impute values.
+    :param column_names: The name of the column(s) on which to impute values.
     :param value: The value used for imputation, passed into `.fillna` method
         of the underlying pandas Series.
     :param statistic_column_name: The column statistic to impute.
@@ -90,42 +93,46 @@ def impute(
         `average`, `median`, `mode`, `minimum`, `min`, `maximum`, or `max`.
     """
     # Firstly, we check that only one of `value` or `statistic` are provided.
+    if (value is None) and (statistic_column_name is None):
+        raise ValueError("Kindly specify a value or a statistic_column_name")
+
     if value is not None and statistic_column_name is not None:
         raise ValueError(
             "Only one of `value` or `statistic_column_name` should be "
             "provided."
         )
 
-    # If statistic is provided, then we compute the relevant summary statistic
-    # from the other data.
-    funcs = {
-        "mean": np.mean,
-        "average": np.mean,  # aliased
-        "median": np.median,
-        "mode": ss.mode,
-        "minimum": np.min,
-        "min": np.min,  # aliased
-        "maximum": np.max,
-        "max": np.max,  # aliased
-    }
-    if statistic_column_name is not None:
+    column_names = get_index_labels([column_names], df, axis="columns")
+
+    if value is not None:
+        value = dict(product(column_names, [value]))
+
+    else:
+        # If statistic is provided, then we compute
+        # the relevant summary statistic
+        # from the other data.
+        funcs = {
+            "mean": "mean",
+            "average": "mean",  # aliased
+            "median": "median",
+            "mode": "mode",
+            "minimum": "min",
+            "min": "min",  # aliased
+            "maximum": "max",
+            "max": "max",  # aliased
+        }
         # Check that the statistic keyword argument is one of the approved.
         if statistic_column_name not in funcs:
             raise KeyError(
                 f"`statistic_column_name` must be one of {funcs.keys()}."
             )
 
-        value = funcs[statistic_column_name](
-            df[column_name].dropna().to_numpy()
-        )
-        # special treatment for mode, because scipy stats mode returns a
-        # moderesult object.
+        value = dict(product(column_names, [funcs[statistic_column_name]]))
+
+        value = df.agg(value)
+
+        # special treatment for mode: keep only the first mode (label 0)
         if statistic_column_name == "mode":
-            value = value.mode[0]
+            value = {key: val.at[0] for key, val in value.items()}
 
-    # The code is architected this way - if `value` is not provided but
-    # statistic is, we then overwrite the None value taken on by `value`, and
-    # use it to set the imputation column.
-    if value is not None:
-        df[column_name] = df[column_name].fillna(value)
-    return df
+    return df.fillna(value=value)