[DEPR] deprecate pivot_wider (#1263)

samukweku · ericmjl · samuel.oranyeli · commit 861d9a642932 · 2023-06-01T16:06:48.000+10:00
* simplify logic

* minor updates

* changelog

* fix UserWarning on `then` function

---------

Co-authored-by: Eric Ma <ericmjl@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,11 +9,12 @@
 -   [INF] Update some `mkdocs` compatibility code. PR #1231 @thatlittleboy
 -   [INF] Migrated docstring style from Sphinx to Google for better compatibility with `mkdocstrings`. PR #1235 @thatlittleboy
 -   [INF] Prevent selection of chevrons (`>>>`) and outputs in Example code blocks. PR #1237 @thatlittleboy
--   [DEPR] Add deprecation warnings for `process_text`, `rename_column`, `rename_columns`, `filter_on`, `remove_columns`, `fill_direction`. #1045 @samukweku
+-   [DEPR] Add deprecation warnings for `process_text`, `rename_column`, `rename_columns`, `filter_on`, `remove_columns`, `fill_direction`. Issue #1045 @samukweku
 -   [ENH] `pivot_longer` now supports named groups where `names_pattern` is a regular expression. A dictionary can now be passed to `names_pattern`, and is internally evaluated as a list/tuple of regular expressions. Issue #1209 @samukweku
 -   [ENH] Improve selection in `conditional_join`. Issue #1223 @samukweku
 -   [ENH] Add `col` class for selecting columns within an expression. Currently limited to use within `conditional_join`. PR #1260 @samukweku.
 -   [ENH] Performance improvement for range joins in `conditional_join`, when `use_numba = False`. Performance improvement for equi-join, when `use_numba = True`. PR #1256, #1267 @samukweku
+-   [DEPR] Add deprecation warning for `pivot_wider`. Issue #1045 @samukweku
 
 ## [v0.24.0] - 2022-11-12
 
diff --git a/janitor/functions/pivot.py b/janitor/functions/pivot.py
@@ -10,14 +10,15 @@
 from pandas.api.types import (
     is_list_like,
     is_categorical_dtype,
+    is_extension_array_dtype,
 )
 from pandas.core.dtypes.concat import concat_compat
 
 from janitor.functions.utils import (
     get_index_labels,
     _computations_expand_grid,
 )
-from janitor.utils import check
+from janitor.utils import check, refactored_function
 
 
 @pf.register_dataframe_method
@@ -1123,13 +1124,8 @@ def _base_melt(
     reps = len(columns)
     outcome = {name: columns.get_level_values(name) for name in columns.names}
 
-    # offers a fast route
-    # while still returning the underlying array
-    # which could be an extension array
-    # thus helping in preserving dtypes where possible
-    if df._mgr.any_extension_types:
-        values = df._mgr
-        values = [values.iget_values(i) for i in range(df.columns.size)]
+    if df.dtypes.map(is_extension_array_dtype).any(axis=None):
+        values = [arr._values for _, arr in df.items()]
         values = concat_compat(values)
     else:
         values = df._values.ravel(order="F")
@@ -1168,15 +1164,8 @@ def _pivot_longer_dot_value(
     """
     if np.count_nonzero(mapping.columns == ".value") > 1:
         outcome = mapping.pop(".value")
-        out = outcome.iloc[:, 0]
-        # for loop preferred over agg
-        # primarily for speed
-        # if the column is a large array
-        # direct addition is surprisingly faster than
-        # the convenient agg(','.join, axis = 1) option
-        for _, val in outcome.iloc[:, 1:].items():
-            out += val
-        mapping[".value"] = out
+        outcome = outcome.sum(axis=1, numeric_only=False)
+        mapping.insert(loc=0, column=".value", value=outcome)
 
     exclude = {
         word
@@ -1234,7 +1223,7 @@ def _pivot_longer_dot_value(
         indexer = pd.DataFrame(indexer, copy=False)
 
         indexer.columns = columns
-        df = df.reindex(columns=indexer)
+        df = df.reindex(columns=indexer, copy=False)
         df.columns = df.columns.get_level_values(".value")
         values = _dict_from_grouped_names(df=df)
         outcome = indexer.loc[indexer[".value"] == outcome[0], other]
@@ -1286,7 +1275,7 @@ def _headers_single_series(df: pd.DataFrame, mapping: pd.Series) -> tuple:
         df.columns = [mapping, positions]
         indexer = group_size.index, np.arange(group_max)
         indexer = pd.MultiIndex.from_product(indexer)
-        df = df.reindex(columns=indexer)
+        df = df.reindex(columns=indexer, copy=False)
         df.columns = df.columns.get_level_values(0)
     else:
         df.columns = mapping
@@ -1394,6 +1383,12 @@ def _final_frame_longer(
 
 
 @pf.register_dataframe_method
+@refactored_function(
+    message=(
+        "This function will be deprecated in a 1.x release. "
+        "Please use `pd.DataFrame.pivot` instead."
+    )
+)
 def pivot_wider(
     df: pd.DataFrame,
     index: Optional[Union[list, str]] = None,
@@ -1408,6 +1403,11 @@ def pivot_wider(
 ) -> pd.DataFrame:
     """Reshapes data from *long* to *wide* form.
 
+    !!!note
+
+        This function will be deprecated in a 1.x release.
+        Please use `pd.DataFrame.pivot` instead.
+
     The number of columns are increased, while decreasing
     the number of rows. It is the inverse of the
     [`pivot_longer`][janitor.functions.pivot.pivot_longer]
diff --git a/janitor/functions/then.py b/janitor/functions/then.py
@@ -10,7 +10,6 @@
     message="This function will be deprecated in a 1.x release. "
     "Kindly use `pd.DataFrame.pipe` instead."
 )
-@pf.register_dataframe_method
 def then(df: pd.DataFrame, func: Callable) -> pd.DataFrame:
     """Add an arbitrary function to run in the `pyjanitor` method chain.
 

Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,6 @@`
`10`	`10`	`message="This function will be deprecated in a 1.x release. "`
`11`	`11`	`` "Kindly use `pd.DataFrame.pipe` instead." ``
`12`	`12`	`)`
`13`		`-@pf.register_dataframe_method`
`14`	`13`	`def then(df: pd.DataFrame, func: Callable) -> pd.DataFrame:`
`15`	`14`	`` """Add an arbitrary function to run in the `pyjanitor` method chain. ``
`16`	`15`