Skip to content

Commit 861d9a6

Browse files
samukwekuericmjl
authored and
samuel.oranyeli
committed
[DEPR] deprecate pivot_wider (#1263)
* simplify logic
* minor updates
* changelog
* fix UserWarning on `then` function

---------

Co-authored-by: Eric Ma <[email protected]>
1 parent 72d5ca3 commit 861d9a6

File tree

3 files changed

+21
-21
lines changed

3 files changed

+21
-21
lines changed

CHANGELOG.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,12 @@
99
- [INF] Update some `mkdocs` compatibility code. PR #1231 @thatlittleboy
1010
- [INF] Migrated docstring style from Sphinx to Google for better compatibility with `mkdocstrings`. PR #1235 @thatlittleboy
1111
- [INF] Prevent selection of chevrons (`>>>`) and outputs in Example code blocks. PR #1237 @thatlittleboy
12-
- [DEPR] Add deprecation warnings for `process_text`, `rename_column`, `rename_columns`, `filter_on`, `remove_columns`, `fill_direction`. #1045 @samukweku
12+
- [DEPR] Add deprecation warnings for `process_text`, `rename_column`, `rename_columns`, `filter_on`, `remove_columns`, `fill_direction`. Issue #1045 @samukweku
1313
- [ENH] `pivot_longer` now supports named groups where `names_pattern` is a regular expression. A dictionary can now be passed to `names_pattern`, and is internally evaluated as a list/tuple of regular expressions. Issue #1209 @samukweku
1414
- [ENH] Improve selection in `conditional_join`. Issue #1223 @samukweku
1515
- [ENH] Add `col` class for selecting columns within an expression. Currently limited to use within `conditional_join`. PR #1260 @samukweku.
1616
- [ENH] Performance improvement for range joins in `conditional_join`, when `use_numba = False`. Performance improvement for equi-join, when `use_numba = True`. PR #1256, #1267 @samukweku
17+
- [DEPR] Add deprecation warning for `pivot_wider`. Issue #1045 @samukweku
1718

1819
## [v0.24.0] - 2022-11-12
1920

janitor/functions/pivot.py

+19-19
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,15 @@
1010
from pandas.api.types import (
1111
is_list_like,
1212
is_categorical_dtype,
13+
is_extension_array_dtype,
1314
)
1415
from pandas.core.dtypes.concat import concat_compat
1516

1617
from janitor.functions.utils import (
1718
get_index_labels,
1819
_computations_expand_grid,
1920
)
20-
from janitor.utils import check
21+
from janitor.utils import check, refactored_function
2122

2223

2324
@pf.register_dataframe_method
@@ -1123,13 +1124,8 @@ def _base_melt(
11231124
reps = len(columns)
11241125
outcome = {name: columns.get_level_values(name) for name in columns.names}
11251126

1126-
# offers a fast route
1127-
# while still returning the underlying array
1128-
# which could be an extension array
1129-
# thus helping in preserving dtypes where possible
1130-
if df._mgr.any_extension_types:
1131-
values = df._mgr
1132-
values = [values.iget_values(i) for i in range(df.columns.size)]
1127+
if df.dtypes.map(is_extension_array_dtype).any(axis=None):
1128+
values = [arr._values for _, arr in df.items()]
11331129
values = concat_compat(values)
11341130
else:
11351131
values = df._values.ravel(order="F")
@@ -1168,15 +1164,8 @@ def _pivot_longer_dot_value(
11681164
"""
11691165
if np.count_nonzero(mapping.columns == ".value") > 1:
11701166
outcome = mapping.pop(".value")
1171-
out = outcome.iloc[:, 0]
1172-
# for loop preferred over agg
1173-
# primarily for speed
1174-
# if the column is a large array
1175-
# direct addition is surprisingly faster than
1176-
# the convenient agg(','.join, axis = 1) option
1177-
for _, val in outcome.iloc[:, 1:].items():
1178-
out += val
1179-
mapping[".value"] = out
1167+
outcome = outcome.sum(axis=1, numeric_only=False)
1168+
mapping.insert(loc=0, column=".value", value=outcome)
11801169

11811170
exclude = {
11821171
word
@@ -1234,7 +1223,7 @@ def _pivot_longer_dot_value(
12341223
indexer = pd.DataFrame(indexer, copy=False)
12351224

12361225
indexer.columns = columns
1237-
df = df.reindex(columns=indexer)
1226+
df = df.reindex(columns=indexer, copy=False)
12381227
df.columns = df.columns.get_level_values(".value")
12391228
values = _dict_from_grouped_names(df=df)
12401229
outcome = indexer.loc[indexer[".value"] == outcome[0], other]
@@ -1286,7 +1275,7 @@ def _headers_single_series(df: pd.DataFrame, mapping: pd.Series) -> tuple:
12861275
df.columns = [mapping, positions]
12871276
indexer = group_size.index, np.arange(group_max)
12881277
indexer = pd.MultiIndex.from_product(indexer)
1289-
df = df.reindex(columns=indexer)
1278+
df = df.reindex(columns=indexer, copy=False)
12901279
df.columns = df.columns.get_level_values(0)
12911280
else:
12921281
df.columns = mapping
@@ -1394,6 +1383,12 @@ def _final_frame_longer(
13941383

13951384

13961385
@pf.register_dataframe_method
1386+
@refactored_function(
1387+
message=(
1388+
"This function will be deprecated in a 1.x release. "
1389+
"Please use `pd.DataFrame.pivot` instead."
1390+
)
1391+
)
13971392
def pivot_wider(
13981393
df: pd.DataFrame,
13991394
index: Optional[Union[list, str]] = None,
@@ -1408,6 +1403,11 @@ def pivot_wider(
14081403
) -> pd.DataFrame:
14091404
"""Reshapes data from *long* to *wide* form.
14101405
1406+
!!!note
1407+
1408+
This function will be deprecated in a 1.x release.
1409+
Please use `pd.DataFrame.pivot` instead.
1410+
14111411
The number of columns is increased, while decreasing
14121412
the number of rows. It is the inverse of the
14131413
[`pivot_longer`][janitor.functions.pivot.pivot_longer]

janitor/functions/then.py

-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
message="This function will be deprecated in a 1.x release. "
1111
"Kindly use `pd.DataFrame.pipe` instead."
1212
)
13-
@pf.register_dataframe_method
1413
def then(df: pd.DataFrame, func: Callable) -> pd.DataFrame:
1514
"""Add an arbitrary function to run in the `pyjanitor` method chain.
1615

0 commit comments

Comments
 (0)