Commit 88accb3

Merge branch 'main' into fix-59242
2 parents: 6ea5785 + 8a53447

47 files changed: +631 -800 lines changed

.github/workflows/unit-tests.yml
Lines changed: 4 additions & 2 deletions

@@ -385,10 +385,12 @@ jobs:
         nogil: true

     - name: Build Environment
+      # TODO: Once numpy 2.2.1 is out, don't install nightly version
+      # Tests segfault with numpy 2.2.0: https://github.com/numpy/numpy/pull/27955
       run: |
         python --version
-        python -m pip install --upgrade pip setuptools wheel numpy meson[ninja]==1.2.1 meson-python==0.13.1
-        python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython
+        python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
+        python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython numpy
         python -m pip install versioneer[toml]
         python -m pip install python-dateutil pytz tzdata hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov
         python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror"

ci/code_checks.sh
Lines changed: 0 additions & 4 deletions

@@ -81,7 +81,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
    -i "pandas.Timestamp.resolution PR02" \
    -i "pandas.Timestamp.tzinfo GL08" \
    -i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
-   -i "pandas.arrays.IntervalArray.length SA01" \
    -i "pandas.arrays.NumpyExtensionArray SA01" \
    -i "pandas.arrays.TimedeltaArray PR07,SA01" \
    -i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
@@ -94,11 +93,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
    -i "pandas.core.resample.Resampler.std SA01" \
    -i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
    -i "pandas.core.resample.Resampler.var SA01" \
-   -i "pandas.errors.UndefinedVariableError PR01,SA01" \
    -i "pandas.errors.ValueLabelTypeMismatch SA01" \
-   -i "pandas.io.json.build_table_schema PR07,RT03,SA01" \
    -i "pandas.plotting.andrews_curves RT03,SA01" \
-   -i "pandas.plotting.scatter_matrix PR07,SA01" \
    -i "pandas.tseries.offsets.BDay PR02,SA01" \
    -i "pandas.tseries.offsets.BQuarterBegin.is_on_offset GL08" \
    -i "pandas.tseries.offsets.BQuarterBegin.n GL08" \

doc/source/user_guide/cookbook.rst
Lines changed: 2 additions & 2 deletions

@@ -459,7 +459,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
    df

    # List the size of the animals with the highest weight.
-   df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()], include_groups=False)
+   df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()])

`Using get_group
<https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key>`__
@@ -482,7 +482,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
        return pd.Series(["L", avg_weight, True], index=["size", "weight", "adult"])


-   expected_df = gb.apply(GrowUp, include_groups=False)
+   expected_df = gb.apply(GrowUp)
    expected_df

`Expanding apply
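
To illustrate the updated cookbook call, here is a minimal, self-contained sketch; the small DataFrame below is made up for this note and is not the cookbook's own example data.

    import pandas as pd

    # Hypothetical data, only to show the call shape used in the cookbook.
    df = pd.DataFrame(
        {
            "animal": ["cat", "dog", "cat", "dog"],
            "size": ["S", "M", "L", "S"],
            "weight": [8, 20, 12, 10],
        }
    )

    # Grouping columns are now always excluded from the sub-DataFrame passed
    # to the callable, so include_groups=False no longer needs to be spelled out.
    df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()])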

doc/source/user_guide/groupby.rst
Lines changed: 4 additions & 4 deletions

@@ -1074,7 +1074,7 @@ missing values with the ``ffill()`` method.
    ).set_index("date")
    df_re

-   df_re.groupby("group").resample("1D", include_groups=False).ffill()
+   df_re.groupby("group").resample("1D").ffill()

.. _groupby.filter:

@@ -1252,13 +1252,13 @@ the argument ``group_keys`` which defaults to ``True``. Compare

.. ipython:: python

-   df.groupby("A", group_keys=True).apply(lambda x: x, include_groups=False)
+   df.groupby("A", group_keys=True).apply(lambda x: x)

with

.. ipython:: python

-   df.groupby("A", group_keys=False).apply(lambda x: x, include_groups=False)
+   df.groupby("A", group_keys=False).apply(lambda x: x)


Numba accelerated routines
@@ -1742,7 +1742,7 @@ column index name will be used as the name of the inserted column:
        result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()}
        return pd.Series(result, name="metrics")

-   result = df.groupby("a").apply(compute_metrics, include_groups=False)
+   result = df.groupby("a").apply(compute_metrics)

   result
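
A short sketch of the ``group_keys`` comparison after this change; the toy frame is an assumption, not the user guide's actual ``df``.

    import pandas as pd

    df = pd.DataFrame({"A": ["a", "a", "b"], "B": [1, 2, 3]})  # assumed toy data

    # The callable sees the same sub-frame (grouping column "A" excluded) in
    # both cases; group_keys only decides whether the group labels are added
    # to the result's index.
    df.groupby("A", group_keys=True).apply(lambda x: x)   # index: (A, original index)
    df.groupby("A", group_keys=False).apply(lambda x: x)  # index: original index only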

doc/source/whatsnew/v3.0.0.rst
Lines changed: 3 additions & 0 deletions

@@ -56,6 +56,7 @@ Other enhancements
 - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
 - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
+- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
 - :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
@@ -553,6 +554,7 @@ Other Removals
 - Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`)
 - Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`)
 - Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise in such cases (:issue:`33941`, :issue:`51276`)
+- Removed specifying ``include_groups=True`` in :class:`.DataFrameGroupBy.apply` and :class:`.Resampler.apply` (:issue:`7155`)

 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.performance:
@@ -801,6 +803,7 @@ Other
 - Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
 - Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`)
 - Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
+- Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`)

 .. ***DO NOT USE THIS SECTION***
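
For the :class:`NamedAgg` enhancement noted above, a plausible usage sketch follows; the keyword form mirrors groupby named aggregation and is an assumption here, not something quoted from the release note.

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3, 4]})

    # Assumed usage for GH 28333: NamedAgg passed through **kwargs, with the
    # keyword giving the name of the output column.
    df.rolling(window=2).agg(total=pd.NamedAgg(column="a", aggfunc="sum"))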

pandas/core/arrays/interval.py
Lines changed: 14 additions & 0 deletions

@@ -1306,6 +1306,20 @@ def length(self) -> Index:
        """
        Return an Index with entries denoting the length of each Interval.

+        The length of an interval is calculated as the difference between
+        its `right` and `left` bounds. This property is particularly useful
+        when working with intervals where the size of the interval is an important
+        attribute, such as in time-series analysis or spatial data analysis.
+
+        See Also
+        --------
+        arrays.IntervalArray.left : Return the left endpoints of each Interval in
+            the IntervalArray as an Index.
+        arrays.IntervalArray.right : Return the right endpoints of each Interval in
+            the IntervalArray as an Index.
+        arrays.IntervalArray.mid : Return the midpoint of each Interval in the
+            IntervalArray as an Index.
+
        Examples
        --------
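
A quick sketch of the property being documented; ``length`` is simply ``right - left`` per interval.

    import pandas as pd

    arr = pd.arrays.IntervalArray.from_breaks([0, 1, 5, 10])
    arr.left    # Index([0, 1, 5], ...)
    arr.right   # Index([1, 5, 10], ...)
    arr.length  # Index([1, 4, 5], ...) -- right minus left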

pandas/core/dtypes/common.py
Lines changed: 1 addition & 1 deletion

@@ -430,7 +430,7 @@ def is_period_dtype(arr_or_dtype) -> bool:
    Check whether an array-like or dtype is of the Period dtype.

    .. deprecated:: 2.2.0
-       Use isinstance(dtype, pd.Period) instead.
+       Use isinstance(dtype, pd.PeriodDtype) instead.

    Parameters
    ----------
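
The corrected recommendation in context, as a small sketch:

    import pandas as pd
    from pandas.api.types import is_period_dtype

    ser = pd.Series(pd.period_range("2024-01", periods=3, freq="M"))

    # Deprecated since 2.2.0:
    is_period_dtype(ser.dtype)             # True, but emits a deprecation warning

    # Replacement named by the fixed docstring:
    isinstance(ser.dtype, pd.PeriodDtype)  # True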

pandas/core/frame.py
Lines changed: 1 addition & 1 deletion

@@ -5009,7 +5009,7 @@ def assign(self, **kwargs) -> DataFrame:

        Parameters
        ----------
-        **kwargs : dict of {str: callable or Series}
+        **kwargs : callable or Series
            The column names are keywords. If the values are
            callable, they are computed on the DataFrame and
            assigned to the new columns. The callable must not
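
A minimal sketch of what the corrected ``**kwargs`` description means in practice; the column names and values below are made up:

    import pandas as pd

    df = pd.DataFrame({"temp_c": [17.0, 25.0]})

    # Each keyword is a new column name; a callable is evaluated on the
    # DataFrame, while a Series (or scalar/array) is assigned directly.
    df.assign(
        temp_f=lambda x: x.temp_c * 9 / 5 + 32,
        city=pd.Series(["Oslo", "Lagos"]),
    )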

pandas/core/generic.py
Lines changed: 1 addition & 1 deletion

@@ -665,7 +665,7 @@ def size(self) -> int:

        See Also
        --------
-        ndarray.size : Number of elements in the array.
+        numpy.ndarray.size : Number of elements in the array.

        Examples
        --------
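
For reference, the See Also entry now points at NumPy explicitly; a tiny sketch of both attributes:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
    df.size             # 4, rows * columns
    df.to_numpy().size  # 4, the numpy.ndarray.size referenced in See Also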

pandas/core/groupby/groupby.py
Lines changed: 23 additions & 66 deletions

@@ -1393,7 +1393,7 @@ def _aggregate_with_numba(self, func, *args, engine_kwargs=None, **kwargs):
    # -----------------------------------------------------------------
    # apply/agg/transform

-    def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
+    def apply(self, func, *args, include_groups: bool = False, **kwargs) -> NDFrameT:
        """
        Apply function ``func`` group-wise and combine the results together.

@@ -1419,18 +1419,17 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
        *args : tuple
            Optional positional arguments to pass to ``func``.

-        include_groups : bool, default True
+        include_groups : bool, default False
            When True, will attempt to apply ``func`` to the groupings in
            the case that they are columns of the DataFrame. If this raises a
            TypeError, the result will be computed with the groupings excluded.
            When False, the groupings will be excluded when applying ``func``.

            .. versionadded:: 2.2.0

-            .. deprecated:: 2.2.0
+            .. versionchanged:: 3.0.0

-               Setting include_groups to True is deprecated. Only the value
-               False will be allowed in a future version of pandas.
+               The default changed from True to False, and True is no longer allowed.

        **kwargs : dict
            Optional keyword arguments to pass to ``func``.
@@ -1520,7 +1519,7 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
        each group together into a Series, including setting the index as
        appropriate:

-        >>> g1.apply(lambda x: x.C.max() - x.B.min(), include_groups=False)
+        >>> g1.apply(lambda x: x.C.max() - x.B.min())
        A
        a    5
        b    2
@@ -1529,11 +1528,13 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
        Example 4: The function passed to ``apply`` returns ``None`` for one of the
        group. This group is filtered from the result:

-        >>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False)
+        >>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x)
           B  C
        0  1  4
        1  2  6
        """
+        if include_groups:
+            raise ValueError("include_groups=True is no longer allowed.")
        if isinstance(func, str):
            if hasattr(self, func):
                res = getattr(self, func)
@@ -1560,33 +1561,7 @@ def f(g):
        else:
            f = func

-        if not include_groups:
-            return self._python_apply_general(f, self._obj_with_exclusions)
-
-        try:
-            result = self._python_apply_general(f, self._selected_obj)
-            if (
-                not isinstance(self.obj, Series)
-                and self._selection is None
-                and self._selected_obj.shape != self._obj_with_exclusions.shape
-            ):
-                warnings.warn(
-                    message=_apply_groupings_depr.format(type(self).__name__, "apply"),
-                    category=DeprecationWarning,
-                    stacklevel=find_stack_level(),
-                )
-        except TypeError:
-            # gh-20949
-            # try again, with .apply acting as a filtering
-            # operation, by excluding the grouping column
-            # This would normally not be triggered
-            # except if the udf is trying an operation that
-            # fails on *some* columns, e.g. a numeric operation
-            # on a string grouper column
-
-            return self._python_apply_general(f, self._obj_with_exclusions)
-
-        return result
+        return self._python_apply_general(f, self._obj_with_exclusions)

    @final
    def _python_apply_general(
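
A short sketch of the enforced behavior (toy data assumed; the error message is the one raised in the diff above):

    import pandas as pd

    df = pd.DataFrame({"A": ["a", "a", "b"], "B": [1, 2, 3], "C": [4, 6, 5]})
    g1 = df.groupby("A")

    # Grouping columns are always excluded from the frame passed to func.
    g1.apply(lambda x: x.C.max() - x.B.min())

    # Passing include_groups=True now raises instead of warning.
    try:
        g1.apply(lambda x: x.sum(), include_groups=True)
    except ValueError as err:
        print(err)  # include_groups=True is no longer allowed.
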
@@ -3424,7 +3399,9 @@ def describe(
        return result

    @final
-    def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resampler:
+    def resample(
+        self, rule, *args, include_groups: bool = False, **kwargs
+    ) -> Resampler:
        """
        Provide resampling when using a TimeGrouper.

@@ -3449,10 +3426,9 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resampler:

            .. versionadded:: 2.2.0

-            .. deprecated:: 2.2.0
+            .. versionchanged:: 3.0

-               Setting include_groups to True is deprecated. Only the value
-               False will be allowed in a future version of pandas.
+               The default was changed to False, and True is no longer allowed.

        **kwargs
            Possible arguments are `how`, `fill_method`, `limit`, `kind` and
@@ -3485,7 +3461,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resampler:
        Downsample the DataFrame into 3 minute bins and sum the values of
        the timestamps falling into a bin.

-        >>> df.groupby("a").resample("3min", include_groups=False).sum()
+        >>> df.groupby("a").resample("3min").sum()
                                   b
        a
        0   2000-01-01 00:00:00    2
@@ -3494,7 +3470,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resampler:

        Upsample the series into 30 second bins.

-        >>> df.groupby("a").resample("30s", include_groups=False).sum()
+        >>> df.groupby("a").resample("30s").sum()
                                   b
        a
        0   2000-01-01 00:00:00    1
@@ -3508,7 +3484,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resampler:

        Resample by month. Values are assigned to the month of the period.

-        >>> df.groupby("a").resample("ME", include_groups=False).sum()
+        >>> df.groupby("a").resample("ME").sum()
                          b
        a
        0   2000-01-31    3
@@ -3517,11 +3493,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resampler:
        Downsample the series into 3 minute bins as above, but close the right
        side of the bin interval.

-        >>> (
-        ...     df.groupby("a")
-        ...     .resample("3min", closed="right", include_groups=False)
-        ...     .sum()
-        ... )
+        >>> (df.groupby("a").resample("3min", closed="right").sum())
                                   b
        a
        0   1999-12-31 23:57:00    1
@@ -3532,11 +3504,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resampler:
        the bin interval, but label each bin using the right edge instead of
        the left.

-        >>> (
-        ...     df.groupby("a")
-        ...     .resample("3min", closed="right", label="right", include_groups=False)
-        ...     .sum()
-        ... )
+        >>> (df.groupby("a").resample("3min", closed="right", label="right").sum())
                                   b
        a
        0   2000-01-01 00:00:00    1
@@ -3545,11 +3513,10 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resampler:
        """
        from pandas.core.resample import get_resampler_for_grouping

-        # mypy flags that include_groups could be specified via `*args` or `**kwargs`
-        # GH#54961 would resolve.
-        return get_resampler_for_grouping(  # type: ignore[misc]
-            self, rule, *args, include_groups=include_groups, **kwargs
-        )
+        if include_groups:
+            raise ValueError("include_groups=True is no longer allowed.")
+
+        return get_resampler_for_grouping(self, rule, *args, **kwargs)

    @final
    def rolling(
@@ -5561,13 +5528,3 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiIndex:
    mi = MultiIndex(levels=levels, codes=codes, names=[idx.name, None])

    return mi
-
-
-# GH#7155
-_apply_groupings_depr = (
-    "{}.{} operated on the grouping columns. This behavior is deprecated, "
-    "and in a future version of pandas the grouping columns will be excluded "
-    "from the operation. Either pass `include_groups=False` to exclude the "
-    "groupings or explicitly select the grouping columns after groupby to silence "
-    "this warning."
-)
