Skip to content

Commit a8bb0f3

Browse files
committed
Merge upstream/main into fix-issue-59846
2 parents 183a686 + 3e8a878 commit a8bb0f3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+309
-193
lines changed

.pre-commit-config.yaml

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
minimum_pre_commit_version: 2.15.0
1+
minimum_pre_commit_version: 4.0.0
22
exclude: ^LICENSES/|\.(html|csv|svg)$
33
# reserve "manual" for relatively slow hooks which we still want to run in CI
44
default_stages: [
@@ -19,13 +19,13 @@ ci:
1919
skip: [pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.9.9
22+
rev: v0.11.4
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
2626
exclude: ^pandas/tests/frame/test_query_eval.py
2727
- id: ruff
28-
# TODO: remove autofixe-only rules when they are checked by ruff
28+
# TODO: remove autofix-only rules when they are checked by ruff
2929
name: ruff-selected-autofixes
3030
alias: ruff-selected-autofixes
3131
files: ^pandas
@@ -34,7 +34,7 @@ repos:
3434
- id: ruff-format
3535
exclude: ^scripts|^pandas/tests/frame/test_query_eval.py
3636
- repo: https://github.com/jendrikseipp/vulture
37-
rev: 'v2.14'
37+
rev: v2.14
3838
hooks:
3939
- id: vulture
4040
entry: python scripts/run_vulture.py
@@ -95,14 +95,14 @@ repos:
9595
- id: sphinx-lint
9696
args: ["--enable", "all", "--disable", "line-too-long"]
9797
- repo: https://github.com/pre-commit/mirrors-clang-format
98-
rev: v19.1.7
98+
rev: v20.1.0
9999
hooks:
100100
- id: clang-format
101101
files: ^pandas/_libs/src|^pandas/_libs/include
102102
args: [-i]
103103
types_or: [c, c++]
104104
- repo: https://github.com/trim21/pre-commit-mirror-meson
105-
rev: v1.7.0
105+
rev: v1.7.2
106106
hooks:
107107
- id: meson-fmt
108108
args: ['--inplace']

asv_bench/benchmarks/frame_methods.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,7 @@ def setup(self):
517517
self.df = DataFrame(np.random.randn(1000, 100))
518518

519519
self.s = Series(np.arange(1028.0))
520-
self.df2 = DataFrame({i: self.s for i in range(1028)})
520+
self.df2 = DataFrame(dict.fromkeys(range(1028), self.s))
521521
self.df3 = DataFrame(np.random.randn(1000, 3), columns=list("ABC"))
522522

523523
def time_apply_user_func(self):

ci/code_checks.sh

-3
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7272
-i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
7373
-i "pandas.Period.freq GL08" \
7474
-i "pandas.Period.ordinal GL08" \
75-
-i "pandas.Timestamp.max PR02" \
76-
-i "pandas.Timestamp.min PR02" \
77-
-i "pandas.Timestamp.resolution PR02" \
7875
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
7976
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
8077
-i "pandas.core.resample.Resampler.quantile PR01,PR07" \

doc/source/whatsnew/v3.0.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ Other enhancements
6868
- :func:`read_parquet` accepts ``to_pandas_kwargs``, which are forwarded to :meth:`pyarrow.Table.to_pandas`; this enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as Python dictionaries (:issue:`56842`)
6969
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
7070
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
71+
- :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
7172
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
7273
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
7374
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
@@ -763,6 +764,7 @@ Plotting
763764
- Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when both color and a ``dict`` style are set (:issue:`59461`)
764765
- Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`)
765766
- Bug in :meth:`DataFrameGroupBy.plot` with ``kind="scatter"`` where all groups used the same color instead of different colors for each group (:issue:`59846`)
767+
- Bug in :meth:`Series.plot` preventing a line and scatter plot from being aligned (:issue:`61005`)
766768
- Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`)
767769

768770
Groupby/resample/rolling
@@ -774,6 +776,7 @@ Groupby/resample/rolling
774776
- Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
775777
- Bug in :meth:`.Resampler.interpolate` on a :class:`DataFrame` with non-uniform sampling and/or indices not aligning with the resulting resampled index would result in wrong interpolation (:issue:`21351`)
776778
- Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
779+
- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` not keeping the index name when the index had an :class:`ArrowDtype` timestamp dtype (:issue:`61222`)
777780
- Bug in :meth:`DataFrame.resample` changing index type to :class:`MultiIndex` when the dataframe is empty and using an upsample method (:issue:`55572`)
778781
- Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`)
779782
- Bug in :meth:`DataFrameGroupBy.apply` and :meth:`SeriesGroupBy.apply` for empty data frame with ``group_keys=False`` still creating output index using group keys. (:issue:`60471`)

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ dependencies:
8080
- flake8=7.1.0 # run in subprocess over docstring examples
8181
- mypy=1.13.0 # pre-commit uses locally installed mypy
8282
- tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py
83-
- pre-commit>=4.0.1
83+
- pre-commit>=4.2.0
8484

8585
# documentation
8686
- gitpython # obtain contributors from git for whatsnew

pandas/_libs/tslibs/timedeltas.pyi

+2-4
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ from typing import (
33
ClassVar,
44
Literal,
55
TypeAlias,
6-
TypeVar,
76
overload,
87
)
98

@@ -60,7 +59,6 @@ UnitChoices: TypeAlias = Literal[
6059
"nanos",
6160
"nanosecond",
6261
]
63-
_S = TypeVar("_S", bound=timedelta)
6462

6563
def get_unit_for_round(freq, creso: int) -> int: ...
6664
def disallow_ambiguous_unit(unit: str | None) -> None: ...
@@ -95,11 +93,11 @@ class Timedelta(timedelta):
9593
_value: int # np.int64
9694
# error: "__new__" must return a class instance (got "Union[Timestamp, NaTType]")
9795
def __new__( # type: ignore[misc]
98-
cls: type[_S],
96+
cls: type[Self],
9997
value=...,
10098
unit: str | None = ...,
10199
**kwargs: float | np.integer | np.floating,
102-
) -> _S | NaTType: ...
100+
) -> Self | NaTType: ...
103101
@classmethod
104102
def _from_value_and_reso(cls, value: np.int64, reso: int) -> Timedelta: ...
105103
@property

pandas/_libs/tslibs/timestamps.pyx

+77-7
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,9 @@ class MinMaxReso:
200200
201201
See also: timedeltas.MinMaxReso
202202
"""
203-
def __init__(self, name):
203+
def __init__(self, name, docstring):
204204
self._name = name
205+
self.__doc__ = docstring
205206

206207
def __get__(self, obj, type=None):
207208
cls = Timestamp
@@ -216,11 +217,15 @@ class MinMaxReso:
216217

217218
if obj is None:
218219
# i.e. this is on the class, default to nanos
219-
return cls(val)
220+
result = cls(val)
220221
elif self._name == "resolution":
221-
return Timedelta._from_value_and_reso(val, obj._creso)
222+
result = Timedelta._from_value_and_reso(val, obj._creso)
222223
else:
223-
return Timestamp._from_value_and_reso(val, obj._creso, tz=None)
224+
result = Timestamp._from_value_and_reso(val, obj._creso, tz=None)
225+
226+
result.__doc__ = self.__doc__
227+
228+
return result
224229

225230
def __set__(self, obj, value):
226231
raise AttributeError(f"{self._name} is not settable.")
@@ -235,9 +240,74 @@ cdef class _Timestamp(ABCTimestamp):
235240
dayofweek = _Timestamp.day_of_week
236241
dayofyear = _Timestamp.day_of_year
237242

238-
min = MinMaxReso("min")
239-
max = MinMaxReso("max")
240-
resolution = MinMaxReso("resolution") # GH#21336, GH#21365
243+
_docstring_min = """
244+
Returns the minimum bound possible for Timestamp.
245+
246+
This property provides access to the smallest possible value that
247+
can be represented by a Timestamp object.
248+
249+
Returns
250+
-------
251+
Timestamp
252+
253+
See Also
254+
--------
255+
Timestamp.max: Returns the maximum bound possible for Timestamp.
256+
Timestamp.resolution: Returns the smallest possible difference between
257+
non-equal Timestamp objects.
258+
259+
Examples
260+
--------
261+
>>> pd.Timestamp.min
262+
Timestamp('1677-09-21 00:12:43.145224193')
263+
"""
264+
265+
_docstring_max = """
266+
Returns the maximum bound possible for Timestamp.
267+
268+
This property provides access to the largest possible value that
269+
can be represented by a Timestamp object.
270+
271+
Returns
272+
-------
273+
Timestamp
274+
275+
See Also
276+
--------
277+
Timestamp.min: Returns the minimum bound possible for Timestamp.
278+
Timestamp.resolution: Returns the smallest possible difference between
279+
non-equal Timestamp objects.
280+
281+
Examples
282+
--------
283+
>>> pd.Timestamp.max
284+
Timestamp('2262-04-11 23:47:16.854775807')
285+
"""
286+
287+
_docstring_reso = """
288+
Returns the smallest possible difference between non-equal Timestamp objects.
289+
290+
The resolution value is determined by the underlying representation of time
291+
units and is equivalent to Timedelta(nanoseconds=1).
292+
293+
Returns
294+
-------
295+
Timedelta
296+
297+
See Also
298+
--------
299+
Timestamp.max: Returns the maximum bound possible for Timestamp.
300+
Timestamp.min: Returns the minimum bound possible for Timestamp.
301+
302+
Examples
303+
--------
304+
>>> pd.Timestamp.resolution
305+
Timedelta('0 days 00:00:00.000000001')
306+
"""
307+
308+
min = MinMaxReso("min", _docstring_min)
309+
max = MinMaxReso("max", _docstring_max)
310+
resolution = MinMaxReso("resolution", _docstring_reso) # GH#21336, GH#21365
241311

242312
@property
243313
def value(self) -> int:

pandas/core/algorithms.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ def _reconstruct_data(
215215
values = cls._from_sequence(values, dtype=dtype) # type: ignore[assignment]
216216

217217
else:
218-
values = values.astype(dtype, copy=False)
218+
values = values.astype(dtype, copy=False) # type: ignore[assignment]
219219

220220
return values
221221

pandas/core/apply.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ def transform(self) -> DataFrame | Series:
327327
if is_series:
328328
func = {com.get_callable_name(v) or v: v for v in func}
329329
else:
330-
func = {col: func for col in obj}
330+
func = dict.fromkeys(obj, func)
331331

332332
if is_dict_like(func):
333333
func = cast(AggFuncTypeDict, func)

pandas/core/array_algos/quantile.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def quantile_with_mask(
102102
interpolation=interpolation,
103103
)
104104

105-
result = np.asarray(result)
105+
result = np.asarray(result) # type: ignore[assignment]
106106
result = result.T
107107

108108
return result
@@ -196,7 +196,7 @@ def _nanquantile(
196196
# Caller is responsible for ensuring mask shape match
197197
assert mask.shape == values.shape
198198
result = [
199-
_nanquantile_1d(val, m, qs, na_value, interpolation=interpolation)
199+
_nanquantile_1d(val, m, qs, na_value, interpolation=interpolation) # type: ignore[arg-type]
200200
for (val, m) in zip(list(values), list(mask))
201201
]
202202
if values.dtype.kind == "f":

pandas/core/arrays/_mixins.py

+1-7
Original file line numberDiff line numberDiff line change
@@ -142,18 +142,12 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike:
142142

143143
dt64_values = arr.view(dtype)
144144
return DatetimeArray._simple_new(dt64_values, dtype=dtype)
145-
146145
elif lib.is_np_dtype(dtype, "m") and is_supported_dtype(dtype):
147146
from pandas.core.arrays import TimedeltaArray
148147

149148
td64_values = arr.view(dtype)
150149
return TimedeltaArray._simple_new(td64_values, dtype=dtype)
151-
152-
# error: Argument "dtype" to "view" of "_ArrayOrScalarCommon" has incompatible
153-
# type "Union[ExtensionDtype, dtype[Any]]"; expected "Union[dtype[Any], None,
154-
# type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int,
155-
# Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
156-
return arr.view(dtype=dtype) # type: ignore[arg-type]
150+
return arr.view(dtype=dtype)
157151

158152
def take(
159153
self,

pandas/core/arrays/arrow/_arrow_utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def pyarrow_array_to_numpy_and_mask(
4444
mask = pyarrow.BooleanArray.from_buffers(
4545
pyarrow.bool_(), len(arr), [None, bitmask], offset=arr.offset
4646
)
47-
mask = np.asarray(mask)
47+
mask = np.asarray(mask) # type: ignore[assignment]
4848
else:
4949
mask = np.ones(len(arr), dtype=bool)
5050
return data, mask

pandas/core/arrays/arrow/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2540,7 +2540,7 @@ def _str_get_dummies(self, sep: str = "|", dtype: NpDtype | None = None):
25402540
dummies_dtype = np.bool_
25412541
dummies = np.zeros(n_rows * n_cols, dtype=dummies_dtype)
25422542
dummies[indices] = True
2543-
dummies = dummies.reshape((n_rows, n_cols))
2543+
dummies = dummies.reshape((n_rows, n_cols)) # type: ignore[assignment]
25442544
result = type(self)(pa.array(list(dummies)))
25452545
return result, uniques_sorted.to_pylist()
25462546

pandas/core/arrays/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,7 @@ def to_numpy(
596596
if copy or na_value is not lib.no_default:
597597
result = result.copy()
598598
if na_value is not lib.no_default:
599-
result[self.isna()] = na_value
599+
result[self.isna()] = na_value # type: ignore[index]
600600
return result
601601

602602
# ------------------------------------------------------------------------

pandas/core/arrays/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1853,7 +1853,7 @@ def value_counts(self, dropna: bool = True) -> Series:
18531853
count = np.bincount(obs, minlength=ncat or 0)
18541854
else:
18551855
count = np.bincount(np.where(mask, code, ncat))
1856-
ix = np.append(ix, -1)
1856+
ix = np.append(ix, -1) # type: ignore[assignment]
18571857

18581858
ix = coerce_indexer_dtype(ix, self.dtype.categories)
18591859
ix_categorical = self._from_backing_data(ix)

pandas/core/arrays/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2394,7 +2394,7 @@ def take(
23942394
)
23952395

23962396
indices = np.asarray(indices, dtype=np.intp)
2397-
maybe_slice = lib.maybe_indices_to_slice(indices, len(self))
2397+
maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) # type: ignore[arg-type]
23982398

23992399
if isinstance(maybe_slice, slice):
24002400
freq = self._get_getitem_freq(maybe_slice)

pandas/core/arrays/datetimes.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ def _simple_new( # type: ignore[override]
331331
else:
332332
# DatetimeTZDtype. If we have e.g. DatetimeTZDtype[us, UTC],
333333
# then values.dtype should be M8[us].
334-
assert dtype._creso == get_unit_from_dtype(values.dtype)
334+
assert dtype._creso == get_unit_from_dtype(values.dtype) # type: ignore[union-attr]
335335

336336
result = super()._simple_new(values, dtype)
337337
result._freq = freq
@@ -542,7 +542,7 @@ def _unbox_scalar(self, value) -> np.datetime64:
542542
raise ValueError("'value' should be a Timestamp.")
543543
self._check_compatible_with(value)
544544
if value is NaT:
545-
return np.datetime64(value._value, self.unit)
545+
return np.datetime64(value._value, self.unit) # type: ignore[call-overload]
546546
else:
547547
return value.as_unit(self.unit, round_ok=False).asm8
548548

@@ -813,10 +813,7 @@ def _add_offset(self, offset: BaseOffset) -> Self:
813813
try:
814814
res_values = offset._apply_array(values._ndarray)
815815
if res_values.dtype.kind == "i":
816-
# error: Argument 1 to "view" of "ndarray" has incompatible type
817-
# "dtype[datetime64] | DatetimeTZDtype"; expected
818-
# "dtype[Any] | type[Any] | _SupportsDType[dtype[Any]]"
819-
res_values = res_values.view(values.dtype) # type: ignore[arg-type]
816+
res_values = res_values.view(values.dtype)
820817
except NotImplementedError:
821818
if get_option("performance_warnings"):
822819
warnings.warn(

pandas/core/arrays/masked.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,7 @@ def tolist(self) -> list:
515515
if self.ndim > 1:
516516
return [x.tolist() for x in self]
517517
dtype = None if self._hasna else self._data.dtype
518-
return self.to_numpy(dtype=dtype, na_value=libmissing.NA).tolist()
518+
return self.to_numpy(dtype=dtype, na_value=libmissing.NA).tolist() # type: ignore[return-value]
519519

520520
@overload
521521
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: ...
@@ -1497,10 +1497,10 @@ def all(
14971497
result = values.all(axis=axis)
14981498

14991499
if skipna:
1500-
return result
1500+
return result # type: ignore[return-value]
15011501
else:
15021502
if not result or len(self) == 0 or not self._mask.any():
1503-
return result
1503+
return result # type: ignore[return-value]
15041504
else:
15051505
return self.dtype.na_value
15061506

pandas/core/arrays/sparse/scipy_sparse.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def _levels_to_axis(
7979
ax_coords = codes[valid_ilocs]
8080

8181
ax_labels = ax_labels.tolist()
82-
return ax_coords, ax_labels
82+
return ax_coords, ax_labels # pyright: ignore[reportReturnType]
8383

8484

8585
def _to_ijv(

0 commit comments

Comments
 (0)