Skip to content

Commit 22f12fc

Browse files
DEPS: bump pyarrow minimum version from 10.0 to 12.0 (#61723)
1 parent 3550556 commit 22f12fc

35 files changed

+112
-201
lines changed

ci/deps/actions-310-minimum_versions.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ dependencies:
4141
- qtpy=2.3.0
4242
- openpyxl=3.1.2
4343
- psycopg2=2.9.6
44-
- pyarrow=10.0.1
44+
- pyarrow=12.0.1
4545
- pyiceberg=0.7.1
4646
- pymysql=1.1.0
4747
- pyqt=5.15.9

ci/deps/actions-310.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ dependencies:
3939
- qtpy>=2.3.0
4040
- openpyxl>=3.1.2
4141
- psycopg2>=2.9.6
42-
- pyarrow>=10.0.1
42+
- pyarrow>=12.0.1
4343
- pyiceberg>=0.7.1
4444
- pymysql>=1.1.0
4545
- pyqt>=5.15.9

ci/deps/actions-311-downstream_compat.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ dependencies:
4040
- qtpy>=2.3.0
4141
- openpyxl>=3.1.2
4242
- psycopg2>=2.9.6
43-
- pyarrow>=10.0.1
43+
- pyarrow>=12.0.1
4444
- pyiceberg>=0.7.1
4545
- pymysql>=1.1.0
4646
- pyqt>=5.15.9

ci/deps/actions-311.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ dependencies:
4040
- pyqt>=5.15.9
4141
- openpyxl>=3.1.2
4242
- psycopg2>=2.9.6
43-
- pyarrow>=10.0.1
43+
- pyarrow>=12.0.1
4444
- pyiceberg>=0.7.1
4545
- pymysql>=1.1.0
4646
- pyreadstat>=1.2.6

ci/deps/actions-312.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ dependencies:
4040
- pyqt>=5.15.9
4141
- openpyxl>=3.1.2
4242
- psycopg2>=2.9.6
43-
- pyarrow>=10.0.1
43+
- pyarrow>=12.0.1
4444
- pyiceberg>=0.7.1
4545
- pymysql>=1.1.0
4646
- pyreadstat>=1.2.6

ci/deps/actions-313.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ dependencies:
4141
- pyqt>=5.15.9
4242
- openpyxl>=3.1.2
4343
- psycopg2>=2.9.6
44-
- pyarrow>=10.0.1
44+
- pyarrow>=12.0.1
4545
- pymysql>=1.1.0
4646
- pyreadstat>=1.2.6
4747
- pytables>=3.8.0

doc/source/getting_started/install.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ Dependency Minimum Version pip ex
307307
`PyTables <https://github.com/PyTables/PyTables>`__ 3.8.0 hdf5 HDF5-based reading / writing
308308
`zlib <https://github.com/madler/zlib>`__ hdf5 Compression for HDF5
309309
`fastparquet <https://github.com/dask/fastparquet>`__ 2024.2.0 - Parquet reading / writing (pyarrow is default)
310-
`pyarrow <https://github.com/apache/arrow>`__ 10.0.1 parquet, feather Parquet, ORC, and feather reading / writing
310+
`pyarrow <https://github.com/apache/arrow>`__ 12.0.1 parquet, feather Parquet, ORC, and feather reading / writing
311311
`PyIceberg <https://py.iceberg.apache.org/>`__ 0.7.1 iceberg Apache Iceberg reading / writing
312312
`pyreadstat <https://github.com/Roche/pyreadstat>`__ 1.2.6 spss SPSS files (.sav) reading
313313
`odfpy <https://github.com/eea/odfpy>`__ 1.4.1 excel Open document format (.odf, .ods, .odt) reading / writing

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,8 @@ Optional libraries below the lowest tested version may still work, but are not c
321321
+------------------------+---------------------+
322322
| Package | New Minimum Version |
323323
+========================+=====================+
324+
| pyarrow | 12.0.1 |
325+
+------------------------+---------------------+
324326
| pytz | 2023.4 |
325327
+------------------------+---------------------+
326328
| fastparquet | 2024.2.0 |

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ dependencies:
4343
- openpyxl>=3.1.2
4444
- odfpy>=1.4.1
4545
- psycopg2>=2.9.6
46-
- pyarrow>=10.0.1
46+
- pyarrow>=12.0.1
4747
- pyiceberg>=0.7.1
4848
- pymysql>=1.1.0
4949
- pyreadstat>=1.2.6

pandas/_testing/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
set_locale,
1919
)
2020

21-
from pandas.compat import pa_version_under10p1
21+
from pandas.compat import HAS_PYARROW
2222

2323
import pandas as pd
2424
from pandas import (
@@ -183,7 +183,7 @@
183183
]
184184
]
185185

186-
if not pa_version_under10p1:
186+
if HAS_PYARROW:
187187
import pyarrow as pa
188188

189189
UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]

pandas/compat/__init__.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@
2626
from pandas.compat.numpy import is_numpy_dev
2727
from pandas.compat.pyarrow import (
2828
HAS_PYARROW,
29-
pa_version_under10p1,
30-
pa_version_under11p0,
29+
pa_version_under12p1,
3130
pa_version_under13p0,
3231
pa_version_under14p0,
3332
pa_version_under14p1,
@@ -160,8 +159,7 @@ def is_ci_environment() -> bool:
160159
"PYPY",
161160
"WASM",
162161
"is_numpy_dev",
163-
"pa_version_under10p1",
164-
"pa_version_under11p0",
162+
"pa_version_under12p1",
165163
"pa_version_under13p0",
166164
"pa_version_under14p0",
167165
"pa_version_under14p1",

pandas/compat/_optional.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
"openpyxl": "3.1.2",
3939
"psycopg2": "2.9.6", # (dt dec pq3 ext lo64)
4040
"pymysql": "1.1.0",
41-
"pyarrow": "10.0.1",
41+
"pyarrow": "12.0.1",
4242
"pyiceberg": "0.7.1",
4343
"pyreadstat": "1.2.6",
4444
"pytest": "7.3.2",

pandas/compat/pyarrow.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@
88
import pyarrow as pa
99

1010
_palv = Version(Version(pa.__version__).base_version)
11-
pa_version_under10p1 = _palv < Version("10.0.1")
12-
pa_version_under11p0 = _palv < Version("11.0.0")
13-
pa_version_under12p0 = _palv < Version("12.0.0")
11+
pa_version_under12p1 = _palv < Version("12.0.1")
1412
pa_version_under13p0 = _palv < Version("13.0.0")
1513
pa_version_under14p0 = _palv < Version("14.0.0")
1614
pa_version_under14p1 = _palv < Version("14.0.1")
@@ -20,11 +18,9 @@
2018
pa_version_under18p0 = _palv < Version("18.0.0")
2119
pa_version_under19p0 = _palv < Version("19.0.0")
2220
pa_version_under20p0 = _palv < Version("20.0.0")
23-
HAS_PYARROW = True
21+
HAS_PYARROW = _palv >= Version("12.0.1")
2422
except ImportError:
25-
pa_version_under10p1 = True
26-
pa_version_under11p0 = True
27-
pa_version_under12p0 = True
23+
pa_version_under12p1 = True
2824
pa_version_under13p0 = True
2925
pa_version_under14p0 = True
3026
pa_version_under14p1 = True

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,12 @@
1212

1313
from pandas._libs import lib
1414
from pandas.compat import (
15-
pa_version_under10p1,
16-
pa_version_under11p0,
15+
HAS_PYARROW,
1716
pa_version_under13p0,
1817
pa_version_under17p0,
1918
)
2019

21-
if not pa_version_under10p1:
20+
if HAS_PYARROW:
2221
import pyarrow as pa
2322
import pyarrow.compute as pc
2423

@@ -132,7 +131,7 @@ def _str_get(self, i: int) -> Self:
132131
def _str_slice(
133132
self, start: int | None = None, stop: int | None = None, step: int | None = None
134133
) -> Self:
135-
if pa_version_under11p0:
134+
if pa_version_under13p0:
136135
# GH#59724
137136
result = self._apply_elementwise(lambda val: val[start:stop:step])
138137
return type(self)(pa.chunked_array(result, type=self._pa_array.type))

pandas/core/arrays/arrow/accessors.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,11 @@
1111
cast,
1212
)
1313

14-
from pandas.compat import (
15-
pa_version_under10p1,
16-
pa_version_under11p0,
17-
)
14+
from pandas.compat import HAS_PYARROW
1815

1916
from pandas.core.dtypes.common import is_list_like
2017

21-
if not pa_version_under10p1:
18+
if HAS_PYARROW:
2219
import pyarrow as pa
2320
import pyarrow.compute as pc
2421

@@ -46,7 +43,7 @@ def _is_valid_pyarrow_dtype(self, pyarrow_dtype) -> bool:
4643

4744
def _validate(self, data) -> None:
4845
dtype = data.dtype
49-
if pa_version_under10p1 or not isinstance(dtype, ArrowDtype):
46+
if not HAS_PYARROW or not isinstance(dtype, ArrowDtype):
5047
# Raise AttributeError so that inspect can handle non-struct Series.
5148
raise AttributeError(self._validation_msg.format(dtype=dtype))
5249

@@ -171,11 +168,6 @@ def __getitem__(self, key: int | slice) -> Series:
171168
name=self._data.name,
172169
)
173170
elif isinstance(key, slice):
174-
if pa_version_under11p0:
175-
raise NotImplementedError(
176-
f"List slice not supported by pyarrow {pa.__version__}."
177-
)
178-
179171
# TODO: Support negative start/stop/step, ideally this would be added
180172
# upstream in pyarrow.
181173
start, stop, step = key.start, key.stop, key.step

pandas/core/arrays/arrow/array.py

Lines changed: 7 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
timezones,
2323
)
2424
from pandas.compat import (
25-
pa_version_under10p1,
26-
pa_version_under11p0,
25+
HAS_PYARROW,
26+
pa_version_under12p1,
2727
pa_version_under13p0,
2828
)
2929
from pandas.util._decorators import doc
@@ -74,7 +74,7 @@
7474
from pandas.io._util import _arrow_dtype_mapping
7575
from pandas.tseries.frequencies import to_offset
7676

77-
if not pa_version_under10p1:
77+
if HAS_PYARROW:
7878
import pyarrow as pa
7979
import pyarrow.compute as pc
8080

@@ -208,16 +208,6 @@ def floordiv_compat(
208208
from pandas.core.arrays.timedeltas import TimedeltaArray
209209

210210

211-
def get_unit_from_pa_dtype(pa_dtype) -> str:
212-
# https://github.com/pandas-dev/pandas/pull/50998#discussion_r1100344804
213-
if pa_version_under11p0:
214-
unit = str(pa_dtype).split("[", 1)[-1][:-1]
215-
if unit not in ["s", "ms", "us", "ns"]:
216-
raise ValueError(pa_dtype)
217-
return unit
218-
return pa_dtype.unit
219-
220-
221211
def to_pyarrow_type(
222212
dtype: ArrowDtype | pa.DataType | Dtype | None,
223213
) -> pa.DataType | None:
@@ -300,7 +290,7 @@ class ArrowExtensionArray(
300290
_dtype: ArrowDtype
301291

302292
def __init__(self, values: pa.Array | pa.ChunkedArray) -> None:
303-
if pa_version_under10p1:
293+
if pa_version_under12p1:
304294
msg = "pyarrow>=10.0.1 is required for PyArrow backed ArrowExtensionArray."
305295
raise ImportError(msg)
306296
if isinstance(values, pa.Array):
@@ -1199,10 +1189,6 @@ def factorize(
11991189
null_encoding = "mask" if use_na_sentinel else "encode"
12001190

12011191
data = self._pa_array
1202-
pa_type = data.type
1203-
if pa_version_under11p0 and pa.types.is_duration(pa_type):
1204-
# https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
1205-
data = data.cast(pa.int64())
12061192

12071193
if pa.types.is_dictionary(data.type):
12081194
if null_encoding == "encode":
@@ -1227,8 +1213,6 @@ def factorize(
12271213
)
12281214
uniques = type(self)(combined.dictionary)
12291215

1230-
if pa_version_under11p0 and pa.types.is_duration(pa_type):
1231-
uniques = cast(ArrowExtensionArray, uniques.astype(self.dtype))
12321216
return indices, uniques
12331217

12341218
def reshape(self, *args, **kwargs):
@@ -1515,19 +1499,7 @@ def unique(self) -> Self:
15151499
-------
15161500
ArrowExtensionArray
15171501
"""
1518-
pa_type = self._pa_array.type
1519-
1520-
if pa_version_under11p0 and pa.types.is_duration(pa_type):
1521-
# https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
1522-
data = self._pa_array.cast(pa.int64())
1523-
else:
1524-
data = self._pa_array
1525-
1526-
pa_result = pc.unique(data)
1527-
1528-
if pa_version_under11p0 and pa.types.is_duration(pa_type):
1529-
pa_result = pa_result.cast(pa_type)
1530-
1502+
pa_result = pc.unique(self._pa_array)
15311503
return type(self)(pa_result)
15321504

15331505
def value_counts(self, dropna: bool = True) -> Series:
@@ -1547,18 +1519,12 @@ def value_counts(self, dropna: bool = True) -> Series:
15471519
--------
15481520
Series.value_counts
15491521
"""
1550-
pa_type = self._pa_array.type
1551-
if pa_version_under11p0 and pa.types.is_duration(pa_type):
1552-
# https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
1553-
data = self._pa_array.cast(pa.int64())
1554-
else:
1555-
data = self._pa_array
1556-
15571522
from pandas import (
15581523
Index,
15591524
Series,
15601525
)
15611526

1527+
data = self._pa_array
15621528
vc = data.value_counts()
15631529

15641530
values = vc.field(0)
@@ -1568,9 +1534,6 @@ def value_counts(self, dropna: bool = True) -> Series:
15681534
values = values.filter(mask)
15691535
counts = counts.filter(mask)
15701536

1571-
if pa_version_under11p0 and pa.types.is_duration(pa_type):
1572-
values = values.cast(pa_type)
1573-
15741537
counts = ArrowExtensionArray(counts)
15751538

15761539
index = Index(type(self)(values))
@@ -1864,8 +1827,7 @@ def pyarrow_meth(data, skip_nulls, min_count=0): # type: ignore[misc]
18641827
if pa.types.is_duration(pa_type):
18651828
result = result.cast(pa_type)
18661829
elif pa.types.is_time(pa_type):
1867-
unit = get_unit_from_pa_dtype(pa_type)
1868-
result = result.cast(pa.duration(unit))
1830+
result = result.cast(pa.duration(pa_type.unit))
18691831
elif pa.types.is_date(pa_type):
18701832
# go with closest available unit, i.e. "s"
18711833
result = result.cast(pa.duration("s"))

pandas/core/arrays/string_.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from pandas._libs.lib import ensure_string_array
2626
from pandas.compat import (
2727
HAS_PYARROW,
28-
pa_version_under10p1,
28+
pa_version_under12p1,
2929
)
3030
from pandas.compat.numpy import function as nv
3131
from pandas.util._decorators import (
@@ -182,9 +182,9 @@ def __init__(
182182
raise ValueError(
183183
f"Storage must be 'python' or 'pyarrow'. Got {storage} instead."
184184
)
185-
if storage == "pyarrow" and pa_version_under10p1:
185+
if storage == "pyarrow" and pa_version_under12p1:
186186
raise ImportError(
187-
"pyarrow>=10.0.1 is required for PyArrow backed StringArray."
187+
"pyarrow>=12.0.1 is required for PyArrow backed StringArray."
188188
)
189189

190190
if isinstance(na_value, float) and np.isnan(na_value):

pandas/core/arrays/string_arrow.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
missing as libmissing,
1515
)
1616
from pandas.compat import (
17-
pa_version_under10p1,
17+
HAS_PYARROW,
18+
pa_version_under12p1,
1819
pa_version_under13p0,
1920
pa_version_under16p0,
2021
)
@@ -38,7 +39,7 @@
3839
)
3940
from pandas.core.strings.object_array import ObjectStringArrayMixin
4041

41-
if not pa_version_under10p1:
42+
if HAS_PYARROW:
4243
import pyarrow as pa
4344
import pyarrow.compute as pc
4445

@@ -63,8 +64,8 @@
6364

6465

6566
def _chk_pyarrow_available() -> None:
66-
if pa_version_under10p1:
67-
msg = "pyarrow>=10.0.1 is required for PyArrow backed ArrowExtensionArray."
67+
if pa_version_under12p1:
68+
msg = "pyarrow>=12.0.1 is required for PyArrow backed ArrowExtensionArray."
6869
raise ImportError(msg)
6970

7071

0 commit comments

Comments
 (0)