Skip to content

Commit 4186d56

Browse files
authored
Lower pyarrow requirement from 18 to 16 (#328)
* Lower pyarrow requirement from 18 to 17. This required a shim function, which should be removed in the future, when we finally drop old pyarrow versions. Fixes #326.
* Lower pyarrow requirement to 16.
* Do what Melissa said.
1 parent b176739 commit 4186d56

File tree

4 files changed

+32
-6
lines changed

4 files changed

+32
-6
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ dependencies = [
2121
"numpy>=2",
2222
# We use internal pd._libs.missing and experimental ArrowExtensionArray
2323
"pandas>=2.2.3,<2.4",
24-
"pyarrow>=18",
24+
"pyarrow>=16", # remove the struct_field_names shim once the minimum pyarrow version is raised to 18+
2525
"universal_pathlib>=0.2",
2626
]
2727

src/nested_pandas/series/ext_array.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
chunk_lengths,
6060
is_pa_type_a_list,
6161
rechunk,
62+
struct_field_names,
6263
transpose_struct_list_type,
6364
)
6465

@@ -186,7 +187,7 @@ def convert_df_to_pa_scalar(df: pd.DataFrame, *, pa_type: pa.StructType | None)
186187
types = {}
187188
columns = df.columns
188189
if pa_type is not None:
189-
names = pa_type.names
190+
names = struct_field_names(pa_type)
190191
columns = names + list(set(columns) - set(names))
191192
for column in columns:
192193
series = df[column]
@@ -913,7 +914,7 @@ def list_offsets(self) -> pa.Array:
913914
@property
914915
def field_names(self) -> list[str]:
915916
"""Names of the nested columns"""
916-
return [field for field in self._pyarrow_dtype.names]
917+
return struct_field_names(self._pyarrow_dtype)
917918

918919
@property
919920
def list_lengths(self) -> np.ndarray:

src/nested_pandas/series/utils.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,15 @@
1111
from nested_pandas.series.dtype import NestedDtype
1212

1313

14+
def struct_field_names(struct_type: pa.StructType) -> list[str]:
15+
"""Return field names for a pyarrow.StructType in a pyarrow<18-compatible way.
16+
17+
Note: Once we bump our pyarrow requirement to ">=18", this helper can be
18+
replaced with direct usage of ``struct_type.names`` throughout the codebase.
19+
"""
20+
return [f.name for f in struct_type]
21+
22+
1423
def is_pa_type_a_list(pa_type: pa.DataType) -> bool:
1524
"""Check if the given pyarrow type is a list type.
1625
@@ -149,7 +158,7 @@ def transpose_struct_list_array(array: pa.StructArray, validate: bool = True) ->
149158
struct_flat_array = pa.StructArray.from_arrays(
150159
# Select values within the offsets
151160
[field.values[field.offsets[0].as_py() : field.offsets[-1].as_py()] for field in array.flatten()],
152-
names=array.type.names,
161+
names=struct_field_names(array.type),
153162
)
154163
return pa.ListArray.from_arrays(
155164
offsets=offsets,
@@ -196,7 +205,7 @@ def transpose_list_struct_scalar(scalar: pa.ListScalar) -> pa.StructScalar:
196205
"""
197206
struct_type = transpose_list_struct_type(scalar.type)
198207
struct_scalar = pa.scalar(
199-
{field: scalar.values.field(field) for field in struct_type.names},
208+
{field.name: scalar.values.field(field.name) for field in struct_type},
200209
type=struct_type,
201210
)
202211
return cast(pa.StructScalar, struct_scalar)
@@ -265,7 +274,7 @@ def transpose_list_struct_array(array: pa.ListArray) -> pa.StructArray:
265274

266275
return pa.StructArray.from_arrays(
267276
arrays=fields,
268-
names=array.type.value_type.names,
277+
names=struct_field_names(array.type.value_type),
269278
mask=mask,
270279
)
271280

tests/nested_pandas/series/test_series_utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from nested_pandas import NestedDtype
55
from nested_pandas.series.utils import (
66
nested_types_mapper,
7+
struct_field_names,
78
transpose_list_struct_array,
89
transpose_list_struct_scalar,
910
transpose_list_struct_type,
@@ -125,6 +126,21 @@ def test_transpose_list_struct_scalar():
125126
assert actual == desired
126127

127128

129+
def test_struct_field_names():
130+
"""Test that struct_field_names returns the struct's field names in declaration order."""
131+
132+
# Validate that the shim works as expected (it is needed while we still support pyarrow<18)
133+
t = pa.struct(
134+
[
135+
pa.field("a", pa.list_(pa.int64())),
136+
pa.field("b", pa.list_(pa.float64())),
137+
pa.field("c", pa.list_(pa.string())),
138+
]
139+
)
140+
# Ensure we get names in the correct order
141+
assert struct_field_names(t) == ["a", "b", "c"]
142+
143+
128144
@pytest.mark.parametrize(
129145
"pa_type,is_nested",
130146
[

0 commit comments

Comments
 (0)