Skip to content

Commit 7a0c3bd

Browse files
authored
Merge pull request #190 from lincc-frameworks/flat-chunked-array-fix
Fix issue with flat chunked arrays
2 parents 77a816d + 610853b commit 7a0c3bd

File tree

2 files changed

+32
-1
lines changed

2 files changed

+32
-1
lines changed

src/nested_pandas/series/packer.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,10 +293,17 @@ def view_sorted_series_as_list_array(
293293
if unique_index is None:
294294
unique_index = series.index[offset[:-1]]
295295

296+
# The input series may be backed by a pyarrow.ChunkedArray. In that case pa.array(series) returns a
297+
# ChunkedArray, and pa.ListArray.from_arrays would fail with "TypeError: Cannot convert a 'ChunkedArray' to a 'ListArray'".
298+
# https://github.com/lincc-frameworks/nested-pandas/issues/189
299+
flat_array = pa.array(series, from_pandas=True)
300+
if isinstance(flat_array, pa.ChunkedArray):
301+
flat_array = flat_array.combine_chunks()
296302
list_array = pa.ListArray.from_arrays(
297303
offset,
298-
pa.array(series, from_pandas=True),
304+
flat_array,
299305
)
306+
300307
return pd.Series(
301308
list_array,
302309
dtype=pd.ArrowDtype(list_array.type),

tests/nested_pandas/series/test_packer.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,30 @@ def test_view_sorted_series_as_list_array_raises_when_not_sorted():
524524
packer.view_sorted_series_as_list_array(series)
525525

526526

527+
def test_view_sorted_series_as_list_array_chunked_input():
528+
"""Pack a series backed by a two-chunk pyarrow.ChunkedArray (regression test for issue #189).
529+
530+
https://github.com/lincc-frameworks/nested-pandas/issues/189
531+
"""
532+
series = pd.Series(
533+
pa.chunked_array([pa.array([0, 1, 2]), pa.array([None, 4])]),  # two chunks; the second one holds a null
534+
name="a",
535+
index=np.arange(5),
536+
dtype=pd.ArrowDtype(pa.int64()),
537+
)
538+
offset = np.array([0, 2, 4, 5])  # list boundaries: [0:2], [2:4], [4:5]
539+
unique_index = ["x", "y", "z"]
540+
desired = pd.Series(
541+
pa.array([[0, 1], [2, None], [4]]),
542+
index=unique_index,
543+
dtype=pd.ArrowDtype(pa.list_(pa.int64())),
544+
name="a",
545+
)
546+
547+
actual = packer.view_sorted_series_as_list_array(series, offset, unique_index)
548+
assert_series_equal(actual, desired)
549+
550+
527551
@pytest.mark.parametrize(
528552
"index,offsets",
529553
[

0 commit comments

Comments
 (0)