@@ -543,6 +543,32 @@ def test_get_list_series():
543
543
)
544
544
545
545
546
def test_get_list_series_multiple_chunks():
    """Check .nest.get_list_series() on a series backed by a multi-chunk array."""
    row_labels = [5, 7, 9, 11, 13, 15]
    a_lists = [np.array([1, 2, 3]), np.array([4, 5, 6])]
    b_lists = [np.array([6, 4, 2]), np.array([1, 2, 3])]

    chunk = pa.StructArray.from_arrays(arrays=[a_lists, b_lists], names=["a", "b"])
    # The same two-row struct array is used as each of the three chunks.
    chunked = pa.chunked_array([chunk, chunk, chunk])
    nested = pd.Series(chunked, dtype=NestedDtype(chunked.type), index=row_labels)
    assert nested.array.num_chunks == 3

    actual = nested.nest.get_list_series("a")

    # Field "a" is expected back as a list-typed series that keeps the
    # original index and is named after the field.
    expected = pd.Series(
        data=a_lists * 3,
        dtype=pd.ArrowDtype(pa.list_(pa.int64())),
        index=row_labels,
        name="a",
    )
    assert_series_equal(actual, expected)
570
+
571
+
546
572
def test_get ():
547
573
"""Test .nest.get() which is implemented by the base class"""
548
574
series = pack_seq (
@@ -588,6 +614,33 @@ def test___getitem___single_field():
588
614
)
589
615
590
616
617
def test___getitem___single_field_multiple_chunks():
    """Regression test for issue 142: .nest["a"] on a multi-chunk series.

    https://github.com/lincc-frameworks/nested-pandas/issues/142
    """
    a_values = [np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, 1.0])]
    b_values = [np.array([4.0, 5.0, 6.0]), np.array([3.0, 4.0, 5.0])]

    chunk = pa.StructArray.from_arrays(arrays=[a_values, b_values], names=["a", "b"])
    # The same two-row struct array is used as each of the three chunks.
    chunked = pa.chunked_array([chunk, chunk, chunk])
    nested = pd.Series(chunked, dtype=NestedDtype(chunked.type), index=list(range(6)))
    assert nested.array.num_chunks == 3

    # Every row holds three nested elements, so each row label appears
    # three times in the expected flat index.
    flat_index = [label for label in range(6) for _ in range(3)]
    expected = pd.Series(
        np.array([1.0, 2.0, 3.0, 1.0, 2.0, 1.0] * 3),
        dtype=pd.ArrowDtype(pa.float64()),
        index=flat_index,
        name="a",
    )
    assert_series_equal(nested.nest["a"], expected)
642
+
643
+
591
644
def test___getitem___multiple_fields ():
592
645
"""Test that the .nest[["b", "a"]] works for multiple fields."""
593
646
arrays = [
0 commit comments