File tree 2 files changed +49
-0
lines changed
2 files changed +49
-0
lines changed Original file line number Diff line number Diff line change @@ -254,6 +254,12 @@ cdef class BuilderManager:
254
254
# For list children, the nulls are stored in the parent.
255
255
key = field.encode(' utf-8' )
256
256
parent_type = self .parent_types.get(key, None )
257
+ # Check if the item was in our schema but never seen, and should have a parent.
258
+ if parent_type is None and " ." in field:
259
+ parent_key, _, _ = field.rpartition(' .' )
260
+ self .parent_names[key] = parent_key.encode(' utf-8' )
261
+ parent_type = BSON_TYPE_DOCUMENT
262
+ # Add nulls according to parent type.
257
263
if parent_type == BSON_TYPE_ARRAY:
258
264
continue
259
265
if parent_type == BSON_TYPE_DOCUMENT:
Original file line number Diff line number Diff line change @@ -533,6 +533,49 @@ def test_schema_arrays_of_documents_with_nulls(self):
533
533
expected = json .load (fid )
534
534
assert df .to_pylist () == expected
535
535
536
def test_schema_arrays_of_documents_orphaned_null(self):
    # Regression test for
    # https://github.com/mongodb-labs/mongo-arrow/issues/265.
    #
    # The schema declares "field1" as a document with two string
    # sub-fields, nested inside a list. The second list element inserted
    # below stores a plain string under "field1" instead of a document;
    # the expected result reports both sub-fields as None for it.
    collection = self.coll
    collection.delete_many({})

    field1_type = {
        "sub_field1": pa.string(),
        "sub_field2": pa.string(),
    }
    schema = Schema(
        {
            "_id": ObjectId,
            "test_list_struct": [{"field1": field1_type}],
        }
    )

    payload = {
        "_id": ObjectId("000000000000000000000001"),
        "test_list_struct": [
            # Only one of the two declared sub-fields is present.
            {"field1": {"sub_field1": "test_data"}},
            # "field1" holds a string where the schema expects a document.
            {"field1": "test_data"},
        ],
    }
    collection.insert_one(payload)

    table = aggregate_arrow_all(collection, schema=schema, pipeline=[])
    row = table.to_pylist()[0]
    # Drop "_id" so the comparison covers the nested list only.
    row.pop("_id")

    expected = {
        "test_list_struct": [
            {"field1": {"sub_field1": "test_data", "sub_field2": None}},
            {"field1": {"sub_field1": None, "sub_field2": None}},
        ]
    }
    assert row == expected
578
+
536
579
def test_auto_schema_nested (self ):
537
580
# Create table with random data of various types.
538
581
_ , data = self ._create_nested_data ()
You can’t perform that action at this time.
0 commit comments