Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Sources/Arrow/ArrowArrayBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, NestedAr
try super.init(ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields))
}

/// Appends a type-erased struct row.
///
/// The value is conditionally cast to `[Any?]` and handed to `append(_:)`;
/// a `nil` value, or one that is not a `[Any?]`, is forwarded as `nil`.
///
/// - Parameter val: The row to append, expected to be a `[Any?]` or `nil`.
public override func appendAny(_ val: Any?) {
    let row = val as? [Any?]
    self.append(row)
}
Comment on lines +149 to +151
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a unit test that exercises appendAny on StructArrayBuilder and ListArrayBuilder (especially list<struct<...>>), since this change fixes a correctness issue where appendAny previously bypassed child-builder distribution. Existing tests cover appendAny for primitive builders but not nested builders, so this could regress without targeted coverage.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added testStructArrayBuilderAppendAny and testListOfStructAppendAny


public override func append(_ values: [Any?]?) {
self.bufferBuilder.append(values)
if let anyValues = values {
Expand Down Expand Up @@ -186,6 +190,10 @@ public class ListArrayBuilder: ArrowArrayBuilder<ListBufferBuilder, NestedArray>
try super.init(arrowType)
}

/// Appends a type-erased list entry.
///
/// The value is conditionally cast to `[Any?]` and handed to `append(_:)`;
/// a `nil` value, or one that is not a `[Any?]`, is forwarded as `nil`.
///
/// - Parameter val: The list to append, expected to be a `[Any?]` or `nil`.
public override func appendAny(_ val: Any?) {
    let elements = val as? [Any?]
    self.append(elements)
}
Comment on lines +193 to +195
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a unit test that calls ListArrayBuilder.appendAny with non-nil lists and verifies the produced array’s values child contains the appended elements. This override is fixing a nested-builder correctness bug; having a regression test here would help prevent silent reintroductions.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added testListArrayBuilderAppendAny.


public override func append(_ values: [Any?]?) {
self.bufferBuilder.append(values)
if let vals = values {
Expand Down
2 changes: 1 addition & 1 deletion Sources/Arrow/ArrowSchema.swift
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public class ArrowField {
public let name: String
public let isNullable: Bool

init(_ name: String, type: ArrowType, isNullable: Bool) {
public init(_ name: String, type: ArrowType, isNullable: Bool) {
self.name = name
self.type = type
self.isNullable = isNullable
Expand Down
76 changes: 54 additions & 22 deletions Sources/Arrow/ArrowWriter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,13 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
}

fieldsOffset = fbb.createVector(ofOffsets: offsets)
} else if let listField = field.type as? ArrowTypeList {
switch writeField(&fbb, field: listField.elementField) {
case .success(let offset):
fieldsOffset = fbb.createVector(ofOffsets: [offset])
case .failure(let error):
return .failure(error)
}
}

let nameOffset = fbb.create(string: field.name)
Expand Down Expand Up @@ -178,16 +185,23 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
fbb: inout FlatBufferBuilder) {
for index in (0 ..< fields.count).reversed() {
let column = columns[index]
let fieldNode =
org_apache_arrow_flatbuf_FieldNode(length: Int64(column.length),
nullCount: Int64(column.nullCount))
offsets.append(fbb.create(struct: fieldNode))
// FlatBuffer vectors use prepend semantics: last-written element becomes
// the first when read. Arrow IPC requires depth-first pre-order (parent
// before children), so children must be written before their parent here.
if let nestedType = column.type as? ArrowTypeStruct {
let nestedArray = column.array as? NestedArray
if let nestedFields = nestedArray?.fields {
writeFieldNodes(nestedType.fields, columns: nestedFields, offsets: &offsets, fbb: &fbb)
}
} else if let listType = column.type as? ArrowTypeList {
if let nestedArray = column.array as? NestedArray, let valuesHolder = nestedArray.values {
writeFieldNodes([listType.elementField], columns: [valuesHolder], offsets: &offsets, fbb: &fbb)
}
}
let fieldNode =
org_apache_arrow_flatbuf_FieldNode(length: Int64(column.length),
nullCount: Int64(column.nullCount))
offsets.append(fbb.create(struct: fieldNode))
Comment on lines +188 to +204
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add IPC-level unit tests for list columns (at least list<int32> and list<struct<...>>) that round-trip through ArrowWriter.writeStreaming/ArrowReader.readStreaming. This change adds list handling and adjusts nested field-node ordering; without regression tests it’s easy to reintroduce invalid node/buffer ordering that external readers reject.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added testListInt32RBInMemoryToFromStream and testListStructRBInMemoryToFromStream in IPCTests.

}
}

Expand All @@ -204,12 +218,17 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
let buffer = org_apache_arrow_flatbuf_Buffer(offset: Int64(bufferOffset), length: Int64(bufferDataSize))
buffers.append(buffer)
bufferOffset += bufferDataSize
if let nestedType = column.type as? ArrowTypeStruct {
let nestedArray = column.array as? NestedArray
if let nestedFields = nestedArray?.fields {
writeBufferInfo(nestedType.fields, columns: nestedFields,
bufferOffset: &bufferOffset, buffers: &buffers, fbb: &fbb)
}
}
if let nestedType = column.type as? ArrowTypeStruct {
let nestedArray = column.array as? NestedArray
if let nestedFields = nestedArray?.fields {
writeBufferInfo(nestedType.fields, columns: nestedFields,
bufferOffset: &bufferOffset, buffers: &buffers, fbb: &fbb)
}
} else if let listType = column.type as? ArrowTypeList {
if let nestedArray = column.array as? NestedArray, let valuesHolder = nestedArray.values {
writeBufferInfo([listType.elementField], columns: [valuesHolder],
bufferOffset: &bufferOffset, buffers: &buffers, fbb: &fbb)
}
}
}
Expand Down Expand Up @@ -263,18 +282,30 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
for var bufferData in colBufferData {
addPadForAlignment(&bufferData)
writer.append(bufferData)
if let nestedType = column.type as? ArrowTypeStruct {
guard let nestedArray = column.array as? NestedArray,
let nestedFields = nestedArray.fields else {
return .failure(.invalid("Struct type array expected for nested type"))
}
}
if let nestedType = column.type as? ArrowTypeStruct {
guard let nestedArray = column.array as? NestedArray,
let nestedFields = nestedArray.fields else {
return .failure(.invalid("Struct type array expected for nested type"))
}

switch writeRecordBatchData(&writer, fields: nestedType.fields, columns: nestedFields) {
case .success:
continue
case .failure(let error):
return .failure(error)
}
switch writeRecordBatchData(&writer, fields: nestedType.fields, columns: nestedFields) {
case .success:
continue
case .failure(let error):
return .failure(error)
}
} else if let listType = column.type as? ArrowTypeList {
guard let nestedArray = column.array as? NestedArray,
let valuesHolder = nestedArray.values else {
return .failure(.invalid("List type array expected with values child"))
}

switch writeRecordBatchData(&writer, fields: [listType.elementField], columns: [valuesHolder]) {
case .success:
continue
case .failure(let error):
return .failure(error)
}
}
}
Expand Down Expand Up @@ -341,7 +372,8 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
public func writeStreaming(_ info: ArrowWriter.Info) -> Result<Data, ArrowError> {
let writer: any DataWriter = InMemDataWriter()
switch toMessage(info.schema) {
case .success(let schemaData):
case .success(var schemaData):
addPadForAlignment(&schemaData)
withUnsafeBytes(of: CONTINUATIONMARKER.littleEndian) {writer.append(Data($0))}
withUnsafeBytes(of: UInt32(schemaData.count).littleEndian) {writer.append(Data($0))}
writer.append(schemaData)
Comment on lines 372 to 379
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding a regression test that asserts the schema message metadata length written by writeStreaming is padded to an 8-byte multiple (and therefore the first record-batch body starts at an 8-byte-aligned offset). This is a spec requirement and a subtle invariant that can regress without a targeted test.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added testStreamingSchemaMetadataPadding

Expand Down
5 changes: 5 additions & 0 deletions Sources/Arrow/ArrowWriterHelper.swift
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ func toFBTypeEnum(_ arrowType: ArrowType) -> Result<org_apache_arrow_flatbuf_Typ
return .success(org_apache_arrow_flatbuf_Type_.timestamp)
case .strct:
return .success(org_apache_arrow_flatbuf_Type_.struct_)
case .list:
return .success(org_apache_arrow_flatbuf_Type_.list)
default:
return .failure(.unknownType("Unable to find flatbuf type for Arrow type: \(typeId)"))
}
Expand Down Expand Up @@ -134,6 +136,9 @@ func toFBType( // swiftlint:disable:this cyclomatic_complexity function_body_len
case .strct:
let startOffset = org_apache_arrow_flatbuf_Struct_.startStruct_(&fbb)
return .success(org_apache_arrow_flatbuf_Struct_.endStruct_(&fbb, start: startOffset))
case .list:
let startOffset = org_apache_arrow_flatbuf_List.startList(&fbb)
return .success(org_apache_arrow_flatbuf_List.endList(&fbb, start: startOffset))
default:
return .failure(.unknownType("Unable to add flatbuf type for Arrow type: \(infoType)"))
}
Expand Down
111 changes: 111 additions & 0 deletions Tests/ArrowTests/ArrayTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -524,4 +524,115 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length
let emptyList = nestedArray[3]!
XCTAssertEqual(emptyList.count, 0)
}

/// Verifies that `appendAny` on a struct builder obtained from
/// `ArrowArrayBuilders.loadBuilder` records two populated rows and one null
/// row, and that both child field holders report the full row count.
func testStructArrayBuilderAppendAny() throws {
    let structType = ArrowTypeStruct(ArrowType.ArrowStruct, fields: [
        ArrowField("name", type: ArrowType(ArrowType.ArrowString), isNullable: true),
        ArrowField("age", type: ArrowType(ArrowType.ArrowInt32), isNullable: true)
    ])
    let builder = try ArrowArrayBuilders.loadBuilder(arrowType: structType)

    builder.appendAny(["Alice", Int32(30)] as [Any?])
    builder.appendAny(nil)
    builder.appendAny(["Bob", Int32(25)] as [Any?])

    let holder = try builder.toHolder()
    // XCTUnwrap fails this test with a message instead of trapping the whole
    // test process the way a force-unwrap of nil would.
    let structArray = try XCTUnwrap(holder.array as? NestedArray)
    XCTAssertEqual(structArray.length, 3)
    XCTAssertEqual(holder.nullCount, 1)

    let row0 = try XCTUnwrap(structArray[0])
    XCTAssertEqual(row0[0] as? String, "Alice")
    XCTAssertEqual(row0[1] as? Int32, 30)

    XCTAssertNil(structArray[1])

    let row2 = try XCTUnwrap(structArray[2])
    XCTAssertEqual(row2[0] as? String, "Bob")
    XCTAssertEqual(row2[1] as? Int32, 25)

    // Each child holder should have one slot per appended row, including the null row.
    let fields = try XCTUnwrap(structArray.fields)
    XCTAssertEqual(fields.count, 2)
    XCTAssertEqual(fields[0].length, 3)
    XCTAssertEqual(fields[1].length, 3)
}

/// Verifies that `ListArrayBuilder.appendAny` stores two non-nil lists and one
/// null entry, and that the values child holds every appended element.
func testListArrayBuilderAppendAny() throws {
    let listType = ArrowTypeList(ArrowType(ArrowType.ArrowInt32))
    let listBuilder = try ListArrayBuilder(listType)

    listBuilder.appendAny([Int32(10), Int32(20), Int32(30)] as [Any?])
    listBuilder.appendAny([Int32(40)] as [Any?])
    listBuilder.appendAny(nil)

    let listArray = try listBuilder.finish()
    XCTAssertEqual(listArray.length, 3)
    XCTAssertEqual(listArray.nullCount, 1)

    // XCTUnwrap fails this test with a message instead of trapping the whole
    // test process the way a force-unwrap of nil would.
    let row0 = try XCTUnwrap(listArray[0])
    XCTAssertEqual(row0.count, 3)
    XCTAssertEqual(row0[0] as? Int32, 10)
    XCTAssertEqual(row0[1] as? Int32, 20)
    XCTAssertEqual(row0[2] as? Int32, 30)

    let row1 = try XCTUnwrap(listArray[1])
    XCTAssertEqual(row1.count, 1)
    XCTAssertEqual(row1[0] as? Int32, 40)

    XCTAssertNil(listArray[2])

    // 3 + 1 elements were appended across the two non-nil lists.
    let valuesHolder = try XCTUnwrap(listArray.values)
    XCTAssertEqual(valuesHolder.length, 4)
}

/// Verifies that `ListArrayBuilder.appendAny` handles a list whose element
/// type is a struct: struct rows are readable back per list entry, the null
/// entry is preserved, and the values child holds every appended struct.
func testListOfStructAppendAny() throws {
    let structType = ArrowTypeStruct(ArrowType.ArrowStruct, fields: [
        ArrowField("name", type: ArrowType(ArrowType.ArrowString), isNullable: false),
        ArrowField("value", type: ArrowType(ArrowType.ArrowFloat), isNullable: false)
    ])
    let listType = ArrowTypeList(ArrowField("item", type: structType, isNullable: true))
    let listBuilder = try ListArrayBuilder(listType)

    let listRow0: [Any?] = [["Alice", Float(1.5)] as [Any?], ["Bob", Float(2.5)] as [Any?]]
    let listRow1: [Any?] = [["Charlie", Float(3.5)] as [Any?]]
    listBuilder.appendAny(listRow0)
    listBuilder.appendAny(listRow1)
    listBuilder.appendAny(nil)

    let listArray = try listBuilder.finish()
    XCTAssertEqual(listArray.length, 3)
    XCTAssertEqual(listArray.nullCount, 1)

    // XCTUnwrap fails this test with a message instead of trapping the whole
    // test process the way a force-unwrap of nil would.
    let row0 = try XCTUnwrap(listArray[0])
    XCTAssertEqual(row0.count, 2)
    let struct0 = try XCTUnwrap(row0[0] as? [Any?])
    XCTAssertEqual(struct0[0] as? String, "Alice")
    XCTAssertEqual(struct0[1] as? Float, 1.5)
    let struct1 = try XCTUnwrap(row0[1] as? [Any?])
    XCTAssertEqual(struct1[0] as? String, "Bob")
    XCTAssertEqual(struct1[1] as? Float, 2.5)

    let row1 = try XCTUnwrap(listArray[1])
    XCTAssertEqual(row1.count, 1)
    let struct2 = try XCTUnwrap(row1[0] as? [Any?])
    XCTAssertEqual(struct2[0] as? String, "Charlie")
    XCTAssertEqual(struct2[1] as? Float, 3.5)

    XCTAssertNil(listArray[2])

    // 2 + 1 structs were appended across the two non-nil lists.
    let valuesHolder = try XCTUnwrap(listArray.values)
    XCTAssertEqual(valuesHolder.length, 3)
}
}
Loading
Loading