Skip to content
4 changes: 4 additions & 0 deletions Sources/Arrow/ArrowBuffer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ public class ArrowBuffer {
self.isMemoryOwner = isMemoryOwner
}

/// Overwrites this buffer's recorded `length` with `newLength`.
///
/// Only the stored length changes: nothing is allocated, copied, or validated here, so the
/// caller is responsible for passing a value that is consistent with the buffer's storage.
///
/// - Parameter newLength: The value to store as the buffer's `length`.
func updateLength(_ newLength: UInt) {
    length = newLength
}

deinit {
if isMemoryOwner {
self.rawPointer.deallocate()
Expand Down
90 changes: 48 additions & 42 deletions Sources/Arrow/ArrowBufferBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -205,80 +205,86 @@ public class VariableBufferBuilder<T>: ValuesBufferBuilder<T>, ArrowBufferBuilde
public required init() throws {
let values = ArrowBuffer.createBuffer(0, size: UInt(binaryStride))
let nulls = ArrowBuffer.createBuffer(0, size: UInt(binaryStride))
self.offsets = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<Int32>.stride))
self.offsets = ArrowBuffer.createBuffer(1, size: UInt(MemoryLayout<Int32>.stride))
self.offsets.rawPointer.storeBytes(of: Int32(0), as: Int32.self)
super.init(values: values, nulls: nulls, stride: binaryStride)
}

public func append(_ newValue: ItemType?) {
let index = UInt(self.length)
self.length += 1
let offsetIndex = MemoryLayout<Int32>.stride * Int(index)
if self.length >= self.offsets.length {
self.resize(UInt( self.offsets.length + 1))
}
var binData: Data
var isNull = false
if let val = newValue {
binData = getBytesFor(val)!
} else {
var nullVal = 0
isNull = true
binData = Data(bytes: &nullVal, count: MemoryLayout<UInt32>.size)
}
let nextLength = index + 1
self.resize(nextLength)

var currentIndex: Int32 = 0
var currentOffset: Int32 = Int32(binData.count)
if index > 0 {
currentIndex = self.offsets.rawPointer.advanced(by: offsetIndex).load(as: Int32.self)
currentOffset += currentIndex
if currentOffset > self.values.length {
self.value_resize(UInt(currentOffset))
}
}
let currentOffset = self.offsets.rawPointer.advanced(by: offsetIndex).load(as: Int32.self)
var nextOffset = currentOffset

if isNull {
if let val = newValue {
let binData = getBytesFor(val)!
precondition(
binData.count <= Int(Int32.max),
"VariableBufferBuilder.append: element size exceeds Int32.max")
let proposedOffset = Int64(currentOffset) + Int64(binData.count)
precondition(
proposedOffset <= Int64(Int32.max),
"VariableBufferBuilder.append: total buffer size exceeds Int32.max")
nextOffset = Int32(proposedOffset)
self.value_resize(UInt(nextOffset))
binData.withUnsafeBytes { bufferPointer in
let rawPointer = bufferPointer.baseAddress!
self.values.rawPointer.advanced(by: Int(currentOffset))
.copyMemory(from: rawPointer, byteCount: binData.count)
}
BitUtility.setBit(index + self.offset, buffer: self.nulls)
} else {
self.nullCount += 1
BitUtility.clearBit(index + self.offset, buffer: self.nulls)
} else {
BitUtility.setBit(index + self.offset, buffer: self.nulls)
}

binData.withUnsafeBytes { bufferPointer in
let rawPointer = bufferPointer.baseAddress!
self.values.rawPointer.advanced(by: Int(currentIndex))
.copyMemory(from: rawPointer, byteCount: binData.count)
}

self.offsets.rawPointer.advanced(by: (offsetIndex + MemoryLayout<Int32>.stride))
.storeBytes(of: currentOffset, as: Int32.self)
self.offsets.rawPointer.advanced(by: offsetIndex + MemoryLayout<Int32>.stride)
.storeBytes(of: nextOffset, as: Int32.self)
self.length = nextLength
self.nulls.updateLength(nextLength / 8 + 1)
self.offsets.updateLength(nextLength + 1)
self.values.updateLength(UInt(nextOffset))
}

public func value_resize(_ length: UInt) {
if length > self.values.length {
if length > self.values.capacity {
let resizeLength = resizeLength(self.values, len: length)
var values = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout<UInt8>.size))
ArrowBuffer.copyCurrent(self.values, to: &values, len: self.values.capacity)
values.updateLength(self.values.length)
self.values = values
}
}

public func resize(_ length: UInt) {
if length > self.offsets.length {
let resizeLength = resizeLength(self.offsets, len: length)
var nulls = ArrowBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
var offsets = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout<Int32>.size))
let nullByteLength = length / 8 + 1
if nullByteLength > self.nulls.capacity {
let resizeLength = resizeLength(self.nulls, len: nullByteLength)
var nulls = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout<UInt8>.size))
ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: self.nulls.capacity)
ArrowBuffer.copyCurrent(self.offsets, to: &offsets, len: self.offsets.capacity)
nulls.updateLength(self.nulls.length)
self.nulls = nulls
}

let requiredOffsetCount = length + 1
let requiredOffsetByteCount = requiredOffsetCount * UInt(MemoryLayout<Int32>.size)
if requiredOffsetByteCount > self.offsets.capacity {
let resizeLength = resizeLength(self.offsets, len: requiredOffsetCount)
var offsets = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout<Int32>.size))
ArrowBuffer.copyCurrent(self.offsets, to: &offsets, len: self.offsets.capacity)
offsets.updateLength(self.offsets.length)
self.offsets = offsets
}
}

public func finish() -> [ArrowBuffer] {
let length = self.length
var values = ArrowBuffer.createBuffer(self.values.length, size: UInt(MemoryLayout<UInt8>.size))
var nulls = ArrowBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
var offsets = ArrowBuffer.createBuffer(length, size: UInt(MemoryLayout<Int32>.size))
var nulls = ArrowBuffer.createBuffer(length / 8 + 1, size: UInt(MemoryLayout<UInt8>.size))
var offsets = ArrowBuffer.createBuffer(length + 1, size: UInt(MemoryLayout<Int32>.size))
ArrowBuffer.copyCurrent(self.values, to: &values, len: values.capacity)
ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: nulls.capacity)
ArrowBuffer.copyCurrent(self.offsets, to: &offsets, len: offsets.capacity)
Expand Down
2 changes: 1 addition & 1 deletion Sources/Arrow/ArrowCImporter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ public class ArrowCImporter {
}

appendToBuffer(cArray.buffers[0], arrowBuffers: &arrowBuffers, length: (length + 7) / 8)
appendToBuffer(cArray.buffers[1], arrowBuffers: &arrowBuffers, length: length)
appendToBuffer(cArray.buffers[1], arrowBuffers: &arrowBuffers, length: length + 1)
let lastOffsetLength = cArray.buffers[1]!
.advanced(by: Int(length) * MemoryLayout<Int32>.stride)
.load(as: Int32.self)
Expand Down
19 changes: 18 additions & 1 deletion Sources/Arrow/ArrowData.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,26 @@ public class ArrowData {
public let stride: Int

convenience init(_ arrowType: ArrowType, buffers: [ArrowBuffer], nullCount: UInt) throws {
let arrayLength: UInt
switch arrowType.info {
case .variableInfo:
guard buffers.count >= 3 else {
throw ArrowError.invalid(
"Variable-width ArrowData requires at least three buffers (null bitmap, offsets, and values).")
}
let offsetsLength = buffers[1].length
guard offsetsLength >= 1 else {
throw ArrowError.invalid(
"Variable-width ArrowData requires a non-empty offsets buffer.")
}
arrayLength = offsetsLength - 1
default:
arrayLength = buffers[1].length
}

try self.init(arrowType, buffers: buffers,
children: [ArrowData](), nullCount: nullCount,
length: buffers[1].length)
length: arrayLength)
}

init(_ arrowType: ArrowType, buffers: [ArrowBuffer], children: [ArrowData], nullCount: UInt, length: UInt) throws {
Expand Down
14 changes: 12 additions & 2 deletions Sources/Arrow/ArrowReader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,19 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
let arrowNullBuffer = makeBuffer(nullBuffer, fileData: loadInfo.fileData,
length: nullLength, messageOffset: loadInfo.messageOffset)
let arrowOffsetBuffer = makeBuffer(offsetBuffer, fileData: loadInfo.fileData,
length: UInt(node.length), messageOffset: loadInfo.messageOffset)
length: UInt(node.length + 1), messageOffset: loadInfo.messageOffset)
let lastOffset = arrowOffsetBuffer.rawPointer
.advanced(by: Int(node.length) * MemoryLayout<Int32>.stride)
.load(as: Int32.self)
guard lastOffset >= 0 else {
return .failure(.invalid("Negative last offset (\(lastOffset)) in variable-width buffer"))
}
guard Int64(lastOffset) <= valueBuffer.length else {
return .failure(.invalid(
"Last offset (\(lastOffset)) exceeds value buffer length (\(valueBuffer.length))"))
}
let arrowValueBuffer = makeBuffer(valueBuffer, fileData: loadInfo.fileData,
length: UInt(node.length), messageOffset: loadInfo.messageOffset)
length: UInt(lastOffset), messageOffset: loadInfo.messageOffset)
Comment on lines 197 to +210
Copy link

Copilot AI Mar 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`loadVariableData` derives the values buffer's logical length from the last offset without validating it. If the IPC input is malformed, `lastOffset` can be negative or larger than the underlying `valueBuffer.length`, and the subsequent `UInt(lastOffset)` conversion or later value reads can crash or read out of bounds. Add guards to ensure that the offset buffer is large enough to read the last offset, that `lastOffset >= 0`, and that `lastOffset <= valueBuffer.length` (in bytes) before creating the value buffer; otherwise return `.failure(.invalid(...))`.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added guards to reject negative offsets and offsets exceeding the value buffer length, returning .failure(.invalid(...)) with a descriptive message. This is consistent with the existing guard chain in the method.

return makeArrayHolder(field, buffers: [arrowNullBuffer, arrowOffsetBuffer, arrowValueBuffer],
nullCount: UInt(node.nullCount), children: nil,
rbLength: UInt(loadInfo.batchData.recordBatch.length))
Expand Down
94 changes: 88 additions & 6 deletions Tests/ArrowTests/ArrayTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
import XCTest
@testable import Arrow

/// Reads `count` consecutive `Int32` values starting at the beginning of `buffer`'s storage.
///
/// - Parameters:
///   - buffer: The buffer whose `rawPointer` is read. The caller must ensure that it holds at
///     least `count` `Int32` values.
///   - count: How many values to read; must be non-negative.
/// - Returns: The values in storage order.
private func int32Values(in buffer: ArrowBuffer, count: Int) -> [Int32] {
    let elementStride = MemoryLayout<Int32>.stride
    var values: [Int32] = []
    for position in 0..<count {
        let element = buffer.rawPointer.load(fromByteOffset: position * elementStride, as: Int32.self)
        values.append(element)
    }
    return values
}

final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length
func testPrimitiveArray() throws {
// This is an example of a functional test case.
Expand Down Expand Up @@ -63,7 +70,7 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length

XCTAssertEqual(stringBuilder.nullCount, 10)
XCTAssertEqual(stringBuilder.length, 100)
XCTAssertEqual(stringBuilder.capacity, 640)
XCTAssertGreaterThanOrEqual(stringBuilder.capacity, 640)
let stringArray = try stringBuilder.finish()
XCTAssertEqual(stringArray.length, 100)
for index in 0..<stringArray.length {
Expand Down Expand Up @@ -139,7 +146,7 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length

XCTAssertEqual(binaryBuilder.nullCount, 10)
XCTAssertEqual(binaryBuilder.length, 100)
XCTAssertEqual(binaryBuilder.capacity, 640)
XCTAssertGreaterThanOrEqual(binaryBuilder.capacity, 640)
let binaryArray = try binaryBuilder.finish()
XCTAssertEqual(binaryArray.length, 100)
for index in 0..<binaryArray.length {
Expand All @@ -152,6 +159,66 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length
}
}

/// A single 256-byte string must produce a values buffer whose length equals the string's
/// UTF-8 byte count, and an offsets buffer that starts with `[0, 256]`.
func testStringArrayTracksLogicalValueLengthAfterFirstAppend() throws {
    let payload = String(repeating: "a", count: 256)
    let builder = try ArrowArrayBuilders.loadStringArrayBuilder()
    builder.append(payload)

    let array = try builder.finish()
    let expectedOffsets: [Int32] = [0, 256]

    XCTAssertEqual(array.length, 1)
    XCTAssertEqual(array[0], payload)
    // buffers[1] holds the offsets and buffers[2] the value bytes.
    XCTAssertEqual(array.arrowData.buffers[2].length, UInt(payload.utf8.count))
    XCTAssertEqual(int32Values(in: array.arrowData.buffers[1], count: 2), expectedOffsets)
}

/// A null placed between two strings must contribute no value bytes: the offsets are expected
/// to be `[0, 1, 1, 4]` and the values buffer length 4.
func testStringArrayNullDoesNotAdvanceOffsets() throws {
    let builder = try ArrowArrayBuilders.loadStringArrayBuilder()
    builder.append("a")
    builder.append(nil)
    builder.append("bbb")

    let array = try builder.finish()
    let expectedValueLength: UInt = 4
    let expectedOffsets: [Int32] = [0, 1, 1, 4]

    XCTAssertEqual(array.length, 3)
    XCTAssertEqual(array[0], "a")
    XCTAssertNil(array[1])
    XCTAssertEqual(array[2], "bbb")
    // buffers[1] holds the offsets and buffers[2] the value bytes.
    XCTAssertEqual(array.arrowData.buffers[2].length, expectedValueLength)
    XCTAssertEqual(int32Values(in: array.arrowData.buffers[1], count: 4), expectedOffsets)
}

/// A leading null followed by a 512-byte string must give offsets `[0, 0, 512]` and a values
/// buffer whose length equals the string's UTF-8 byte count.
func testStringArrayNullFirstThenLongValue() throws {
    let payload = String(repeating: "z", count: 512)
    let builder = try ArrowArrayBuilders.loadStringArrayBuilder()
    builder.append(nil)
    builder.append(payload)

    let array = try builder.finish()
    let expectedOffsets: [Int32] = [0, 0, 512]

    XCTAssertEqual(array.length, 2)
    XCTAssertNil(array[0])
    XCTAssertEqual(array[1], payload)
    // buffers[1] holds the offsets and buffers[2] the value bytes.
    XCTAssertEqual(array.arrowData.buffers[2].length, UInt(payload.utf8.count))
    XCTAssertEqual(int32Values(in: array.arrowData.buffers[1], count: 3), expectedOffsets)
}

/// Binary counterpart of the string null-offset test: a null between two `Data` values must
/// leave the offsets at `[0, 1, 1, 4]` and the values buffer length at 4.
func testBinaryArrayNullDoesNotAdvanceOffsets() throws {
    let head = Data("a".utf8)
    let tail = Data("bbb".utf8)
    let builder = try ArrowArrayBuilders.loadBinaryArrayBuilder()
    builder.append(head)
    builder.append(nil)
    builder.append(tail)

    let array = try builder.finish()
    let expectedValueLength: UInt = 4
    let expectedOffsets: [Int32] = [0, 1, 1, 4]

    XCTAssertEqual(array.length, 3)
    XCTAssertEqual(array[0], head)
    XCTAssertNil(array[1])
    XCTAssertEqual(array[2], tail)
    // buffers[1] holds the offsets and buffers[2] the value bytes.
    XCTAssertEqual(array.arrowData.buffers[2].length, expectedValueLength)
    XCTAssertEqual(int32Values(in: array.arrowData.buffers[1], count: 4), expectedOffsets)
}

func testTime32Array() throws {
let milliBuilder = try ArrowArrayBuilders.loadTime32ArrayBuilder(.milliseconds)
milliBuilder.append(100)
Expand Down Expand Up @@ -344,10 +411,25 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length
}

func checkHolderForType(_ checkType: ArrowType) throws {
let buffers = [ArrowBuffer(length: 0, capacity: 0,
rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero)),
ArrowBuffer(length: 0, capacity: 0,
rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero))]
let emptyPtr = UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero)
let buffers: [ArrowBuffer]
switch checkType.info {
case .variableInfo:
let offsetPtr = UnsafeMutableRawPointer.allocate(byteCount: MemoryLayout<Int32>.stride, alignment: 4)
offsetPtr.storeBytes(of: Int32(0), as: Int32.self)
buffers = [
ArrowBuffer(length: 0, capacity: 0, rawPointer: emptyPtr),
ArrowBuffer(length: 1, capacity: UInt(MemoryLayout<Int32>.stride), rawPointer: offsetPtr),
ArrowBuffer(length: 0, capacity: 0,
rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero))
]
default:
buffers = [
ArrowBuffer(length: 0, capacity: 0, rawPointer: emptyPtr),
ArrowBuffer(length: 0, capacity: 0,
rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero))
]
}
Comment on lines +414 to +432
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

checkHolderForType reuses the same emptyPtr for multiple ArrowBuffer instances that all default to isMemoryOwner: true. When these buffers are deallocated this will double-free the same pointer and can crash tests / trigger undefined behavior. Allocate a distinct empty pointer per buffer (e.g. use ArrowBuffer.createEmptyBuffer()), or mark shared buffers with isMemoryOwner: false so only one owner deallocates.

Copilot uses AI. Check for mistakes.
let field = ArrowField("", type: checkType, isNullable: true)
switch makeArrayHolder(field, buffers: buffers, nullCount: 0, children: nil, rbLength: 0) {
case .success(let holder):
Expand Down
2 changes: 1 addition & 1 deletion Tests/ArrowTests/CDataTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ final class CDataTests: XCTestCase {

XCTAssertEqual(stringBuilder.nullCount, 10)
XCTAssertEqual(stringBuilder.length, 100)
XCTAssertEqual(stringBuilder.capacity, 640)
XCTAssertGreaterThanOrEqual(stringBuilder.capacity, 640)
let stringArray = try stringBuilder.finish()
let exporter = ArrowCExporter()
var cArray = ArrowC.ArrowArray()
Expand Down
53 changes: 53 additions & 0 deletions Tests/ArrowTests/IPCTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,59 @@ func makeRecordBatch() throws -> RecordBatch {
}

final class IPCStreamReaderTests: XCTestCase {
/// Round-trips a nullable string column through the streaming writer and reader.
///
/// The column holds a 256-byte first value, then a null, then a short value. The test checks
/// that the batch shape, the field metadata, and all three values survive unchanged.
func testVariableWidthColumnsRoundTripLongFirstValue() throws {
    let longFirstValue = String(repeating: "x", count: 256)
    let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
    stringBuilder.append(longFirstValue)
    stringBuilder.append(nil)
    stringBuilder.append("tail")

    let stringHolder = ArrowArrayHolderImpl(try stringBuilder.finish())
    let recordBatchResult = RecordBatch.Builder()
        .addColumn("message", arrowArray: stringHolder)
        .finish()

    let recordBatch: RecordBatch
    switch recordBatchResult {
    case .success(let batch):
        recordBatch = batch
    case .failure(let error):
        throw error
    }

    let schema = ArrowSchema.Builder()
        .addField("message", type: ArrowType(ArrowType.ArrowString), isNullable: true)
        .finish()

    let arrowWriter = ArrowWriter()
    let writerInfo = ArrowWriter.Info(.recordbatch, schema: schema, batches: [recordBatch])

    let streamingData: Data
    switch arrowWriter.writeStreaming(writerInfo) {
    case .success(let data):
        streamingData = data
    case .failure(let error):
        throw error
    }

    let arrowReader = ArrowReader()
    switch arrowReader.readStreaming(streamingData) {
    case .success(let result):
        XCTAssertEqual(result.batches.count, 1)
        // Unwrap instead of indexing so that an empty result fails the test rather than trapping.
        let firstBatch = try XCTUnwrap(result.batches.first)
        XCTAssertEqual(firstBatch.length, 3)
        XCTAssertEqual(firstBatch.schema.fields.count, 1)
        let messageField = try XCTUnwrap(firstBatch.schema.fields.first)
        XCTAssertEqual(messageField.name, "message")
        XCTAssertEqual(messageField.type.info, ArrowType.ArrowString)
        let stringColumn = try XCTUnwrap(firstBatch.columns.first)
        // Fail loudly if the column is not a StringArray; with optional chaining on a failed
        // cast, the nil check below would pass vacuously.
        let strings = try XCTUnwrap(stringColumn.array as? StringArray)
        XCTAssertEqual(strings[0], longFirstValue)
        XCTAssertNil(strings[1])
        XCTAssertEqual(strings[2], "tail")
    case .failure(let error):
        throw error
    }
}

func testRBInMemoryToFromStream() throws {
let schema = makeSchema()
let recordBatch = try makeRecordBatch()
Expand Down
Loading