@@ -19,7 +19,7 @@ import FlatBuffers
19
19
import Foundation
20
20
21
21
let FILEMARKER = " ARROW1 "
22
- let CONTINUATIONMARKER = - 1
22
+ let CONTINUATIONMARKER = UInt32 ( 0xFFFFFFFF )
23
23
24
24
public class ArrowReader { // swiftlint:disable:this type_body_length
25
25
private class RecordBatchData {
@@ -216,7 +216,77 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
216
216
return . success( RecordBatch ( arrowSchema, columns: columns) )
217
217
}
218
218
219
- public func fromStream( // swiftlint:disable:this function_body_length
219
/*
 The Memory stream format is for reading the Arrow streaming protocol. This
 format is slightly different from the File format protocol as it doesn't contain
 a header and footer.

 The stream is walked message by message: a 4-byte length prefix (optionally
 preceded by the 4-byte continuation marker) is read, the flatbuffer message
 that follows is decoded, and the read position is advanced past the message
 and its body. A length of zero ends the stream.

 - Parameters:
   - fileData: The bytes of the stream. The read position is used directly as
     an index into this value.
   - useUnalignedBuffers: Not consulted by this method; the flatbuffer is always
     read with `allowReadingUnalignedBuffers: true`.
 - Returns: `.success` with the loaded schema and record batches, or `.failure`
   when a schema or record batch cannot be loaded, when a record batch arrives
   before any schema, when a message carries no header payload, or when a
   message has a header type other than schema or record batch.
 */
public func fromMemoryStream( // swiftlint:disable:this function_body_length
    _ fileData: Data,
    useUnalignedBuffers: Bool = false
) -> Result<ArrowReaderResult, ArrowError> {
    let result = ArrowReaderResult()
    let prefixSize = MemoryLayout<Int32>.size
    var offset: Int = 0
    var length = getUInt32(fileData, offset: offset)
    var schemaMessage: org_apache_arrow_flatbuf_Schema?
    while length != 0 {
        // A continuation marker is followed by the real length prefix.
        if length == CONTINUATIONMARKER {
            offset += prefixSize
            length = getUInt32(fileData, offset: offset)
            if length == 0 {
                return .success(result)
            }
        }

        // Step over the length prefix; the flatbuffer message starts here.
        offset += prefixSize
        let streamData = fileData[offset...]
        let dataBuffer = ByteBuffer(
            data: streamData,
            allowReadingUnalignedBuffers: true)
        let message = org_apache_arrow_flatbuf_Message.getRootAsMessage(bb: dataBuffer)
        // Bytes occupied by this message's metadata plus its body.
        let messageSpan = message.bodyLength + Int64(length)
        switch message.headerType {
        case .recordbatch:
            guard let rbMessage = message.header(type: org_apache_arrow_flatbuf_RecordBatch.self) else {
                return .failure(.unknownError("Record batch message is missing its header"))
            }
            // A record batch cannot be decoded without a previously loaded schema;
            // report that instead of trapping on a nil unwrap.
            guard let schema = schemaMessage, let arrowSchema = result.schema else {
                return .failure(.unknownError("Record batch message received before a schema message"))
            }
            do {
                offset += Int(messageSpan)
                // NOTE(review): messageEndOffset is passed as metadata length + body
                // length rather than a position derived from `offset` — confirm this
                // matches what loadRecordBatch expects.
                let recordBatch = try loadRecordBatch(
                    rbMessage,
                    schema: schema,
                    arrowSchema: arrowSchema,
                    data: fileData,
                    messageEndOffset: messageSpan).get()
                result.batches.append(recordBatch)
                length = getUInt32(fileData, offset: offset)
            } catch let error as ArrowError {
                return .failure(error)
            } catch {
                return .failure(.unknownError("Unexpected error: \(error)"))
            }
        case .schema:
            guard let header = message.header(type: org_apache_arrow_flatbuf_Schema.self) else {
                return .failure(.unknownError("Schema message is missing its header"))
            }
            schemaMessage = header
            switch loadSchema(header) {
            case .success(let schema):
                result.schema = schema
            case .failure(let error):
                return .failure(error)
            }
            offset += Int(messageSpan)
            length = getUInt32(fileData, offset: offset)
        default:
            return .failure(.unknownError("Unhandled header type: \(message.headerType)"))
        }
    }
    return .success(result)
}
283
+
284
+ /*
285
+ The File stream format supports random access to the data. This format contains
286
+ a header and footer around the streaming format.
287
+ */
288
+
289
+ public func fromFileStream( // swiftlint:disable:this function_body_length
220
290
_ fileData: Data ,
221
291
useUnalignedBuffers: Bool = false
222
292
) -> Result < ArrowReaderResult , ArrowError > {
@@ -242,7 +312,7 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
242
312
for index in 0 ..< footer. recordBatchesCount {
243
313
let recordBatch = footer. recordBatches ( at: index) !
244
314
var messageLength = fileData. withUnsafeBytes { rawBuffer in
245
- rawBuffer. loadUnaligned ( fromByteOffset: Int ( recordBatch. offset) , as: Int32 . self)
315
+ rawBuffer. loadUnaligned ( fromByteOffset: Int ( recordBatch. offset) , as: UInt32 . self)
246
316
}
247
317
248
318
var messageOffset : Int64 = 1
@@ -251,7 +321,7 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
251
321
messageLength = fileData. withUnsafeBytes { rawBuffer in
252
322
rawBuffer. loadUnaligned (
253
323
fromByteOffset: Int ( recordBatch. offset + Int64( MemoryLayout< Int32> . size) ) ,
254
- as: Int32 . self)
324
+ as: UInt32 . self)
255
325
}
256
326
}
257
327
@@ -296,7 +366,7 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
296
366
let markerLength = FILEMARKER . utf8. count
297
367
let footerLengthEnd = Int ( fileData. count - markerLength)
298
368
let data = fileData [ ..< ( footerLengthEnd) ]
299
- return fromStream ( data)
369
+ return fromFileStream ( data)
300
370
} catch {
301
371
return . failure( . unknownError( " Error loading file: \( error) " ) )
302
372
}
@@ -340,10 +410,10 @@ public class ArrowReader { // swiftlint:disable:this type_body_length
340
410
} catch {
341
411
return . failure( . unknownError( " Unexpected error: \( error) " ) )
342
412
}
343
-
344
413
default :
345
414
return . failure( . unknownError( " Unhandled header type: \( message. headerType) " ) )
346
415
}
347
416
}
348
417
349
418
}
419
+ // swiftlint:disable:this file_length
0 commit comments