@@ -425,24 +425,32 @@ impl<'a> ImportedArrowArray<'a> {
425
425
( length + 1 ) * ( bits / 8 )
426
426
}
427
427
(DataType::Utf8, 2) | (DataType::Binary, 2) => {
    // The length of the data buffer (buffer 2) equals the difference between the last
    // and the first value of the offset buffer (buffer 1). The first offset is not
    // required to be zero, so the last offset alone would over-report the length.
    let len = self.buffer_len(1, dt)?;
    // Buffer 1 is the offset buffer. We assume that the pointer is aligned for `i32`,
    // as Utf8 and Binary use `i32` offsets.
    #[allow(clippy::cast_ptr_alignment)]
    let offset_buffer = self.array.buffer(1) as *const i32;
    // SAFETY: relies on the producer having supplied an offset buffer that is at least
    // `len` bytes long, where `len` is the byte length computed for buffer 1 above.
    // NOTE(review): a null or shorter buffer is not detected here - confirm that the
    // caller guarantees this before the buffer length is queried.
    let start = unsafe { *offset_buffer };
    // `len / size_of::<i32>()` is the number of offsets; the last one sits at index - 1.
    let end = unsafe { *offset_buffer.add(len / size_of::<i32>() - 1) };
    // The offsets are written by the exporting side, so do not trust them: a negative
    // value or `end < start` must surface as an error rather than wrap around into a
    // bogus (huge) buffer length.
    usize::try_from(start)
        .ok()
        .zip(usize::try_from(end).ok())
        .and_then(|(start, end)| end.checked_sub(start))
        .ok_or_else(|| {
            arrow_schema::ArrowError::CDataInterface(format!(
                "Invalid offsets for data type {:?}: first offset is {} and last offset is {}",
                dt, start, end
            ))
        })?
}
437
441
(DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) => {
    // The length of the data buffer (buffer 2) equals the difference between the last
    // and the first value of the offset buffer (buffer 1). The first offset is not
    // required to be zero, so the last offset alone would over-report the length.
    let len = self.buffer_len(1, dt)?;
    // Buffer 1 is the offset buffer. We assume that the pointer is aligned for `i64`,
    // as LargeUtf8 and LargeBinary use `i64` offsets.
    #[allow(clippy::cast_ptr_alignment)]
    let offset_buffer = self.array.buffer(1) as *const i64;
    // SAFETY: relies on the producer having supplied an offset buffer that is at least
    // `len` bytes long, where `len` is the byte length computed for buffer 1 above.
    // NOTE(review): a null or shorter buffer is not detected here - confirm that the
    // caller guarantees this before the buffer length is queried.
    let start = unsafe { *offset_buffer };
    // `len / size_of::<i64>()` is the number of offsets; the last one sits at index - 1.
    let end = unsafe { *offset_buffer.add(len / size_of::<i64>() - 1) };
    // The offsets are written by the exporting side, so do not trust them: a negative
    // value, a value that does not fit in `usize`, or `end < start` must surface as an
    // error rather than wrap around into a bogus (huge) buffer length.
    usize::try_from(start)
        .ok()
        .zip(usize::try_from(end).ok())
        .and_then(|(start, end)| end.checked_sub(start))
        .ok_or_else(|| {
            arrow_schema::ArrowError::CDataInterface(format!(
                "Invalid offsets for data type {:?}: first offset is {} and last offset is {}",
                dt, start, end
            ))
        })?
}
447
455
// buffer len of primitive types
448
456
_ => {
@@ -1216,7 +1224,7 @@ mod tests_to_then_from_ffi {
1216
1224
mod tests_from_ffi {
1217
1225
use std:: sync:: Arc ;
1218
1226
1219
- use arrow_buffer:: { bit_util, buffer:: Buffer } ;
1227
+ use arrow_buffer:: { bit_util, buffer:: Buffer , MutableBuffer , OffsetBuffer } ;
1220
1228
use arrow_data:: ArrayData ;
1221
1229
use arrow_schema:: { DataType , Field } ;
1222
1230
@@ -1228,7 +1236,7 @@ mod tests_from_ffi {
1228
1236
ffi:: { from_ffi, FFI_ArrowArray , FFI_ArrowSchema } ,
1229
1237
} ;
1230
1238
1231
- use super :: Result ;
1239
+ use super :: { ImportedArrowArray , Result } ;
1232
1240
1233
1241
fn test_round_trip ( expected : & ArrayData ) -> Result < ( ) > {
1234
1242
// here we export the array
@@ -1420,4 +1428,34 @@ mod tests_from_ffi {
1420
1428
let data = array. into_data ( ) ;
1421
1429
test_round_trip ( & data)
1422
1430
}
1431
+
1432
#[test]
fn test_empty_string_with_non_zero_offset() -> Result<()> {
    // Simulate an empty string array whose single offset is non-zero, as a foreign
    // producer may export it: one offset (123) and no value bytes at all.
    let values: Buffer = MutableBuffer::new(0).into();
    let offsets = OffsetBuffer::new(vec![123].into());
    // The unchecked constructor is used on purpose: the offset points past the end of
    // the (empty) values buffer, which is exactly the shape under test.
    // NOTE(review): presumably a validating constructor would reject this input - confirm.
    let string_array = unsafe { StringArray::new_unchecked(offsets, values, None) };
    let exported = string_array.into_data();

    // Export the array and its schema through the C data interface structs.
    let ffi_array = Arc::new(FFI_ArrowArray::new(&exported));
    let ffi_schema = FFI_ArrowSchema::try_from(exported.data_type())?;
    let dt = DataType::try_from(&ffi_schema)?;

    let imported_array = ImportedArrowArray {
        array: &ffi_array,
        data_type: dt,
        owner: &ffi_array,
    };

    let offset_buf_len = imported_array.buffer_len(1, &imported_array.data_type)?;
    let data_buf_len = imported_array.buffer_len(2, &imported_array.data_type)?;

    // A single `i32` offset occupies four bytes.
    assert_eq!(offset_buf_len, 4);
    // First and last offset are the same element, so the data buffer must be empty
    // even though that offset is non-zero.
    assert_eq!(data_buf_len, 0);

    test_round_trip(&imported_array.consume()?)
}
1423
1461
}
0 commit comments