Skip to content

Commit 350ea26

Browse files
authored
Support Utf8View for bit_length kernel (#6671)
* Support `Utf8View` for string function `bit_length()`
  Signed-off-by: Austin Liu <[email protected]>
* Add test & handle view bytes length counting
  Signed-off-by: Austin Liu <[email protected]>
  Add test & handle view bytes length counting
  Signed-off-by: Austin Liu <[email protected]>
* Refine `string_view_array`
  Signed-off-by: Austin Liu <[email protected]>
* Change length from `i32` to `u32` & check nullity
  Signed-off-by: Austin Liu <[email protected]>
* Clean up
  Signed-off-by: Austin Liu <[email protected]>
* Refine
  Signed-off-by: Austin Liu <[email protected]>
* Use `from_unary` instead
  Signed-off-by: Austin Liu <[email protected]>
* Avoid inspecting the string data
  Signed-off-by: Austin Liu <[email protected]>
* Clean up
  Signed-off-by: Austin Liu <[email protected]>
---------
Signed-off-by: Austin Liu <[email protected]>
1 parent b11b151 commit 350ea26

File tree

1 file changed

+38
-0
lines changed

1 file changed

+38
-0
lines changed

arrow-string/src/length.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,15 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
137137
let list = array.as_string::<i64>();
138138
Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
139139
}
140+
DataType::Utf8View => {
141+
let list = array.as_string_view();
142+
let values = list
143+
.views()
144+
.iter()
145+
.map(|view| (*view as i32).wrapping_mul(8))
146+
.collect();
147+
Ok(Arc::new(Int32Array::new(values, array.nulls().cloned())))
148+
}
140149
DataType::Binary => {
141150
let list = array.as_binary::<i32>();
142151
Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
@@ -462,6 +471,35 @@ mod tests {
462471
})
463472
}
464473

474+
/// Runs `bit_length` over a `StringViewArray` built from each case returned
/// by `bit_length_cases` and checks both the output length and every
/// per-element value against the expected `i32` results.
#[test]
fn bit_length_test_utf8view() {
    for (input, len, expected) in bit_length_cases() {
        let views = StringViewArray::from(input);
        let output = bit_length(&views).unwrap();
        assert_eq!(len, output.len());

        // The kernel returns a type-erased array; recover the concrete
        // `Int32Array` so individual values can be read.
        let bits = output.as_any().downcast_ref::<Int32Array>().unwrap();
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(*want, bits.value(idx));
        }
    }
}
488+
489+
/// Runs `bit_length` over a `StringViewArray` built from each case returned
/// by `bit_length_null_cases` and compares the whole result array against
/// the expected `Int32Array`.
///
/// The input must be a `StringViewArray`: building a `StringArray` here
/// would dispatch to the `Utf8` branch of `bit_length` and leave the
/// `Utf8View` branch unexercised by this test.
#[test]
fn bit_length_null_utf8view() {
    bit_length_null_cases()
        .into_iter()
        .for_each(|(input, len, expected)| {
            let array = StringViewArray::from(input);
            let result = bit_length(&array).unwrap();
            assert_eq!(len, result.len());
            let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

            // Compare as arrays rather than element by element so the
            // equality check covers the full contents in one assertion.
            let expected: Int32Array = expected.into();
            assert_eq!(&expected, result);
        })
}
465503
#[test]
466504
fn bit_length_binary() {
467505
let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];

0 commit comments

Comments
 (0)