Skip to content

Commit 1f2d150

Browse files
committed
Support casting StringView/BinaryView --> StringArray/BinaryArray.
1 parent a61f1dc commit 1f2d150

File tree

1 file changed

+110
-0
lines changed

1 file changed

+110
-0
lines changed

arrow-cast/src/cast/mod.rs

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
218218
| Interval(_),
219219
) => true,
220220
(Utf8 | LargeUtf8, Utf8View) => true,
221+
(Utf8View, Utf8 | LargeUtf8) => true,
222+
(BinaryView, Binary | LargeBinary) => true,
221223
(Utf8 | LargeUtf8, _) => to_type.is_numeric() && to_type != &Float16,
222224
(_, Utf8 | LargeUtf8) => from_type.is_primitive(),
223225

@@ -1262,6 +1264,12 @@ pub fn cast_with_options(
12621264
"Casting from {from_type:?} to {to_type:?} not supported",
12631265
))),
12641266
},
1267+
(Utf8View, Utf8) => cast_view_to_byte::<StringViewType, GenericStringType<i32>>(array),
1268+
(Utf8View, LargeUtf8) => cast_view_to_byte::<StringViewType, GenericStringType<i64>>(array),
1269+
(BinaryView, Binary) => cast_view_to_byte::<BinaryViewType, GenericBinaryType<i32>>(array),
1270+
(BinaryView, LargeBinary) => {
1271+
cast_view_to_byte::<BinaryViewType, GenericBinaryType<i64>>(array)
1272+
}
12651273
(from_type, LargeUtf8) if from_type.is_primitive() => {
12661274
value_to_string::<i64>(array, cast_options)
12671275
}
@@ -2299,6 +2307,32 @@ where
22992307
}))
23002308
}
23012309

2310+
/// Helper function to cast from one `ByteViewType` array to `ByteArrayType` array.
2311+
fn cast_view_to_byte<FROM, TO>(array: &dyn Array) -> Result<ArrayRef, ArrowError>
2312+
where
2313+
FROM: ByteViewType,
2314+
TO: ByteArrayType,
2315+
FROM::Native: AsRef<TO::Native>,
2316+
{
2317+
let data = array.to_data();
2318+
let view_array = GenericByteViewArray::<FROM>::from(data);
2319+
2320+
let len = view_array.len();
2321+
let bytes = view_array
2322+
.views()
2323+
.iter()
2324+
.map(|v| ByteView::from(*v).length as usize)
2325+
.sum::<usize>();
2326+
2327+
let mut byte_array_builder = GenericByteBuilder::<TO>::with_capacity(len, bytes);
2328+
2329+
for val in view_array.iter() {
2330+
byte_array_builder.append_option(val);
2331+
}
2332+
2333+
Ok(Arc::new(byte_array_builder.finish()))
2334+
}
2335+
23022336
#[cfg(test)]
23032337
mod tests {
23042338
use arrow_buffer::{Buffer, NullBuffer};
@@ -5169,6 +5203,82 @@ mod tests {
51695203
assert_eq!(binary_view_array.as_ref(), &expect_binary_view_array);
51705204
}
51715205

5206+
#[test]
5207+
fn test_view_to_string() {
5208+
_test_view_to_string::<i32>();
5209+
_test_view_to_string::<i64>();
5210+
}
5211+
5212+
fn _test_view_to_string<O>()
5213+
where
5214+
O: OffsetSizeTrait,
5215+
{
5216+
let data: Vec<Option<&str>> = vec![
5217+
Some("hello"),
5218+
Some("world"),
5219+
None,
5220+
Some("large payload over 12 bytes"),
5221+
Some("lulu"),
5222+
];
5223+
5224+
let view_array = {
5225+
// ["hello", "world", null, "large payload over 12 bytes", "lulu"]
5226+
let mut builder = StringViewBuilder::new().with_block_size(8); // multiple buffer.
5227+
for s in data.iter() {
5228+
builder.append_option(s.clone());
5229+
}
5230+
builder.finish()
5231+
};
5232+
5233+
let expected_string_array = GenericStringArray::<O>::from(data);
5234+
let expected_type = expected_string_array.data_type();
5235+
5236+
assert!(can_cast_types(&view_array.data_type(), expected_type));
5237+
5238+
let string_array = cast(&view_array, expected_type).unwrap();
5239+
assert_eq!(string_array.data_type(), expected_type);
5240+
5241+
assert_eq!(string_array.as_ref(), &expected_string_array);
5242+
}
5243+
5244+
#[test]
5245+
fn test_view_to_binary() {
5246+
_test_view_to_binary::<i32>();
5247+
_test_view_to_binary::<i64>();
5248+
}
5249+
5250+
fn _test_view_to_binary<O>()
5251+
where
5252+
O: OffsetSizeTrait,
5253+
{
5254+
let data: Vec<Option<&[u8]>> = vec![
5255+
Some(b"hello"),
5256+
Some(b"world"),
5257+
None,
5258+
Some(b"large payload over 12 bytes"),
5259+
Some(b"lulu"),
5260+
];
5261+
5262+
let view_array = {
5263+
// ["hello", "world", null, "large payload over 12 bytes", "lulu"]
5264+
let mut builder = BinaryViewBuilder::new().with_block_size(8); // multiple buffer.
5265+
for s in data.iter() {
5266+
builder.append_option(s.clone());
5267+
}
5268+
builder.finish()
5269+
};
5270+
5271+
let expected_binary_array = GenericBinaryArray::<O>::from(data);
5272+
let expected_type = expected_binary_array.data_type();
5273+
5274+
assert!(can_cast_types(view_array.data_type(), expected_type));
5275+
5276+
let binary_array = cast(&view_array, expected_type).unwrap();
5277+
assert_eq!(binary_array.data_type(), expected_type);
5278+
5279+
assert_eq!(binary_array.as_ref(), &expected_binary_array);
5280+
}
5281+
51725282
#[test]
51735283
fn test_cast_from_f64() {
51745284
let f64_values: Vec<f64> = vec![

0 commit comments

Comments
 (0)