Skip to content

Support Binary --> Utf8View casting #6531

Closed
@alamb

Description

@alamb

Is your feature request related to a problem or challenge? Please describe what you are trying to do.
While working on apache/datafusion#12788 (using StringView upstream in DataFusion), @goldmedal found that casting from BinaryArray --> Utf8View is not supported.

Describe the solution you'd like
Support casting BinaryArray --> Utf8View

Describe alternatives you've considered
Here is a modified test_binary_to_view test that should pass once this cast is supported:

    /// Runs the shared binary-to-view cast checks once with `i32` offsets and
    /// once with `i64` offsets.
    #[test]
    fn test_binary_to_view() {
        _test_binary_to_view::<i32>();
        _test_binary_to_view::<i64>();
    }

    /// Checks that a `GenericBinaryArray<O>` can be cast to both `Utf8View` and
    /// `BinaryView`, and that each cast result equals a view array built
    /// directly from the same test data.
    ///
    /// `O` selects the offset type of the source binary array.
    fn _test_binary_to_view<O>()
    where
        O: OffsetSizeTrait,
    {
        // Source array under test.
        // NOTE(review): VIEW_TEST_DATA is defined outside this snippet; the
        // Utf8View comparison below presumably relies on it being valid UTF-8 — confirm.
        let binary_array = GenericBinaryArray::<O>::from_iter(VIEW_TEST_DATA);

        // The cast kernel must advertise support for Binary -> Utf8View
        // (this is the assertion that fails before the feature is added).
        assert!(can_cast_types(
            binary_array.data_type(),
            &DataType::Utf8View
        ));

        // Binary -> BinaryView support is asserted as well.
        assert!(can_cast_types(
            binary_array.data_type(),
            &DataType::BinaryView
        ));

        // Perform the casts and verify the resulting data types.
        let string_view_array = cast(&binary_array, &DataType::Utf8View).unwrap();
        assert_eq!(string_view_array.data_type(), &DataType::Utf8View);

        let binary_view_array = cast(&binary_array, &DataType::BinaryView).unwrap();
        assert_eq!(binary_view_array.data_type(), &DataType::BinaryView);

        // The cast output must equal a view array constructed directly from
        // the same input data.
        let expect_string_view_array = StringViewArray::from_iter(VIEW_TEST_DATA);
        assert_eq!(string_view_array.as_ref(), &expect_string_view_array);

        let expect_binary_view_array = BinaryViewArray::from_iter(VIEW_TEST_DATA);
        assert_eq!(binary_view_array.as_ref(), &expect_binary_view_array);
    }
Full Diff

diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index e3fad3da19..f147a9c3f6 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -5523,7 +5523,7 @@ mod tests {
     }

     #[test]
-    fn test_bianry_to_view() {
+    fn test_binary_to_view() {
         _test_binary_to_view::<i32>();
         _test_binary_to_view::<i64>();
     }
@@ -5534,14 +5534,25 @@ mod tests {
     {
         let binary_array = GenericBinaryArray::<O>::from_iter(VIEW_TEST_DATA);

+        assert!(can_cast_types(
+            binary_array.data_type(),
+            &DataType::Utf8View
+        ));
+
         assert!(can_cast_types(
             binary_array.data_type(),
             &DataType::BinaryView
         ));

+        let string_view_array = cast(&binary_array, &DataType::Utf8View).unwrap();
+        assert_eq!(string_view_array.data_type(), &DataType::Utf8View);
+
         let binary_view_array = cast(&binary_array, &DataType::BinaryView).unwrap();
         assert_eq!(binary_view_array.data_type(), &DataType::BinaryView);

+        let expect_string_view_array = StringViewArray::from_iter(VIEW_TEST_DATA);
+        assert_eq!(string_view_array.as_ref(), &expect_string_view_array);
+
         let expect_binary_view_array = BinaryViewArray::from_iter(VIEW_TEST_DATA);
         assert_eq!(binary_view_array.as_ref(), &expect_binary_view_array);
     }

The test currently fails with:

assertion failed: can_cast_types(binary_array.data_type(), &DataType::Utf8View)
thread 'cast::tests::test_binary_to_view' panicked at arrow-cast/src/cast/mod.rs:5537:9:
assertion failed: can_cast_types(binary_array.data_type(), &DataType::Utf8View)
stack backtrace:
   0: rust_begin_unwind
             at /rustc/eeb90cda1969383f56a2637cbd3037bdf598841c/library/std/src/panicking.rs:665:5
   1: core::panicking::panic_fmt
             at /rustc/eeb90cda1969383f56a2637cbd3037bdf598841c/library/core/src/panicking.rs:74:14
   2: core::panicking::panic
             at /rustc/eeb90cda1969383f56a2637cbd3037bdf598841c/library/core/src/panicking.rs:148:5
   3: arrow_cast::cast::tests::_test_binary_to_view
             at ./src/cast/mod.rs:5537:9
   4: arrow_cast::cast::tests::test_binary_to_view
             at ./src/cast/mod.rs:5527:9
   5: arrow_cast::cast::tests::test_binary_to_view::{{closure}}
             at ./src/cast/mod.rs:5526:29
   6: core::ops::function::FnOnce::call_once
             at /rustc/eeb90cda1969383f56a2637cbd3037bdf598841c/library/core/src/ops/function.rs:250:5
   7: core::ops::function::FnOnce::call_once
             at /rustc/eeb90cda1969383f56a2637cbd3037bdf598841c/library/core/src/ops/function.rs:250:5
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.

Additional context

Metadata

Metadata

Assignees

No one assigned

    Labels

    arrow: Changes to the arrow crate; enhancement: Any new improvement worthy of an entry in the changelog

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions