Skip to content

Commit 60d1d3a

Browse files
authored
Cast Utf8View to Utf8 to support || from StringViewArray (#11796)
* make query work
* hack string_concat_coercion
* more tests
1 parent cd322f1 commit 60d1d3a

File tree

2 files changed

+68
-8
lines changed

2 files changed

+68
-8
lines changed

datafusion/expr/src/type_coercion/binary.rs

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -890,15 +890,22 @@ fn dictionary_coercion(
890890
/// 2. Data type of the other side should be able to cast to string type
891891
fn string_concat_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
892892
use arrow::datatypes::DataType::*;
893-
string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) {
894-
(Utf8, from_type) | (from_type, Utf8) => {
895-
string_concat_internal_coercion(from_type, &Utf8)
896-
}
897-
(LargeUtf8, from_type) | (from_type, LargeUtf8) => {
898-
string_concat_internal_coercion(from_type, &LargeUtf8)
893+
match (lhs_type, rhs_type) {
894+
// If either side is Utf8View and the other side is a string type (Utf8View, Utf8 or LargeUtf8), we coerce to Utf8.
895+
// Ref: https://github.com/apache/datafusion/pull/11796
896+
(Utf8View, Utf8View | Utf8 | LargeUtf8) | (Utf8 | LargeUtf8, Utf8View) => {
897+
Some(Utf8)
899898
}
900-
_ => None,
901-
})
899+
_ => string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) {
900+
(Utf8, from_type) | (from_type, Utf8) => {
901+
string_concat_internal_coercion(from_type, &Utf8)
902+
}
903+
(LargeUtf8, from_type) | (from_type, LargeUtf8) => {
904+
string_concat_internal_coercion(from_type, &LargeUtf8)
905+
}
906+
_ => None,
907+
}),
908+
}
902909
}
903910

904911
fn array_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {

datafusion/sqllogictest/test_files/string_view.slt

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,3 +447,56 @@ select t.dt from dates t where arrow_cast('2024-01-01', 'Utf8View') < t.dt;
447447

448448
statement ok
449449
drop table dates;
450+
451+
statement ok
452+
create table temp as values
453+
('value1', arrow_cast('rust', 'Utf8View'), arrow_cast('fast', 'Utf8View')),
454+
('value2', arrow_cast('datafusion', 'Utf8View'), arrow_cast('cool', 'Utf8View'));
455+
456+
query T
457+
select column2||' is fast' from temp;
458+
----
459+
rust is fast
460+
datafusion is fast
461+
462+
463+
query T
464+
select column2 || ' is ' || column3 from temp;
465+
----
466+
rust is fast
467+
datafusion is cool
468+
469+
query TT
470+
explain select column2 || 'is' || column3 from temp;
471+
----
472+
logical_plan
473+
01)Projection: CAST(temp.column2 AS Utf8) || Utf8("is") || CAST(temp.column3 AS Utf8)
474+
02)--TableScan: temp projection=[column2, column3]
475+
476+
477+
query TT
478+
explain select column2||' is fast' from temp;
479+
----
480+
logical_plan
481+
01)Projection: CAST(temp.column2 AS Utf8) || Utf8(" is fast")
482+
02)--TableScan: temp projection=[column2]
483+
484+
485+
query T
486+
select column2||column3 from temp;
487+
----
488+
rustfast
489+
datafusioncool
490+
491+
query TT
492+
explain select column2||column3 from temp;
493+
----
494+
logical_plan
495+
01)Projection: CAST(temp.column2 AS Utf8) || CAST(temp.column3 AS Utf8)
496+
02)--TableScan: temp projection=[column2, column3]
497+
498+
query T
499+
select column2|| ' ' ||column3 from temp;
500+
----
501+
rust fast
502+
datafusion cool

0 commit comments

Comments
 (0)