Skip to content

Commit be7e1e7

Browse files
austin362667 authored and jayzhan211 committed
Support Utf8View for string function bit_length() (apache#13221)
* Support `Utf8View` for string function `bit_length()`
  Signed-off-by: Austin Liu <[email protected]>
* Add scalar test case
  Signed-off-by: Austin Liu <[email protected]>
* Refine tests
  Signed-off-by: Austin Liu <[email protected]>
* Fix wrong format
  Signed-off-by: Austin Liu <[email protected]>

---------

Signed-off-by: Austin Liu <[email protected]>
1 parent d1e6fed commit be7e1e7

File tree

3 files changed

+45
-0
lines changed

3 files changed

+45
-0
lines changed

datafusion/functions/src/string/bit_length.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ impl ScalarUDFImpl for BitLengthFunc {
7979
ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar(
8080
ScalarValue::Int64(v.as_ref().map(|x| (x.len() * 8) as i64)),
8181
)),
82+
ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar(
83+
ScalarValue::Int32(v.as_ref().map(|x| (x.len() * 8) as i32)),
84+
)),
8285
_ => unreachable!("bit length"),
8386
},
8487
}

datafusion/sqllogictest/test_files/string/string_literal.slt

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1623,3 +1623,44 @@ a\_c \%abc false
16231623
\%abc a\_c false
16241624
\%abc %abc true
16251625
\%abc \%abc false
1626+
1627+
# test utf8, largeutf8, utf8view, DictionaryString for bit_length
1628+
query IIII
1629+
SELECT
1630+
bit_length('Andrew'),
1631+
bit_length('datafusion数据融合'),
1632+
bit_length('💖'),
1633+
bit_length('josé')
1634+
;
1635+
----
1636+
48 176 32 40
1637+
1638+
query IIII
1639+
SELECT
1640+
bit_length(arrow_cast('Andrew', 'LargeUtf8')),
1641+
bit_length(arrow_cast('datafusion数据融合', 'LargeUtf8')),
1642+
bit_length(arrow_cast('💖', 'LargeUtf8')),
1643+
bit_length(arrow_cast('josé', 'LargeUtf8'))
1644+
;
1645+
----
1646+
48 176 32 40
1647+
1648+
query IIII
1649+
SELECT
1650+
bit_length(arrow_cast('Andrew', 'Utf8View')),
1651+
bit_length(arrow_cast('datafusion数据融合', 'Utf8View')),
1652+
bit_length(arrow_cast('💖', 'Utf8View')),
1653+
bit_length(arrow_cast('josé', 'Utf8View'))
1654+
;
1655+
----
1656+
48 176 32 40
1657+
1658+
query IIII
1659+
SELECT
1660+
bit_length(arrow_cast('Andrew', 'Dictionary(Int32, Utf8)')),
1661+
bit_length(arrow_cast('datafusion数据融合', 'Dictionary(Int32, Utf8)')),
1662+
bit_length(arrow_cast('💖', 'Dictionary(Int32, Utf8)')),
1663+
bit_length(arrow_cast('josé', 'Dictionary(Int32, Utf8)'))
1664+
;
1665+
----
1666+
48 176 32 40

datafusion/sqllogictest/test_files/string/string_view.slt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ select octet_length(column1_utf8view) from test;
9393
0
9494
NULL
9595

96+
# TODO: Revisit this issue after upgrading to the arrow-rs version that includes apache/arrow-rs#6671.
9697
query error DataFusion error: Arrow error: Compute error: bit_length not supported for Utf8View
9798
select bit_length(column1_utf8view) from test;
9899

0 commit comments

Comments
 (0)