Skip to content

Commit d857563

Browse files
authored
Fully support LIKE/ILIKE with Utf8View (#14379)
* Fully support LIKE/ILIKE with Utf8View * Update test * Fix typo * Add literal tests for LIKE
1 parent edbdefe commit d857563

File tree

5 files changed

+170
-139
lines changed

5 files changed

+170
-139
lines changed

datafusion/sql/src/expr/mod.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ use datafusion_common::{
3030
internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result,
3131
ScalarValue,
3232
};
33+
3334
use datafusion_expr::expr::ScalarFunction;
3435
use datafusion_expr::expr::{InList, WildcardOptions};
3536
use datafusion_expr::{
@@ -820,10 +821,6 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
820821
return not_impl_err!("ANY in LIKE expression");
821822
}
822823
let pattern = self.sql_expr_to_logical_expr(pattern, schema, planner_context)?;
823-
let pattern_type = pattern.get_type(schema)?;
824-
if pattern_type != DataType::Utf8 && pattern_type != DataType::Null {
825-
return plan_err!("Invalid pattern in LIKE expression");
826-
}
827824
let escape_char = if let Some(char) = escape_char {
828825
if char.len() != 1 {
829826
return plan_err!("Invalid escape character in LIKE expression");

datafusion/sqllogictest/test_files/scalar.slt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1689,6 +1689,11 @@ true true false false true true
16891689
statement ok
16901690
drop table t1
16911691

1692+
# LIKE cannot be used with non-string operands
1693+
query error There isn't a common type to coerce Utf8 and Int64 in LIKE expression
1694+
select column1 like 1 from (values('a'), ('b'), (NULL)) as t;
1695+
1696+
16921697
# like nlike with null lt
16931698
query BB rowsort
16941699
SELECT column1 like NULL as col_null, NULL like column1 as null_col from (values('a'), ('b'), (NULL)) as t

datafusion/sqllogictest/test_files/string/string.slt

Lines changed: 0 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -41,141 +41,6 @@ select arrow_cast(col1, 'Utf8') as c1 from test_substr_base;
4141
#
4242
include ./string_query.slt.part
4343

44-
# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part`
45-
# dynamic LIKE as filter
46-
query TTT rowsort
47-
SELECT ascii_1, 'is LIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 LIKE ascii_2
48-
UNION ALL
49-
SELECT ascii_1, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 NOT LIKE ascii_2
50-
UNION ALL
51-
SELECT unicode_1, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 LIKE ascii_2
52-
UNION ALL
53-
SELECT unicode_1, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 NOT LIKE ascii_2
54-
UNION ALL
55-
SELECT unicode_2, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 LIKE ascii_2
56-
UNION ALL
57-
SELECT unicode_2, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT LIKE ascii_2
58-
----
59-
% is LIKE \%
60-
(empty) is LIKE %
61-
(empty) is LIKE %
62-
(empty) is LIKE %
63-
(empty) is LIKE %%
64-
(empty) is LIKE %%
65-
(empty) is LIKE %%
66-
(empty) is NOT LIKE \%
67-
(empty) is NOT LIKE \%
68-
(empty) is NOT LIKE \_
69-
(empty) is NOT LIKE \_
70-
Andrew is NOT LIKE X
71-
Pan Tadeusz ma frunąć stąd w kąt is NOT LIKE p%t
72-
Raphael is NOT LIKE R
73-
Xiangpeng is LIKE Xiangpeng
74-
_ is LIKE \_
75-
chrząszcz na łące w 東京都 is NOT LIKE un_____core
76-
datafusionДатаФусион is NOT LIKE R
77-
datafusion数据融合 is NOT LIKE Xiangpeng
78-
datafusion数据融合 is NOT LIKE Xiangpeng
79-
datafusion📊🔥 is NOT LIKE X
80-
pan Tadeusz ma iść w kąt is LIKE p%t
81-
percent is LIKE p%t
82-
un iść core is LIKE un_____core
83-
under_score is LIKE un_____core
84-
аФус is NOT LIKE R
85-
🔥 is NOT LIKE R
86-
🔥 is NOT LIKE X
87-
88-
# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part`
89-
# dynamic LIKE as projection
90-
query TTTTBBBB rowsort
91-
SELECT
92-
ascii_1, ascii_2, unicode_1, unicode_2,
93-
(ascii_1 LIKE ascii_2) AS ascii_1_like_ascii_2,
94-
(ascii_2 LIKE ascii_1) AS ascii_2_like_ascii_1,
95-
(unicode_1 LIKE ascii_2) AS unicode_1_like_ascii_2,
96-
(unicode_2 LIKE ascii_2) AS unicode_2_like_ascii_2
97-
FROM test_basic_operator
98-
----
99-
% \% (empty) (empty) true true false false
100-
(empty) % (empty) (empty) true false true true
101-
(empty) %% (empty) (empty) true false true true
102-
Andrew X datafusion📊🔥 🔥 false false false false
103-
NULL % NULL NULL NULL NULL NULL NULL
104-
NULL R NULL 🔥 NULL NULL NULL false
105-
Raphael R datafusionДатаФусион аФус false false false false
106-
Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false
107-
_ \_ (empty) (empty) true false false false
108-
percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt true false true false
109-
under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false
110-
111-
# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part`
112-
# dynamic ILIKE as filter
113-
query TTT rowsort
114-
SELECT ascii_1, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 ILIKE ascii_2
115-
UNION ALL
116-
SELECT ascii_1, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 NOT ILIKE ascii_2
117-
UNION ALL
118-
SELECT unicode_1, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 ILIKE ascii_2
119-
UNION ALL
120-
SELECT unicode_1, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 NOT ILIKE ascii_2
121-
UNION ALL
122-
SELECT unicode_2, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 ILIKE ascii_2
123-
UNION ALL
124-
SELECT unicode_2, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT ILIKE ascii_2
125-
----
126-
% is ILIKE \%
127-
(empty) is ILIKE %
128-
(empty) is ILIKE %
129-
(empty) is ILIKE %
130-
(empty) is ILIKE %%
131-
(empty) is ILIKE %%
132-
(empty) is ILIKE %%
133-
(empty) is NOT ILIKE \%
134-
(empty) is NOT ILIKE \%
135-
(empty) is NOT ILIKE \_
136-
(empty) is NOT ILIKE \_
137-
Andrew is NOT ILIKE X
138-
Pan Tadeusz ma frunąć stąd w kąt is ILIKE p%t
139-
Raphael is NOT ILIKE R
140-
Xiangpeng is ILIKE Xiangpeng
141-
_ is ILIKE \_
142-
chrząszcz na łące w 東京都 is NOT ILIKE un_____core
143-
datafusionДатаФусион is NOT ILIKE R
144-
datafusion数据融合 is NOT ILIKE Xiangpeng
145-
datafusion数据融合 is NOT ILIKE Xiangpeng
146-
datafusion📊🔥 is NOT ILIKE X
147-
pan Tadeusz ma iść w kąt is ILIKE p%t
148-
percent is ILIKE p%t
149-
un iść core is ILIKE un_____core
150-
under_score is ILIKE un_____core
151-
аФус is NOT ILIKE R
152-
🔥 is NOT ILIKE R
153-
🔥 is NOT ILIKE X
154-
155-
# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part`
156-
# dynamic ILIKE as projection
157-
query TTTTBBBB rowsort
158-
SELECT
159-
ascii_1, ascii_2, unicode_1, unicode_2,
160-
(ascii_1 ILIKE ascii_2) AS ascii_1_ilike_ascii_2,
161-
(ascii_2 ILIKE ascii_1) AS ascii_2_ilike_ascii_1,
162-
(unicode_1 ILIKE ascii_2) AS unicode_1_ilike_ascii_2,
163-
(unicode_2 ILIKE ascii_2) AS unicode_2_ilike_ascii_2
164-
FROM test_basic_operator
165-
----
166-
% \% (empty) (empty) true true false false
167-
(empty) % (empty) (empty) true false true true
168-
(empty) %% (empty) (empty) true false true true
169-
Andrew X datafusion📊🔥 🔥 false false false false
170-
NULL % NULL NULL NULL NULL NULL NULL
171-
NULL R NULL 🔥 NULL NULL NULL false
172-
Raphael R datafusionДатаФусион аФус false false false false
173-
Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false
174-
_ \_ (empty) (empty) true false false false
175-
percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt true false true true
176-
under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false
177-
178-
17944

18045
#
18146
# Clean up

datafusion/sqllogictest/test_files/string/string_literal.slt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -839,6 +839,28 @@ SELECT
839839
----
840840
NULL true true
841841

842+
# Literals with different arrow types
843+
query BBBB
844+
select
845+
arrow_cast('foobar', 'Utf8') LIKE arrow_cast('foo%', 'Utf8'),
846+
arrow_cast('foobar', 'LargeUtf8') LIKE arrow_cast('foo%', 'LargeUtf8'),
847+
arrow_cast('foobar', 'Utf8View') LIKE arrow_cast('foo%', 'Utf8View'),
848+
arrow_cast('foobar', 'Dictionary(Int32, Utf8)') LIKE arrow_cast('foo%', 'Dictionary(Int32, Utf8)')
849+
----
850+
true true true true
851+
852+
# Literal with UTF8 string and different arrow types for pattern
853+
query BBBB
854+
select
855+
'foobar' LIKE arrow_cast('foo%', 'Utf8'),
856+
'foobar' LIKE arrow_cast('foo%', 'LargeUtf8'),
857+
'foobar' LIKE arrow_cast('foo%', 'Utf8View'),
858+
'foobar' LIKE arrow_cast('foo%', 'Dictionary(Int32, Utf8)')
859+
----
860+
true true true true
861+
862+
# Escapes
863+
842864
# \ is an implicit escape character
843865
query BBBB
844866
SELECT

datafusion/sqllogictest/test_files/string/string_query.slt.part

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,148 @@ _ (empty) false true false true
188188
NULL NULL NULL NULL NULL NULL
189189
NULL NULL NULL NULL NULL NULL
190190

191+
# --------------------------------------
192+
# dynamic LIKE as filter
193+
# --------------------------------------
194+
195+
query TTT rowsort
196+
SELECT ascii_1, 'is LIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 LIKE ascii_2
197+
UNION ALL
198+
SELECT ascii_1, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 NOT LIKE ascii_2
199+
UNION ALL
200+
SELECT unicode_1, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 LIKE ascii_2
201+
UNION ALL
202+
SELECT unicode_1, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 NOT LIKE ascii_2
203+
UNION ALL
204+
SELECT unicode_2, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 LIKE ascii_2
205+
UNION ALL
206+
SELECT unicode_2, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT LIKE ascii_2
207+
----
208+
% is LIKE \%
209+
(empty) is LIKE %
210+
(empty) is LIKE %
211+
(empty) is LIKE %
212+
(empty) is LIKE %%
213+
(empty) is LIKE %%
214+
(empty) is LIKE %%
215+
(empty) is NOT LIKE \%
216+
(empty) is NOT LIKE \%
217+
(empty) is NOT LIKE \_
218+
(empty) is NOT LIKE \_
219+
Andrew is NOT LIKE X
220+
Pan Tadeusz ma frunąć stąd w kąt is NOT LIKE p%t
221+
Raphael is NOT LIKE R
222+
Xiangpeng is LIKE Xiangpeng
223+
_ is LIKE \_
224+
chrząszcz na łące w 東京都 is NOT LIKE un_____core
225+
datafusionДатаФусион is NOT LIKE R
226+
datafusion数据融合 is NOT LIKE Xiangpeng
227+
datafusion数据融合 is NOT LIKE Xiangpeng
228+
datafusion📊🔥 is NOT LIKE X
229+
pan Tadeusz ma iść w kąt is LIKE p%t
230+
percent is LIKE p%t
231+
un iść core is LIKE un_____core
232+
under_score is LIKE un_____core
233+
аФус is NOT LIKE R
234+
🔥 is NOT LIKE R
235+
🔥 is NOT LIKE X
236+
237+
# --------------------------------------
238+
# dynamic LIKE as projection
239+
# --------------------------------------
240+
241+
query TTTTBBBB rowsort
242+
SELECT
243+
ascii_1, ascii_2, unicode_1, unicode_2,
244+
(ascii_1 LIKE ascii_2) AS ascii_1_like_ascii_2,
245+
(ascii_2 LIKE ascii_1) AS ascii_2_like_ascii_1,
246+
(unicode_1 LIKE ascii_2) AS unicode_1_like_ascii_2,
247+
(unicode_2 LIKE ascii_2) AS unicode_2_like_ascii_2
248+
FROM test_basic_operator
249+
----
250+
% \% (empty) (empty) true true false false
251+
(empty) % (empty) (empty) true false true true
252+
(empty) %% (empty) (empty) true false true true
253+
Andrew X datafusion📊🔥 🔥 false false false false
254+
NULL % NULL NULL NULL NULL NULL NULL
255+
NULL R NULL 🔥 NULL NULL NULL false
256+
Raphael R datafusionДатаФусион аФус false false false false
257+
Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false
258+
_ \_ (empty) (empty) true false false false
259+
percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt true false true false
260+
under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false
261+
262+
# --------------------------------------
263+
# dynamic ILIKE as filter
264+
# --------------------------------------
265+
266+
query TTT rowsort
267+
SELECT ascii_1, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 ILIKE ascii_2
268+
UNION ALL
269+
SELECT ascii_1, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 NOT ILIKE ascii_2
270+
UNION ALL
271+
SELECT unicode_1, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 ILIKE ascii_2
272+
UNION ALL
273+
SELECT unicode_1, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 NOT ILIKE ascii_2
274+
UNION ALL
275+
SELECT unicode_2, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 ILIKE ascii_2
276+
UNION ALL
277+
SELECT unicode_2, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT ILIKE ascii_2
278+
----
279+
% is ILIKE \%
280+
(empty) is ILIKE %
281+
(empty) is ILIKE %
282+
(empty) is ILIKE %
283+
(empty) is ILIKE %%
284+
(empty) is ILIKE %%
285+
(empty) is ILIKE %%
286+
(empty) is NOT ILIKE \%
287+
(empty) is NOT ILIKE \%
288+
(empty) is NOT ILIKE \_
289+
(empty) is NOT ILIKE \_
290+
Andrew is NOT ILIKE X
291+
Pan Tadeusz ma frunąć stąd w kąt is ILIKE p%t
292+
Raphael is NOT ILIKE R
293+
Xiangpeng is ILIKE Xiangpeng
294+
_ is ILIKE \_
295+
chrząszcz na łące w 東京都 is NOT ILIKE un_____core
296+
datafusionДатаФусион is NOT ILIKE R
297+
datafusion数据融合 is NOT ILIKE Xiangpeng
298+
datafusion数据融合 is NOT ILIKE Xiangpeng
299+
datafusion📊🔥 is NOT ILIKE X
300+
pan Tadeusz ma iść w kąt is ILIKE p%t
301+
percent is ILIKE p%t
302+
un iść core is ILIKE un_____core
303+
under_score is ILIKE un_____core
304+
аФус is NOT ILIKE R
305+
🔥 is NOT ILIKE R
306+
🔥 is NOT ILIKE X
307+
308+
# --------------------------------------
309+
# dynamic ILIKE as projection
310+
# --------------------------------------
311+
query TTTTBBBB rowsort
312+
SELECT
313+
ascii_1, ascii_2, unicode_1, unicode_2,
314+
(ascii_1 ILIKE ascii_2) AS ascii_1_ilike_ascii_2,
315+
(ascii_2 ILIKE ascii_1) AS ascii_2_ilike_ascii_1,
316+
(unicode_1 ILIKE ascii_2) AS unicode_1_ilike_ascii_2,
317+
(unicode_2 ILIKE ascii_2) AS unicode_2_ilike_ascii_2
318+
FROM test_basic_operator
319+
----
320+
% \% (empty) (empty) true true false false
321+
(empty) % (empty) (empty) true false true true
322+
(empty) %% (empty) (empty) true false true true
323+
Andrew X datafusion📊🔥 🔥 false false false false
324+
NULL % NULL NULL NULL NULL NULL NULL
325+
NULL R NULL 🔥 NULL NULL NULL false
326+
Raphael R datafusionДатаФусион аФус false false false false
327+
Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false
328+
_ \_ (empty) (empty) true false false false
329+
percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt true false true true
330+
under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false
331+
332+
191333
# --------------------------------------
192334
# substr function
193335
# --------------------------------------

0 commit comments

Comments
 (0)