Skip to content

Commit 44b83a1

Browse files
authored
feat: New functions and operations for working with arrays (#6384)
* feat: multidimensional arrays
* feat: array_append, array_prepend, array_concat, array_fill
* feat: array_dims, array_length
* feat: array_position, array_positions, array_remove, array_replace, array_to_string
* feat: trim_array, cardinality
* feat: docs
* refactoring: code cleanup
* fix: test_scalar_expr
* fix: clippy
* fix: array_concat capacity
* fix: cargo fmt
1 parent 91e75d7 commit 44b83a1

File tree

14 files changed

+2532
-110
lines changed

14 files changed

+2532
-110
lines changed

datafusion/common/src/cast.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use arrow::{
3030
IntervalYearMonthArray, LargeListArray, ListArray, MapArray, NullArray,
3131
OffsetSizeTrait, PrimitiveArray, StringArray, StructArray,
3232
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
33-
TimestampSecondArray, UInt32Array, UInt64Array, UnionArray,
33+
TimestampSecondArray, UInt32Array, UInt64Array, UInt8Array, UnionArray,
3434
},
3535
datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType},
3636
};
@@ -45,6 +45,11 @@ pub fn as_struct_array(array: &dyn Array) -> Result<&StructArray> {
4545
Ok(downcast_value!(array, StructArray))
4646
}
4747

48+
// Downcast ArrayRef to UInt8Array
49+
pub fn as_uint8_array(array: &dyn Array) -> Result<&UInt8Array> {
50+
Ok(downcast_value!(array, UInt8Array))
51+
}
52+
4853
// Downcast ArrayRef to Int32Array
4954
pub fn as_int32_array(array: &dyn Array) -> Result<&Int32Array> {
5055
Ok(downcast_value!(array, Int32Array))
(new file: path not captured in this view; sqllogictest cases for array expressions)

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
#############
19+
## Array expressions Tests
20+
#############
21+
22+
# array scalar function #1
23+
query ??? rowsort
24+
select make_array(1, 2, 3), make_array(1.0, 2.0, 3.0), make_array('h', 'e', 'l', 'l', 'o');
25+
----
26+
[1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o]
27+
28+
# array scalar function #2
29+
query ??? rowsort
30+
select make_array(1, 2, 3), make_array(make_array(1, 2), make_array(3, 4)), make_array([[[[1], [2]]]]);
31+
----
32+
[1, 2, 3] [[1, 2], [3, 4]] [[[[[1], [2]]]]]
33+
34+
# array scalar function #3
35+
query ?? rowsort
36+
select make_array([1, 2, 3], [4, 5, 6], [7, 8, 9]), make_array([[1, 2], [3, 4]], [[5, 6], [7, 8]]);
37+
----
38+
[[1, 2, 3], [4, 5, 6], [7, 8, 9]] [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
39+
40+
# array scalar function #4
41+
query ?? rowsort
42+
select make_array([1.0, 2.0], [3.0, 4.0]), make_array('h', 'e', 'l', 'l', 'o');
43+
----
44+
[[1.0, 2.0], [3.0, 4.0]] [h, e, l, l, o]
45+
46+
# array scalar function #5
47+
query ? rowsort
48+
select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5, 6)), make_array(make_array(7, 8, 9), make_array(10, 11, 12))))
49+
----
50+
[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]]
51+
52+
# array_append scalar function
53+
query ??? rowsort
54+
select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o');
55+
----
56+
[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
57+
58+
# array_prepend scalar function
59+
query ??? rowsort
60+
select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0, 3.0, 4.0)), array_prepend('h', make_array('e', 'l', 'l', 'o'));
61+
----
62+
[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
63+
64+
# array_fill scalar function #1
65+
query ??? rowsort
66+
select array_fill(11, make_array(1, 2, 3)), array_fill(3, make_array(2, 3)), array_fill(2, make_array(2));
67+
----
68+
[[[11, 11, 11], [11, 11, 11]]] [[3, 3, 3], [3, 3, 3]] [2, 2]
69+
70+
# array_fill scalar function #2
71+
query ?? rowsort
72+
select array_fill(1, make_array(1, 1, 1)), array_fill(2, make_array(2, 2, 2, 2, 2));
73+
----
74+
[[[1]]] [[[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]], [[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]]]
75+
76+
# array_concat scalar function #1
77+
query ?? rowsort
78+
select array_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_concat(make_array([1], [2]), make_array([3], [4]));
79+
----
80+
[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]]
81+
82+
# array_concat scalar function #2
83+
query ? rowsort
84+
select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array(5, 6), make_array(7, 8)));
85+
----
86+
[[1, 2], [3, 4], [5, 6], [7, 8]]
87+
88+
# array_concat scalar function #3
89+
query ? rowsort
90+
select array_concat(make_array([1], [2], [3]), make_array([4], [5], [6]), make_array([7], [8], [9]));
91+
----
92+
[[1], [2], [3], [4], [5], [6], [7], [8], [9]]
93+
94+
# array_concat scalar function #4
95+
query ? rowsort
96+
select array_concat(make_array([[1]]), make_array([[2]]));
97+
----
98+
[[[1]], [[2]]]
99+
100+
# array_position scalar function #1
101+
query III
102+
select array_position(['h', 'e', 'l', 'l', 'o'], 'l'), array_position([1, 2, 3, 4, 5], 5), array_position([1, 1, 1], 1);
103+
----
104+
3 5 1
105+
106+
# array_position scalar function #2
107+
query III
108+
select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), array_position([1, 2, 5, 4, 5], 5, 4), array_position([1, 1, 1], 1, 2);
109+
----
110+
4 5 2
111+
112+
# array_positions scalar function
113+
query III
114+
select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1);
115+
----
116+
[3, 4] [5] [1, 2, 3]
117+
118+
# array_replace scalar function
119+
query ???
120+
select array_replace(make_array(1, 2, 3, 4), 2, 3), array_replace(make_array(1, 4, 4, 5, 4, 6, 7), 4, 0), array_replace(make_array(1, 2, 3), 4, 0);
121+
----
122+
[1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3]
123+
124+
# array_to_string scalar function
125+
query ???
126+
select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|');
127+
----
128+
h,e,l,l,o 1-2-3-4-5 1|2|3
129+
130+
# array_to_string scalar function #2
131+
query ???
132+
select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5, 6]], '+'), array_to_string(array_fill(3, [3, 2, 2]), '/\');
133+
----
134+
11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3
135+
136+
# cardinality scalar function
137+
query III
138+
select cardinality(make_array(1, 2, 3, 4, 5)), cardinality([1, 3, 5]), cardinality(make_array('h', 'e', 'l', 'l', 'o'));
139+
----
140+
5 3 5
141+
142+
# cardinality scalar function #2
143+
query II
144+
select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_fill(3, array[3, 2, 3]));
145+
----
146+
6 18
147+
148+
# trim_array scalar function
149+
query ???
150+
select trim_array(make_array(1, 2, 3, 4, 5), 2), trim_array(['h', 'e', 'l', 'l', 'o'], 3), trim_array([1.0, 2.0, 3.0], 2);
151+
----
152+
[1, 2, 3] [h, e] [1.0]
153+
154+
# trim_array scalar function #2
155+
query ??
156+
select trim_array([[1, 2], [3, 4], [5, 6]], 2), trim_array(array_fill(4, [3, 4, 2]), 2);
157+
----
158+
[[1, 2]] [[[4, 4], [4, 4], [4, 4], [4, 4]]]
159+
160+
# array_length scalar function
161+
query III rowsort
162+
select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2, 3)), array_length(make_array([1, 2], [3, 4], [5, 6]));
163+
----
164+
5 3 3
165+
166+
# array_length scalar function #2
167+
query III rowsort
168+
select array_length(make_array(1, 2, 3, 4, 5), 1), array_length(make_array(1, 2, 3), 1), array_length(make_array([1, 2], [3, 4], [5, 6]), 1);
169+
----
170+
5 3 3
171+
172+
# array_length scalar function #3
173+
query III rowsort
174+
select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 2, 3), 2), array_length(make_array([1, 2], [3, 4], [5, 6]), 2);
175+
----
176+
NULL NULL 2
177+
178+
# array_length scalar function #4
179+
query IIII rowsort
180+
select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3, [3, 2, 5]), 2), array_length(array_fill(3, [3, 2, 5]), 3), array_length(array_fill(3, [3, 2, 5]), 4);
181+
----
182+
3 2 5 NULL
183+
184+
# array_dims scalar function
185+
query III rowsort
186+
select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])), array_dims(make_array([[[[1], [2]]]]));
187+
----
188+
[3] [2, 2] [1, 1, 1, 2, 1]
189+
190+
# array_dims scalar function #2
191+
query II rowsort
192+
select array_dims(array_fill(2, [1, 2, 3])), array_dims(array_fill(3, [2, 5, 4]));
193+
----
194+
[1, 2, 3] [2, 5, 4]
195+
196+
# array_ndims scalar function
197+
query III rowsort
198+
select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3, 4])), array_ndims(make_array([[[[1], [2]]]]));
199+
----
200+
1 2 5
201+
202+
# array_ndims scalar function #2
203+
query II rowsort
204+
select array_ndims(array_fill(1, [1, 2, 3])), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]);
205+
----
206+
3 21

datafusion/expr/src/built_in_function.rs

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,39 @@ pub enum BuiltinScalarFunction {
102102
/// trunc
103103
Trunc,
104104

105-
// string functions
105+
// array functions
106+
/// array_append
107+
ArrayAppend,
108+
/// array_concat
109+
ArrayConcat,
110+
/// array_dims
111+
ArrayDims,
112+
/// array_fill
113+
ArrayFill,
114+
/// array_length
115+
ArrayLength,
116+
/// array_ndims
117+
ArrayNdims,
118+
/// array_position
119+
ArrayPosition,
120+
/// array_positions
121+
ArrayPositions,
122+
/// array_prepend
123+
ArrayPrepend,
124+
/// array_remove
125+
ArrayRemove,
126+
/// array_replace
127+
ArrayReplace,
128+
/// array_to_string
129+
ArrayToString,
130+
/// cardinality
131+
Cardinality,
106132
/// construct an array from columns
107133
MakeArray,
134+
/// trim_array
135+
TrimArray,
136+
137+
// string functions
108138
/// ascii
109139
Ascii,
110140
/// bit_length
@@ -280,7 +310,21 @@ impl BuiltinScalarFunction {
280310
BuiltinScalarFunction::Tan => Volatility::Immutable,
281311
BuiltinScalarFunction::Tanh => Volatility::Immutable,
282312
BuiltinScalarFunction::Trunc => Volatility::Immutable,
313+
BuiltinScalarFunction::ArrayAppend => Volatility::Immutable,
314+
BuiltinScalarFunction::ArrayConcat => Volatility::Immutable,
315+
BuiltinScalarFunction::ArrayDims => Volatility::Immutable,
316+
BuiltinScalarFunction::ArrayFill => Volatility::Immutable,
317+
BuiltinScalarFunction::ArrayLength => Volatility::Immutable,
318+
BuiltinScalarFunction::ArrayNdims => Volatility::Immutable,
319+
BuiltinScalarFunction::ArrayPosition => Volatility::Immutable,
320+
BuiltinScalarFunction::ArrayPositions => Volatility::Immutable,
321+
BuiltinScalarFunction::ArrayPrepend => Volatility::Immutable,
322+
BuiltinScalarFunction::ArrayRemove => Volatility::Immutable,
323+
BuiltinScalarFunction::ArrayReplace => Volatility::Immutable,
324+
BuiltinScalarFunction::ArrayToString => Volatility::Immutable,
325+
BuiltinScalarFunction::Cardinality => Volatility::Immutable,
283326
BuiltinScalarFunction::MakeArray => Volatility::Immutable,
327+
BuiltinScalarFunction::TrimArray => Volatility::Immutable,
284328
BuiltinScalarFunction::Ascii => Volatility::Immutable,
285329
BuiltinScalarFunction::BitLength => Volatility::Immutable,
286330
BuiltinScalarFunction::Btrim => Volatility::Immutable,
@@ -444,7 +488,21 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
444488
BuiltinScalarFunction::ArrowTypeof => &["arrow_typeof"],
445489

446490
// array functions
491+
BuiltinScalarFunction::ArrayAppend => &["array_append"],
492+
BuiltinScalarFunction::ArrayConcat => &["array_concat"],
493+
BuiltinScalarFunction::ArrayDims => &["array_dims"],
494+
BuiltinScalarFunction::ArrayFill => &["array_fill"],
495+
BuiltinScalarFunction::ArrayLength => &["array_length"],
496+
BuiltinScalarFunction::ArrayNdims => &["array_ndims"],
497+
BuiltinScalarFunction::ArrayPosition => &["array_position"],
498+
BuiltinScalarFunction::ArrayPositions => &["array_positions"],
499+
BuiltinScalarFunction::ArrayPrepend => &["array_prepend"],
500+
BuiltinScalarFunction::ArrayRemove => &["array_remove"],
501+
BuiltinScalarFunction::ArrayReplace => &["array_replace"],
502+
BuiltinScalarFunction::ArrayToString => &["array_to_string"],
503+
BuiltinScalarFunction::Cardinality => &["cardinality"],
447504
BuiltinScalarFunction::MakeArray => &["make_array"],
505+
BuiltinScalarFunction::TrimArray => &["trim_array"],
448506
}
449507
}
450508

0 commit comments

Comments
 (0)