Skip to content

Commit 053b53e

Browse files
authored
Int64 as default type for make_array function empty or null case (#10790)
* set default type i64
  Signed-off-by: jayzhan211 <[email protected]>
* fmt
  Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
1 parent c580ef4 commit 053b53e

File tree

5 files changed

+85
-48
lines changed

datafusion/functions-array/src/empty.rs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use crate::utils::make_scalar_function;
2121
use arrow_array::{ArrayRef, BooleanArray, OffsetSizeTrait};
2222
use arrow_schema::DataType;
2323
use arrow_schema::DataType::{Boolean, FixedSizeList, LargeList, List};
24-
use datafusion_common::cast::{as_generic_list_array, as_null_array};
24+
use datafusion_common::cast::as_generic_list_array;
2525
use datafusion_common::{exec_err, plan_err, Result};
2626
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
2727
use std::any::Any;
@@ -85,12 +85,7 @@ pub fn array_empty_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
8585
return exec_err!("array_empty expects one argument");
8686
}
8787

88-
if as_null_array(&args[0]).is_ok() {
89-
// Make sure to return Boolean type.
90-
return Ok(Arc::new(BooleanArray::new_null(args[0].len())));
91-
}
9288
let array_type = args[0].data_type();
93-
9489
match array_type {
9590
List(_) => general_array_empty::<i32>(&args[0]),
9691
LargeList(_) => general_array_empty::<i64>(&args[0]),
@@ -100,9 +95,10 @@ pub fn array_empty_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
10095

10196
fn general_array_empty<O: OffsetSizeTrait>(array: &ArrayRef) -> Result<ArrayRef> {
10297
let array = as_generic_list_array::<O>(array)?;
98+
10399
let builder = array
104100
.iter()
105-
.map(|arr| arr.map(|arr| arr.len() == arr.null_count()))
101+
.map(|arr| arr.map(|arr| arr.is_empty()))
106102
.collect::<BooleanArray>();
107103
Ok(Arc::new(builder))
108104
}

datafusion/functions-array/src/make_array.rs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,7 @@ impl ScalarUDFImpl for MakeArray {
8080

8181
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
8282
match arg_types.len() {
83-
0 => Ok(DataType::List(Arc::new(Field::new(
84-
"item",
85-
DataType::Null,
86-
true,
87-
)))),
83+
0 => Ok(empty_array_type()),
8884
_ => {
8985
let mut expr_type = DataType::Null;
9086
for arg_type in arg_types {
@@ -94,6 +90,10 @@ impl ScalarUDFImpl for MakeArray {
9490
}
9591
}
9692

93+
if expr_type.is_null() {
94+
expr_type = DataType::Int64;
95+
}
96+
9797
Ok(List(Arc::new(Field::new("item", expr_type, true))))
9898
}
9999
}
@@ -131,6 +131,11 @@ impl ScalarUDFImpl for MakeArray {
131131
}
132132
}
133133

134+
// Empty array is a special case that is useful for many other array functions
135+
pub(super) fn empty_array_type() -> DataType {
136+
DataType::List(Arc::new(Field::new("item", DataType::Int64, true)))
137+
}
138+
134139
/// `make_array_inner` is the implementation of the `make_array` function.
135140
/// Constructs an array using the input `data` as `ArrayRef`.
136141
/// Returns a reference-counted `Array` instance result.
@@ -147,7 +152,9 @@ pub(crate) fn make_array_inner(arrays: &[ArrayRef]) -> Result<ArrayRef> {
147152
match data_type {
148153
// Either an empty array or all nulls:
149154
Null => {
150-
let array = new_null_array(&Null, arrays.iter().map(|a| a.len()).sum());
155+
let length = arrays.iter().map(|a| a.len()).sum();
156+
// No element type can be inferred (empty or all-null input), so default to Int64
157+
let array = new_null_array(&DataType::Int64, length);
151158
Ok(Arc::new(array_into_list_array(array)))
152159
}
153160
LargeList(..) => array_array::<i64>(arrays, data_type),

datafusion/functions-array/src/set_ops.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
//! [`ScalarUDFImpl`] definitions for array_union, array_intersect and array_distinct functions.
1919
20-
use crate::make_array::make_array_inner;
20+
use crate::make_array::{empty_array_type, make_array_inner};
2121
use crate::utils::make_scalar_function;
2222
use arrow::array::{new_empty_array, Array, ArrayRef, GenericListArray, OffsetSizeTrait};
2323
use arrow::buffer::OffsetBuffer;
@@ -135,7 +135,7 @@ impl ScalarUDFImpl for ArrayIntersect {
135135
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
136136
match (arg_types[0].clone(), arg_types[1].clone()) {
137137
(Null, Null) | (Null, _) => Ok(Null),
138-
(_, Null) => Ok(List(Arc::new(Field::new("item", Null, true)))),
138+
(_, Null) => Ok(empty_array_type()),
139139
(dt, _) => Ok(dt),
140140
}
141141
}
@@ -259,6 +259,17 @@ fn generic_set_lists<OffsetSize: OffsetSizeTrait>(
259259
return general_array_distinct::<OffsetSize>(l, &field);
260260
}
261261

262+
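// Handle the case where the right-hand side is an empty array (NOTE(review): only row 0 of `r` is checked below; confirm this holds for multi-row input)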
263+
// array_union(arr, []) -> arr;
264+
// array_intersect(arr, []) -> [];
265+
if r.value_length(0).is_zero() {
266+
if set_op == SetOp::Union {
267+
return Ok(Arc::new(l.clone()) as ArrayRef);
268+
} else {
269+
return Ok(Arc::new(r.clone()) as ArrayRef);
270+
}
271+
}
272+
262273
if l.value_type() != r.value_type() {
263274
return internal_err!("{set_op:?} is not implemented for '{l:?}' and '{r:?}'");
264275
}

datafusion/sqllogictest/test_files/array.slt

Lines changed: 52 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -346,8 +346,8 @@ AS VALUES
346346
(arrow_cast(make_array([[1,2]], [[3, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')),
347347
(arrow_cast(make_array([[1,2]], [[4, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1,2], [3, 4]), 'FixedSizeList(2, List(Int64))')),
348348
(arrow_cast(make_array([[1,2]], [[4, 4]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1,2,3], [1]), 'FixedSizeList(2, List(Int64))')),
349-
(arrow_cast(make_array([[1], [2]], []), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([2], [3]), 'FixedSizeList(2, List(Int64))')),
350-
(arrow_cast(make_array([[1], [2]], []), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')),
349+
(arrow_cast(make_array([[1], [2]], [[]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([2], [3]), 'FixedSizeList(2, List(Int64))')),
350+
(arrow_cast(make_array([[1], [2]], [[]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')),
351351
(arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))')),
352352
(arrow_cast(make_array([[1], [2]], [[2], [3]]), 'FixedSizeList(2, List(List(Int64)))'), arrow_cast(make_array([1], [2]), 'FixedSizeList(2, List(Int64))'))
353353
;
@@ -2038,6 +2038,13 @@ NULL
20382038
[, 51, 52, 54, 55, 56, 57, 58, 59, 60]
20392039
[61, 62, 63, 64, 65, 66, 67, 68, 69, 70]
20402040

2041+
# test with empty array
2042+
query ?
2043+
select array_sort([]);
2044+
----
2045+
[]
2046+
2047+
# test with empty rows: a row that does not match the condition has a row count of 0
20412048
statement ok
20422049
create table t1(a int, b int) as values (100, 1), (101, 2), (102, 3), (101, 2);
20432050

@@ -2083,10 +2090,10 @@ select
20832090

20842091
query ????
20852092
select
2086-
array_append(arrow_cast(make_array(), 'LargeList(Null)'), 4),
2087-
array_append(arrow_cast(make_array(), 'LargeList(Null)'), null),
2093+
array_append(arrow_cast(make_array(), 'LargeList(Int64)'), 4),
2094+
array_append(arrow_cast(make_array(), 'LargeList(Int64)'), null),
20882095
array_append(arrow_cast(make_array(1, null, 3), 'LargeList(Int64)'), 4),
2089-
array_append(arrow_cast(make_array(null, null), 'LargeList(Null)'), 1)
2096+
array_append(arrow_cast(make_array(null, null), 'LargeList(Int64)'), 1)
20902097
;
20912098
----
20922099
[4] [] [1, , 3, 4] [, , 1]
@@ -2567,7 +2574,7 @@ query ????
25672574
select
25682575
array_repeat(arrow_cast([1], 'LargeList(Int64)'), 5),
25692576
array_repeat(arrow_cast([1.1, 2.2, 3.3], 'LargeList(Float64)'), 3),
2570-
array_repeat(arrow_cast([null, null], 'LargeList(Null)'), 3),
2577+
array_repeat(arrow_cast([null, null], 'LargeList(Int64)'), 3),
25712578
array_repeat(arrow_cast([[1, 2], [3, 4]], 'LargeList(List(Int64))'), 2);
25722579
----
25732580
[[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[, ], [, ], [, ]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]]
@@ -2630,6 +2637,12 @@ drop table large_array_repeat_table;
26302637

26312638
## array_concat (aliases: `array_cat`, `list_concat`, `list_cat`)
26322639

2640+
# test with empty array
2641+
query ?
2642+
select array_concat([]);
2643+
----
2644+
[]
2645+
26332646
# array_concat error
26342647
query error DataFusion error: Error during planning: The array_concat function can only accept list as the args\.
26352648
select array_concat(1, 2);
@@ -2674,19 +2687,19 @@ select array_concat(make_array(), make_array(2, 3));
26742687
query ?
26752688
select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array()));
26762689
----
2677-
[[1, 2], [3, 4]]
2690+
[[1, 2], [3, 4], []]
26782691

26792692
# array_concat scalar function #8 (with empty arrays)
26802693
query ?
26812694
select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array()), make_array(make_array(), make_array()), make_array(make_array(5, 6), make_array(7, 8)));
26822695
----
2683-
[[1, 2], [3, 4], [5, 6], [7, 8]]
2696+
[[1, 2], [3, 4], [], [], [], [5, 6], [7, 8]]
26842697

26852698
# array_concat scalar function #9 (with empty arrays)
26862699
query ?
26872700
select array_concat(make_array(make_array()), make_array(make_array(1, 2), make_array(3, 4)));
26882701
----
2689-
[[1, 2], [3, 4]]
2702+
[[], [1, 2], [3, 4]]
26902703

26912704
# array_cat scalar function #10 (function alias `array_concat`)
26922705
query ??
@@ -3788,7 +3801,7 @@ select array_union([1,2,3], []);
37883801
[1, 2, 3]
37893802

37903803
query ?
3791-
select array_union(arrow_cast([1,2,3], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Null)'));
3804+
select array_union(arrow_cast([1,2,3], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)'));
37923805
----
37933806
[1, 2, 3]
37943807

@@ -3836,7 +3849,7 @@ select array_union([], []);
38363849
[]
38373850

38383851
query ?
3839-
select array_union(arrow_cast([], 'LargeList(Null)'), arrow_cast([], 'LargeList(Null)'));
3852+
select array_union(arrow_cast([], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)'));
38403853
----
38413854
[]
38423855

@@ -3847,7 +3860,7 @@ select array_union([[null]], []);
38473860
[[]]
38483861

38493862
query ?
3850-
select array_union(arrow_cast([[null]], 'LargeList(List(Null))'), arrow_cast([], 'LargeList(Null)'));
3863+
select array_union(arrow_cast([[null]], 'LargeList(List(Int64))'), arrow_cast([], 'LargeList(Int64)'));
38513864
----
38523865
[[]]
38533866

@@ -3858,7 +3871,7 @@ select array_union([null], [null]);
38583871
[]
38593872

38603873
query ?
3861-
select array_union(arrow_cast([[null]], 'LargeList(List(Null))'), arrow_cast([[null]], 'LargeList(List(Null))'));
3874+
select array_union(arrow_cast([[null]], 'LargeList(List(Int64))'), arrow_cast([[null]], 'LargeList(List(Int64))'));
38623875
----
38633876
[[]]
38643877

@@ -3869,7 +3882,7 @@ select array_union(null, []);
38693882
[]
38703883

38713884
query ?
3872-
select array_union(null, arrow_cast([], 'LargeList(Null)'));
3885+
select array_union(null, arrow_cast([], 'LargeList(Int64)'));
38733886
----
38743887
[]
38753888

@@ -4106,14 +4119,14 @@ select cardinality(make_array()), cardinality(make_array(make_array()))
41064119
NULL 0
41074120

41084121
query II
4109-
select cardinality(arrow_cast(make_array(), 'LargeList(Null)')), cardinality(arrow_cast(make_array(make_array()), 'LargeList(List(Null))'))
4122+
select cardinality(arrow_cast(make_array(), 'LargeList(Int64)')), cardinality(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))'))
41104123
----
41114124
NULL 0
41124125

41134126
#TODO
41144127
#https://github.com/apache/datafusion/issues/9158
41154128
#query II
4116-
#select cardinality(arrow_cast(make_array(), 'FixedSizeList(1, Null)')), cardinality(arrow_cast(make_array(make_array()), 'FixedSizeList(1, List(Null))'))
4129+
#select cardinality(arrow_cast(make_array(), 'FixedSizeList(1, Null)')), cardinality(arrow_cast(make_array(make_array()), 'FixedSizeList(1, List(Int64))'))
41174130
#----
41184131
#NULL 0
41194132

@@ -4699,7 +4712,7 @@ select array_dims(make_array()), array_dims(make_array(make_array()))
46994712
NULL [1, 0]
47004713

47014714
query ??
4702-
select array_dims(arrow_cast(make_array(), 'LargeList(Null)')), array_dims(arrow_cast(make_array(make_array()), 'LargeList(List(Null))'))
4715+
select array_dims(arrow_cast(make_array(), 'LargeList(Int64)')), array_dims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))'))
47034716
----
47044717
NULL [1, 0]
47054718

@@ -4861,7 +4874,7 @@ select array_ndims(make_array()), array_ndims(make_array(make_array()))
48614874
1 2
48624875

48634876
query II
4864-
select array_ndims(arrow_cast(make_array(), 'LargeList(Null)')), array_ndims(arrow_cast(make_array(make_array()), 'LargeList(List(Null))'))
4877+
select array_ndims(arrow_cast(make_array(), 'LargeList(Int64)')), array_ndims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))'))
48654878
----
48664879
1 2
48674880

@@ -4882,7 +4895,7 @@ select list_ndims(make_array()), list_ndims(make_array(make_array()))
48824895
1 2
48834896

48844897
query II
4885-
select list_ndims(arrow_cast(make_array(), 'LargeList(Null)')), list_ndims(arrow_cast(make_array(make_array()), 'LargeList(List(Null))'))
4898+
select list_ndims(arrow_cast(make_array(), 'LargeList(Int64)')), list_ndims(arrow_cast(make_array(make_array()), 'LargeList(List(Int64))'))
48864899
----
48874900
1 2
48884901

@@ -5500,7 +5513,7 @@ select array_intersect([], []);
55005513
[]
55015514

55025515
query ?
5503-
select array_intersect(arrow_cast([], 'LargeList(Null)'), arrow_cast([], 'LargeList(Null)'));
5516+
select array_intersect(arrow_cast([], 'LargeList(Int64)'), arrow_cast([], 'LargeList(Int64)'));
55045517
----
55055518
[]
55065519

@@ -5530,7 +5543,17 @@ select array_intersect([], null);
55305543
[]
55315544

55325545
query ?
5533-
select array_intersect(arrow_cast([], 'LargeList(Null)'), null);
5546+
select array_intersect([[1,2,3]], [[]]);
5547+
----
5548+
[]
5549+
5550+
query ?
5551+
select array_intersect([[null]], [[]]);
5552+
----
5553+
[]
5554+
5555+
query ?
5556+
select array_intersect(arrow_cast([], 'LargeList(Int64)'), null);
55345557
----
55355558
[]
55365559

@@ -5540,7 +5563,7 @@ select array_intersect(null, []);
55405563
NULL
55415564

55425565
query ?
5543-
select array_intersect(null, arrow_cast([], 'LargeList(Null)'));
5566+
select array_intersect(null, arrow_cast([], 'LargeList(Int64)'));
55445567
----
55455568
NULL
55465569

@@ -6196,7 +6219,7 @@ select empty(make_array());
61966219
true
61976220

61986221
query B
6199-
select empty(arrow_cast(make_array(), 'LargeList(Null)'));
6222+
select empty(arrow_cast(make_array(), 'LargeList(Int64)'));
62006223
----
62016224
true
62026225

@@ -6213,12 +6236,12 @@ select empty(make_array(NULL));
62136236
false
62146237

62156238
query B
6216-
select empty(arrow_cast(make_array(NULL), 'LargeList(Null)'));
6239+
select empty(arrow_cast(make_array(NULL), 'LargeList(Int64)'));
62176240
----
62186241
false
62196242

62206243
query B
6221-
select empty(arrow_cast(make_array(NULL), 'FixedSizeList(1, Null)'));
6244+
select empty(arrow_cast(make_array(NULL), 'FixedSizeList(1, Int64)'));
62226245
----
62236246
false
62246247

@@ -6282,7 +6305,7 @@ select array_empty(make_array());
62826305
true
62836306

62846307
query B
6285-
select array_empty(arrow_cast(make_array(), 'LargeList(Null)'));
6308+
select array_empty(arrow_cast(make_array(), 'LargeList(Int64)'));
62866309
----
62876310
true
62886311

@@ -6293,7 +6316,7 @@ select array_empty(make_array(NULL));
62936316
false
62946317

62956318
query B
6296-
select array_empty(arrow_cast(make_array(NULL), 'LargeList(Null)'));
6319+
select array_empty(arrow_cast(make_array(NULL), 'LargeList(Int64)'));
62976320
----
62986321
false
62996322

@@ -6316,7 +6339,7 @@ select list_empty(make_array());
63166339
true
63176340

63186341
query B
6319-
select list_empty(arrow_cast(make_array(), 'LargeList(Null)'));
6342+
select list_empty(arrow_cast(make_array(), 'LargeList(Int64)'));
63206343
----
63216344
true
63226345

@@ -6327,7 +6350,7 @@ select list_empty(make_array(NULL));
63276350
false
63286351

63296352
query B
6330-
select list_empty(arrow_cast(make_array(NULL), 'LargeList(Null)'));
6353+
select list_empty(arrow_cast(make_array(NULL), 'LargeList(Int64)'));
63316354
----
63326355
false
63336356

0 commit comments

Comments
 (0)