Skip to content

Commit d841116

Browse files
authored
Allow ColumnarValue to array conversion with less copying (#13644)
`ColumnarValue::into_array` takes ownership of the columnar value and so requires copying data if the call site doesn't own the value. This does not matter in many cases, since the `into_array` internally will copy data even more. It does matter however for the cases when to-array is called with desired array length of 1, which may happen when UDF implementation attempts to normalize arguments into arrays without expanding them. This pattern can be seen in regexp functions, but can be useful in downstream projects too.
1 parent 9fbd87f commit d841116

File tree

7 files changed

+28
-10
lines changed

7 files changed

+28
-10
lines changed

datafusion/expr-common/src/columnar_value.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,24 @@ impl ColumnarValue {
129129
})
130130
}
131131

132+
/// Convert a columnar value into an Arrow [`ArrayRef`] with the specified
133+
/// number of rows. [`Self::Scalar`] is converted by repeating the same
134+
/// scalar multiple times which is not as efficient as handling the scalar
135+
/// directly.
136+
///
137+
/// See [`Self::values_to_arrays`] to convert multiple columnar values into
138+
/// arrays of the same length.
139+
///
140+
/// # Errors
141+
///
142+
/// Errors if `self` is a Scalar that fails to be converted into an array of size
143+
pub fn to_array(&self, num_rows: usize) -> Result<ArrayRef> {
144+
Ok(match self {
145+
ColumnarValue::Array(array) => Arc::clone(array),
146+
ColumnarValue::Scalar(scalar) => scalar.to_array_of_size(num_rows)?,
147+
})
148+
}
149+
132150
/// Null columnar values are implemented as a null array in order to pass batch
133151
/// num_rows
134152
pub fn create_null_array(num_rows: usize) -> Self {
@@ -176,7 +194,7 @@ impl ColumnarValue {
176194

177195
let args = args
178196
.iter()
179-
.map(|arg| arg.clone().into_array(inferred_length))
197+
.map(|arg| arg.to_array(inferred_length))
180198
.collect::<Result<Vec<_>>>()?;
181199

182200
Ok(args)

datafusion/functions-nested/src/array_has.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ impl ScalarUDFImpl for ArrayHas {
106106
match &args[1] {
107107
ColumnarValue::Array(array_needle) => {
108108
// the needle is already an array, convert the haystack to an array of the same length
109-
let haystack = args[0].to_owned().into_array(array_needle.len())?;
109+
let haystack = args[0].to_array(array_needle.len())?;
110110
let array = array_has_inner_for_array(&haystack, array_needle)?;
111111
Ok(ColumnarValue::Array(array))
112112
}
@@ -118,7 +118,7 @@ impl ScalarUDFImpl for ArrayHas {
118118
}
119119

120120
// since the needle is a scalar, convert it to an array of size 1
121-
let haystack = args[0].to_owned().into_array(1)?;
121+
let haystack = args[0].to_array(1)?;
122122
let needle = scalar_needle.to_array_of_size(1)?;
123123
let needle = Scalar::new(needle);
124124
let array = array_has_inner_for_scalar(&haystack, &needle)?;

datafusion/functions/src/regex/regexpcount.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ impl ScalarUDFImpl for RegexpCountFunc {
9797
let inferred_length = len.unwrap_or(1);
9898
let args = args
9999
.iter()
100-
.map(|arg| arg.clone().into_array(inferred_length))
100+
.map(|arg| arg.to_array(inferred_length))
101101
.collect::<Result<Vec<_>>>()?;
102102

103103
let result = regexp_count_func(&args);

datafusion/functions/src/regex/regexplike.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ impl ScalarUDFImpl for RegexpLikeFunc {
147147
let inferred_length = len.unwrap_or(1);
148148
let args = args
149149
.iter()
150-
.map(|arg| arg.clone().into_array(inferred_length))
150+
.map(|arg| arg.to_array(inferred_length))
151151
.collect::<Result<Vec<_>>>()?;
152152

153153
let result = regexp_like(&args);

datafusion/functions/src/regex/regexpmatch.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ impl ScalarUDFImpl for RegexpMatchFunc {
9999
let inferred_length = len.unwrap_or(1);
100100
let args = args
101101
.iter()
102-
.map(|arg| arg.clone().into_array(inferred_length))
102+
.map(|arg| arg.to_array(inferred_length))
103103
.collect::<Result<Vec<_>>>()?;
104104

105105
let result = regexp_match_func(&args);

datafusion/functions/src/regex/regexpreplace.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,7 @@ pub fn specialize_regexp_replace<T: OffsetSizeTrait>(
575575
Hint::AcceptsSingular => 1,
576576
Hint::Pad => inferred_length,
577577
};
578-
arg.clone().into_array(expansion_len)
578+
arg.to_array(expansion_len)
579579
})
580580
.collect::<Result<Vec<_>>>()?;
581581
_regexp_replace_static_pattern_replace::<T>(&args)
@@ -586,7 +586,7 @@ pub fn specialize_regexp_replace<T: OffsetSizeTrait>(
586586
(_, _, _, _) => {
587587
let args = args
588588
.iter()
589-
.map(|arg| arg.clone().into_array(inferred_length))
589+
.map(|arg| arg.to_array(inferred_length))
590590
.collect::<Result<Vec<_>>>()?;
591591

592592
match args[0].data_type() {

datafusion/functions/src/utils.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ where
105105
Hint::AcceptsSingular => 1,
106106
Hint::Pad => inferred_length,
107107
};
108-
arg.clone().into_array(expansion_len)
108+
arg.to_array(expansion_len)
109109
})
110110
.collect::<Result<Vec<_>>>()?;
111111

@@ -152,7 +152,7 @@ pub mod test {
152152
let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, number_rows: cardinality, return_type: &return_type});
153153
assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err());
154154

155-
let result = result.unwrap().clone().into_array(cardinality).expect("Failed to convert to array");
155+
let result = result.unwrap().to_array(cardinality).expect("Failed to convert to array");
156156
let result = result.as_any().downcast_ref::<$ARRAY_TYPE>().expect("Failed to convert to type");
157157

158158
// value is correct

0 commit comments

Comments
 (0)