Skip to content

Commit b19ae17

Browse files
committed
migrate string functions to invoke_with_args
1 parent 19fe44c commit b19ae17

27 files changed

+252
-224
lines changed

datafusion/functions/benches/concat.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616
// under the License.
1717

1818
use arrow::array::ArrayRef;
19+
use arrow::datatypes::DataType;
1920
use arrow::util::bench_util::create_string_array_with_len;
2021
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
2122
use datafusion_common::ScalarValue;
22-
use datafusion_expr::ColumnarValue;
23+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2324
use datafusion_functions::string::concat;
2425
use std::sync::Arc;
2526

@@ -39,8 +40,15 @@ fn criterion_benchmark(c: &mut Criterion) {
3940
let mut group = c.benchmark_group("concat function");
4041
group.bench_function(BenchmarkId::new("concat", size), |b| {
4142
b.iter(|| {
42-
// TODO use invoke_with_args
43-
criterion::black_box(concat().invoke_batch(&args, size).unwrap())
43+
criterion::black_box(
44+
concat()
45+
.invoke_with_args(ScalarFunctionArgs {
46+
args: args.clone(),
47+
number_rows: size,
48+
return_type: &DataType::Utf8,
49+
})
50+
.unwrap(),
51+
)
4452
})
4553
});
4654
group.finish();

datafusion/functions/benches/lower.rs

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@
1818
extern crate criterion;
1919

2020
use arrow::array::{ArrayRef, StringArray, StringViewBuilder};
21+
use arrow::datatypes::DataType;
2122
use arrow::util::bench_util::{
2223
create_string_array_with_len, create_string_view_array_with_len,
2324
};
2425
use criterion::{black_box, criterion_group, criterion_main, Criterion};
25-
use datafusion_expr::ColumnarValue;
26+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2627
use datafusion_functions::string;
2728
use std::sync::Arc;
2829

@@ -125,8 +126,11 @@ fn criterion_benchmark(c: &mut Criterion) {
125126
let args = create_args1(size, 32);
126127
c.bench_function(&format!("lower_all_values_are_ascii: {}", size), |b| {
127128
b.iter(|| {
128-
// TODO use invoke_with_args
129-
black_box(lower.invoke_batch(&args, size))
129+
black_box(lower.invoke_with_args(ScalarFunctionArgs {
130+
args: args.clone(),
131+
number_rows: size,
132+
return_type: &DataType::Utf8,
133+
}))
130134
})
131135
});
132136

@@ -135,8 +139,11 @@ fn criterion_benchmark(c: &mut Criterion) {
135139
&format!("lower_the_first_value_is_nonascii: {}", size),
136140
|b| {
137141
b.iter(|| {
138-
// TODO use invoke_with_args
139-
black_box(lower.invoke_batch(&args, size))
142+
black_box(lower.invoke_with_args(ScalarFunctionArgs {
143+
args: args.clone(),
144+
number_rows: size,
145+
return_type: &DataType::Utf8,
146+
}))
140147
})
141148
},
142149
);
@@ -146,8 +153,11 @@ fn criterion_benchmark(c: &mut Criterion) {
146153
&format!("lower_the_middle_value_is_nonascii: {}", size),
147154
|b| {
148155
b.iter(|| {
149-
// TODO use invoke_with_args
150-
black_box(lower.invoke_batch(&args, size))
156+
black_box(lower.invoke_with_args(ScalarFunctionArgs {
157+
args: args.clone(),
158+
number_rows: size,
159+
return_type: &DataType::Utf8,
160+
}))
151161
})
152162
},
153163
);
@@ -167,8 +177,11 @@ fn criterion_benchmark(c: &mut Criterion) {
167177
&format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}",
168178
size, str_len, null_density, mixed),
169179
|b| b.iter(|| {
170-
// TODO use invoke_with_args
171-
black_box(lower.invoke_batch(&args, size))
180+
black_box(lower.invoke_with_args(ScalarFunctionArgs{
181+
args: args.clone(),
182+
number_rows: size,
183+
return_type: &DataType::Utf8,
184+
}))
172185
}),
173186
);
174187

@@ -177,8 +190,11 @@ fn criterion_benchmark(c: &mut Criterion) {
177190
&format!("lower_all_values_are_ascii_string_views: size: {}, str_len: {}, null_density: {}, mixed: {}",
178191
size, str_len, null_density, mixed),
179192
|b| b.iter(|| {
180-
// TODO use invoke_with_args
181-
black_box(lower.invoke_batch(&args, size))
193+
black_box(lower.invoke_with_args(ScalarFunctionArgs{
194+
args: args.clone(),
195+
number_rows: size,
196+
return_type: &DataType::Utf8,
197+
}))
182198
}),
183199
);
184200

@@ -187,8 +203,11 @@ fn criterion_benchmark(c: &mut Criterion) {
187203
&format!("lower_some_values_are_nonascii_string_views: size: {}, str_len: {}, non_ascii_density: {}, null_density: {}, mixed: {}",
188204
size, str_len, 0.1, null_density, mixed),
189205
|b| b.iter(|| {
190-
// TODO use invoke_with_args
191-
black_box(lower.invoke_batch(&args, size))
206+
black_box(lower.invoke_with_args(ScalarFunctionArgs{
207+
args: args.clone(),
208+
number_rows: size,
209+
return_type: &DataType::Utf8,
210+
}))
192211
}),
193212
);
194213
}

datafusion/functions/benches/ltrim.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,13 @@
1818
extern crate criterion;
1919

2020
use arrow::array::{ArrayRef, LargeStringArray, StringArray, StringViewArray};
21+
use arrow::datatypes::DataType;
2122
use criterion::{
2223
black_box, criterion_group, criterion_main, measurement::Measurement, BenchmarkGroup,
2324
Criterion, SamplingMode,
2425
};
2526
use datafusion_common::ScalarValue;
26-
use datafusion_expr::{ColumnarValue, ScalarUDF};
27+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDF};
2728
use datafusion_functions::string;
2829
use rand::{distributions::Alphanumeric, rngs::StdRng, Rng, SeedableRng};
2930
use std::{fmt, sync::Arc};
@@ -141,8 +142,11 @@ fn run_with_string_type<M: Measurement>(
141142
),
142143
|b| {
143144
b.iter(|| {
144-
// TODO use invoke_with_args
145-
black_box(ltrim.invoke_batch(&args, size))
145+
black_box(ltrim.invoke_with_args(ScalarFunctionArgs {
146+
args: args.clone(),
147+
number_rows: size,
148+
return_type: &DataType::Utf8,
149+
}))
146150
})
147151
},
148152
);

datafusion/functions/benches/repeat.rs

Lines changed: 37 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@
1818
extern crate criterion;
1919

2020
use arrow::array::{ArrayRef, Int64Array, OffsetSizeTrait};
21+
use arrow::datatypes::DataType;
2122
use arrow::util::bench_util::{
2223
create_string_array_with_len, create_string_view_array_with_len,
2324
};
2425
use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode};
25-
use datafusion_expr::ColumnarValue;
26+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2627
use datafusion_functions::string;
2728
use std::sync::Arc;
2829
use std::time::Duration;
@@ -73,8 +74,11 @@ fn criterion_benchmark(c: &mut Criterion) {
7374
),
7475
|b| {
7576
b.iter(|| {
76-
// TODO use invoke_with_args
77-
black_box(repeat.invoke_batch(&args, repeat_times as usize))
77+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
78+
args: args.clone(),
79+
number_rows: repeat_times as usize,
80+
return_type: &DataType::Utf8,
81+
}))
7882
})
7983
},
8084
);
@@ -87,8 +91,11 @@ fn criterion_benchmark(c: &mut Criterion) {
8791
),
8892
|b| {
8993
b.iter(|| {
90-
// TODO use invoke_with_args
91-
black_box(repeat.invoke_batch(&args, repeat_times as usize))
94+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
95+
args: args.clone(),
96+
number_rows: repeat_times as usize,
97+
return_type: &DataType::Utf8,
98+
}))
9299
})
93100
},
94101
);
@@ -101,8 +108,11 @@ fn criterion_benchmark(c: &mut Criterion) {
101108
),
102109
|b| {
103110
b.iter(|| {
104-
// TODO use invoke_with_args
105-
black_box(repeat.invoke_batch(&args, repeat_times as usize))
111+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
112+
args: args.clone(),
113+
number_rows: repeat_times as usize,
114+
return_type: &DataType::Utf8,
115+
}))
106116
})
107117
},
108118
);
@@ -124,8 +134,11 @@ fn criterion_benchmark(c: &mut Criterion) {
124134
),
125135
|b| {
126136
b.iter(|| {
127-
// TODO use invoke_with_args
128-
black_box(repeat.invoke_batch(&args, repeat_times as usize))
137+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
138+
args: args.clone(),
139+
number_rows: repeat_times as usize,
140+
return_type: &DataType::Utf8,
141+
}))
129142
})
130143
},
131144
);
@@ -138,8 +151,11 @@ fn criterion_benchmark(c: &mut Criterion) {
138151
),
139152
|b| {
140153
b.iter(|| {
141-
// TODO use invoke_with_args
142-
black_box(repeat.invoke_batch(&args, size))
154+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
155+
args: args.clone(),
156+
number_rows: repeat_times as usize,
157+
return_type: &DataType::Utf8,
158+
}))
143159
})
144160
},
145161
);
@@ -152,8 +168,11 @@ fn criterion_benchmark(c: &mut Criterion) {
152168
),
153169
|b| {
154170
b.iter(|| {
155-
// TODO use invoke_with_args
156-
black_box(repeat.invoke_batch(&args, repeat_times as usize))
171+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
172+
args: args.clone(),
173+
number_rows: repeat_times as usize,
174+
return_type: &DataType::Utf8,
175+
}))
157176
})
158177
},
159178
);
@@ -175,8 +194,11 @@ fn criterion_benchmark(c: &mut Criterion) {
175194
),
176195
|b| {
177196
b.iter(|| {
178-
// TODO use invoke_with_args
179-
black_box(repeat.invoke_batch(&args, size))
197+
black_box(repeat.invoke_with_args(ScalarFunctionArgs {
198+
args: args.clone(),
199+
number_rows: repeat_times as usize,
200+
return_type: &DataType::Utf8,
201+
}))
180202
})
181203
},
182204
);

datafusion/functions/benches/upper.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@
1717

1818
extern crate criterion;
1919

20+
use arrow::datatypes::DataType;
2021
use arrow::util::bench_util::create_string_array_with_len;
2122
use criterion::{black_box, criterion_group, criterion_main, Criterion};
22-
use datafusion_expr::ColumnarValue;
23+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2324
use datafusion_functions::string;
2425
use std::sync::Arc;
2526

@@ -38,8 +39,11 @@ fn criterion_benchmark(c: &mut Criterion) {
3839
let args = create_args(size, 32);
3940
c.bench_function("upper_all_values_are_ascii", |b| {
4041
b.iter(|| {
41-
// TODO use invoke_with_args
42-
black_box(upper.invoke_batch(&args, size))
42+
black_box(upper.invoke_with_args(ScalarFunctionArgs {
43+
args: args.clone(),
44+
number_rows: size,
45+
return_type: &DataType::Utf8,
46+
}))
4347
})
4448
});
4549
}

datafusion/functions/benches/uuid.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,21 @@
1717

1818
extern crate criterion;
1919

20+
use arrow::datatypes::DataType;
2021
use criterion::{black_box, criterion_group, criterion_main, Criterion};
22+
use datafusion_expr::ScalarFunctionArgs;
2123
use datafusion_functions::string;
2224

2325
fn criterion_benchmark(c: &mut Criterion) {
2426
let uuid = string::uuid();
2527
c.bench_function("uuid", |b| {
26-
b.iter(|| black_box(uuid.invoke_batch(&[], 1024)))
28+
b.iter(|| {
29+
black_box(uuid.invoke_with_args(ScalarFunctionArgs {
30+
args: vec![],
31+
number_rows: 1024,
32+
return_type: &DataType::Utf8,
33+
}))
34+
})
2735
});
2836
}
2937

datafusion/functions/src/string/ascii.rs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use arrow::error::ArrowError;
2222
use datafusion_common::types::logical_string;
2323
use datafusion_common::{internal_err, Result};
2424
use datafusion_expr::{ColumnarValue, Documentation, TypeSignatureClass};
25-
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
25+
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
2626
use datafusion_expr_common::signature::Coercion;
2727
use datafusion_macros::user_doc;
2828
use std::any::Any;
@@ -92,12 +92,8 @@ impl ScalarUDFImpl for AsciiFunc {
9292
Ok(Int32)
9393
}
9494

95-
fn invoke_batch(
96-
&self,
97-
args: &[ColumnarValue],
98-
_number_rows: usize,
99-
) -> Result<ColumnarValue> {
100-
make_scalar_function(ascii, vec![])(args)
95+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
96+
make_scalar_function(ascii, vec![])(&args.args)
10197
}
10298

10399
fn documentation(&self) -> Option<&Documentation> {

datafusion/functions/src/string/bit_length.rs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use std::any::Any;
2222
use crate::utils::utf8_to_int_type;
2323
use datafusion_common::{utils::take_function_args, Result, ScalarValue};
2424
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
25-
use datafusion_expr::{ScalarUDFImpl, Signature};
25+
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
2626
use datafusion_macros::user_doc;
2727

2828
#[user_doc(
@@ -77,12 +77,8 @@ impl ScalarUDFImpl for BitLengthFunc {
7777
utf8_to_int_type(&arg_types[0], "bit_length")
7878
}
7979

80-
fn invoke_batch(
81-
&self,
82-
args: &[ColumnarValue],
83-
_number_rows: usize,
84-
) -> Result<ColumnarValue> {
85-
let [array] = take_function_args(self.name(), args)?;
80+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
81+
let [array] = take_function_args(self.name(), &args.args)?;
8682

8783
match array {
8884
ColumnarValue::Array(v) => Ok(ColumnarValue::Array(bit_length(v.as_ref())?)),

0 commit comments

Comments
 (0)