Skip to content

Commit 8b45d2d

Browse files
authored
optimize performance of the repeat function (up to 50% faster) (#14697)
* optimize performance of the repeat function
* correct spelling
1 parent 54228d7 commit 8b45d2d

File tree

1 file changed

+19
-4
lines changed

1 file changed

+19
-4
lines changed

datafusion/functions/src/string/repeat.rs

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -151,20 +151,35 @@ where
151151
T: OffsetSizeTrait,
152152
S: StringArrayType<'a>,
153153
{
154-
let mut builder: GenericStringBuilder<T> = GenericStringBuilder::new();
154+
let mut total_capacity = 0;
155155
string_array.iter().zip(number_array.iter()).try_for_each(
156156
|(string, number)| -> Result<(), DataFusionError> {
157157
match (string, number) {
158158
(Some(string), Some(number)) if number >= 0 => {
159-
if number as usize * string.len() > max_str_len {
159+
let item_capacity = string.len() * number as usize;
160+
if item_capacity > max_str_len {
160161
return exec_err!(
161162
"string size overflow on repeat, max size is {}, but got {}",
162163
max_str_len,
163164
number as usize * string.len()
164165
);
165-
} else {
166-
builder.append_value(string.repeat(number as usize))
167166
}
167+
total_capacity += item_capacity;
168+
}
169+
_ => (),
170+
}
171+
Ok(())
172+
},
173+
)?;
174+
175+
let mut builder =
176+
GenericStringBuilder::<T>::with_capacity(string_array.len(), total_capacity);
177+
178+
string_array.iter().zip(number_array.iter()).try_for_each(
179+
|(string, number)| -> Result<(), DataFusionError> {
180+
match (string, number) {
181+
(Some(string), Some(number)) if number >= 0 => {
182+
builder.append_value(string.repeat(number as usize));
168183
}
169184
(Some(_), Some(_)) => builder.append_value(""),
170185
_ => builder.append_null(),

0 commit comments

Comments
 (0)