Skip to content

Commit b09c09a

Browse files
authored
Speedup to_hex (~2x faster) (apache#14686)
* add bench
* speed up by using write!
1 parent 2fce1ef commit b09c09a

File tree

3 files changed

+76
-15
lines changed

3 files changed

+76
-15
lines changed

datafusion/functions/Cargo.toml

+5
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ harness = false
113113
name = "uuid"
114114
required-features = ["string_expressions"]
115115

116+
[[bench]]
117+
harness = false
118+
name = "to_hex"
119+
required-features = ["string_expressions"]
120+
116121
[[bench]]
117122
harness = false
118123
name = "regx"
datafusion/functions/benches/to_hex.rs

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
extern crate criterion;
19+
20+
use arrow::{
21+
datatypes::{Int32Type, Int64Type},
22+
util::bench_util::create_primitive_array,
23+
};
24+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
25+
use datafusion_expr::ColumnarValue;
26+
use datafusion_functions::string;
27+
use std::sync::Arc;
28+
29+
/// Registers two Criterion benchmarks for the `to_hex` string function:
/// one over an `Int32` array and one over an `Int64` array, each built
/// with `size` (1024) elements.
///
/// Each benchmark iteration calls `invoke_batch` on the same pre-built
/// argument vector and unwraps the result, so any error returned by the
/// function aborts the benchmark with a panic.
fn criterion_benchmark(c: &mut Criterion) {
30+
// The function object is created once and shared by both benchmarks.
let hex = string::to_hex();
31+
let size = 1024;
32+
// NOTE(review): the second argument (0.2) is presumably the null density
// of the generated array — confirm against arrow's `bench_util` docs.
let i32_array = Arc::new(create_primitive_array::<Int32Type>(size, 0.2));
33+
// Capture the length before the array is moved into the argument vector.
let batch_len = i32_array.len();
34+
let i32_args = vec![ColumnarValue::Array(i32_array)];
35+
c.bench_function(&format!("to_hex i32 array: {}", size), |b| {
36+
// `black_box` wraps the result so the call is not optimized away.
b.iter(|| black_box(hex.invoke_batch(&i32_args, batch_len).unwrap()))
37+
});
38+
// Same setup as above, repeated for 64-bit integers.
let i64_array = Arc::new(create_primitive_array::<Int64Type>(size, 0.2));
39+
let batch_len = i64_array.len();
40+
let i64_args = vec![ColumnarValue::Array(i64_array)];
41+
c.bench_function(&format!("to_hex i64 array: {}", size), |b| {
42+
b.iter(|| black_box(hex.invoke_batch(&i64_args, batch_len).unwrap()))
43+
});
44+
}
45+
46+
criterion_group!(benches, criterion_benchmark);
47+
criterion_main!(benches);

datafusion/functions/src/string/to_hex.rs

+24-15
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@
1616
// under the License.
1717

1818
use std::any::Any;
19+
use std::fmt::Write;
1920
use std::sync::Arc;
2021

21-
use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait};
22+
use arrow::array::{ArrayRef, GenericStringBuilder, OffsetSizeTrait};
2223
use arrow::datatypes::{
2324
ArrowNativeType, ArrowPrimitiveType, DataType, Int32Type, Int64Type,
2425
};
@@ -40,22 +41,30 @@ where
4041
{
4142
let integer_array = as_primitive_array::<T>(&args[0])?;
4243

43-
let result = integer_array
44-
.iter()
45-
.map(|integer| {
46-
if let Some(value) = integer {
47-
if let Some(value_usize) = value.to_usize() {
48-
Ok(Some(format!("{value_usize:x}")))
49-
} else if let Some(value_isize) = value.to_isize() {
50-
Ok(Some(format!("{value_isize:x}")))
51-
} else {
52-
exec_err!("Unsupported data type {integer:?} for function to_hex")
53-
}
44+
let mut result = GenericStringBuilder::<i32>::with_capacity(
45+
integer_array.len(),
46+
// * 8 to convert to bits, / 4 bits per hex char
47+
integer_array.len() * (T::Native::get_byte_width() * 8 / 4),
48+
);
49+
50+
for integer in integer_array {
51+
if let Some(value) = integer {
52+
if let Some(value_usize) = value.to_usize() {
53+
write!(result, "{value_usize:x}")?;
54+
} else if let Some(value_isize) = value.to_isize() {
55+
write!(result, "{value_isize:x}")?;
5456
} else {
55-
Ok(None)
57+
return exec_err!(
58+
"Unsupported data type {integer:?} for function to_hex"
59+
);
5660
}
57-
})
58-
.collect::<Result<GenericStringArray<i32>>>()?;
61+
result.append_value("");
62+
} else {
63+
result.append_null();
64+
}
65+
}
66+
67+
let result = result.finish();
5968

6069
Ok(Arc::new(result) as ArrayRef)
6170
}

0 commit comments

Comments
 (0)