Skip to content

Commit 146b862

Browse files
committed
Improve key allocation in the bytes mode accumulator and add a UTF-8 string benchmark
1 parent 20f4af6 commit 146b862

File tree

2 files changed

+46
-17
lines changed

2 files changed

+46
-17
lines changed

benches/mode.rs

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use std::sync::Arc;
1919

20-
use arrow::util::bench_util::create_primitive_array;
20+
use arrow::util::bench_util::{create_primitive_array, create_string_array};
2121
use criterion::{black_box, criterion_group, criterion_main, Criterion};
2222
use datafusion::{
2323
arrow::{
@@ -27,14 +27,28 @@ use datafusion::{
2727
},
2828
logical_expr::Accumulator,
2929
};
30-
use datafusion_functions_extra::common::mode::PrimitiveModeAccumulator;
30+
use datafusion_functions_extra::common::mode::{BytesModeAccumulator, PrimitiveModeAccumulator};
3131

32-
fn prepare_mode_accumulator() -> Box<dyn Accumulator> {
32+
fn prepare_primitive_mode_accumulator() -> Box<dyn Accumulator> {
3333
Box::new(PrimitiveModeAccumulator::<Int32Type>::new(&DataType::Int32))
3434
}
3535

36-
fn mode_bench(c: &mut Criterion, name: &str, values: ArrayRef) {
37-
let mut accumulator = prepare_mode_accumulator();
36+
fn prepare_bytes_mode_accumulator() -> Box<dyn Accumulator> {
37+
Box::new(BytesModeAccumulator::new(&DataType::Utf8))
38+
}
39+
40+
fn mode_bench_primitive(c: &mut Criterion, name: &str, values: ArrayRef) {
41+
let mut accumulator = prepare_primitive_mode_accumulator();
42+
c.bench_function(name, |b| {
43+
b.iter(|| {
44+
accumulator.update_batch(&[values.clone()]).unwrap();
45+
black_box(accumulator.evaluate().unwrap());
46+
});
47+
});
48+
}
49+
50+
fn mode_bench_bytes(c: &mut Criterion, name: &str, values: ArrayRef) {
51+
let mut accumulator = prepare_bytes_mode_accumulator();
3852
c.bench_function(name, |b| {
3953
b.iter(|| {
4054
accumulator.update_batch(&[values.clone()]).unwrap();
@@ -44,17 +58,32 @@ fn mode_bench(c: &mut Criterion, name: &str, values: ArrayRef) {
4458
}
4559

4660
fn mode_benchmark(c: &mut Criterion) {
47-
// Case: No nulls
48-
let values = Arc::new(create_primitive_array::<Int32Type>(8192, 0.0)) as ArrayRef;
49-
mode_bench(c, "mode benchmark no nulls", values);
61+
let sizes = [100_000, 1_000_000];
62+
let null_percentages = [0.0, 0.3, 0.7];
5063

51-
// Case: 30% nulls
52-
let values = Arc::new(create_primitive_array::<Int32Type>(8192, 0.3)) as ArrayRef;
53-
mode_bench(c, "mode benchmark 30% nulls", values);
64+
for &size in &sizes {
65+
for &null_percentage in &null_percentages {
66+
let values = Arc::new(create_primitive_array::<Int32Type>(size, null_percentage)) as ArrayRef;
67+
let name = format!(
68+
"PrimitiveModeAccumulator: {} elements, {}% nulls",
69+
size,
70+
null_percentage * 100.0
71+
);
72+
mode_bench_primitive(c, &name, values);
73+
}
74+
}
5475

55-
// Case: 70% nulls
56-
let values = Arc::new(create_primitive_array::<Int32Type>(8192, 0.7)) as ArrayRef;
57-
mode_bench(c, "mode benchmark 70% nulls", values);
76+
for &size in &sizes {
77+
for &null_percentage in &null_percentages {
78+
let values = Arc::new(create_string_array::<i32>(size, null_percentage)) as ArrayRef;
79+
let name = format!(
80+
"BytesModeAccumulator: {} elements, {}% nulls",
81+
size,
82+
null_percentage * 100.0
83+
);
84+
mode_bench_bytes(c, &name, values);
85+
}
86+
}
5887
}
5988

6089
criterion_group!(benches, mode_benchmark);

src/common/mode/bytes.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@ impl BytesModeAccumulator {
4747
V: ArrayAccessor<Item = &'a str>,
4848
{
4949
for value in ArrayIter::new(array).flatten() {
50-
let key = value.to_string();
51-
if let Some(count) = self.value_counts.get_mut(&key) {
50+
let key = value;
51+
if let Some(count) = self.value_counts.get_mut(key) {
5252
*count += 1;
5353
} else {
54-
self.value_counts.insert(key, 1);
54+
self.value_counts.insert(key.to_string(), 1);
5555
}
5656
}
5757
}

0 commit comments

Comments
 (0)