Skip to content

Benchmark for casting view to dict arrays (and the reverse) #5874

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions arrow/benches/cast_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,17 @@ fn build_decimal256_array(size: usize, precision: u8, scale: i8) -> ArrayRef {
)
}

fn build_dict_array(size: usize) -> ArrayRef {
let values = StringArray::from_iter([
Some("small"),
Some("larger string more than 12 bytes"),
None,
]);
let keys = UInt64Array::from_iter((0..size as u64).map(|v| v % 3));

Arc::new(DictionaryArray::new(keys, Arc::new(values)))
}

// cast array from specified primitive array type to desired data type
fn cast_array(array: &ArrayRef, to_type: DataType) {
criterion::black_box(cast(array, &to_type).unwrap());
Expand All @@ -138,6 +149,9 @@ fn add_benchmark(c: &mut Criterion) {
let decimal128_array = build_decimal128_array(512, 10, 3);
let decimal256_array = build_decimal256_array(512, 50, 3);

let dict_array = build_dict_array(10_000);
let string_view_array = cast(&dict_array, &DataType::Utf8View).unwrap();

c.bench_function("cast int32 to int32 512", |b| {
b.iter(|| cast_array(&i32_array, DataType::Int32))
});
Expand Down Expand Up @@ -237,6 +251,17 @@ fn add_benchmark(c: &mut Criterion) {
c.bench_function("cast decimal256 to decimal256 512 with same scale", |b| {
b.iter(|| cast_array(&decimal256_array, DataType::Decimal256(60, 3)))
});
c.bench_function("cast dict to string view", |b| {
b.iter(|| cast_array(&dict_array, DataType::Utf8View))
});
c.bench_function("cast string view to dict", |b| {
b.iter(|| {
cast_array(
&string_view_array,
DataType::Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
)
})
});
}

criterion_group!(benches, add_benchmark);
Expand Down
Loading