-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Add insert_or_update and get_payloads to map #12701
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ff50dac
ff51426
8675940
cb4c56b
50db977
e573715
44b5507
514dfaf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
use std::sync::Arc; | ||
|
||
use arrow::array::ArrayRef; | ||
use arrow::util::bench_util::create_string_array_with_len; | ||
use criterion::{black_box, criterion_group, criterion_main, Criterion}; | ||
use datafusion_physical_expr_common::binary_map::{ArrowBytesMap, OutputType}; | ||
|
||
fn benchmark_arrow_bytes_map(c: &mut Criterion) { | ||
let sizes = [100_000, 1_000_000]; | ||
let null_densities = [0.1, 0.5]; | ||
let string_lengths = [20, 50]; | ||
|
||
for &num_items in &sizes { | ||
for &null_density in &null_densities { | ||
for &str_len in &string_lengths { | ||
let array: ArrayRef = Arc::new(create_string_array_with_len::<i32>( | ||
num_items, | ||
null_density, | ||
str_len, | ||
)); | ||
|
||
c.bench_function( | ||
&format!( | ||
"ArrowBytesMap insert_if_new - items: {}, null_density: {:.1}, str_len: {}", | ||
num_items, null_density, str_len | ||
), | ||
|b| { | ||
b.iter(|| { | ||
let mut map = ArrowBytesMap::<i32, ()>::new(OutputType::Utf8); | ||
map.insert_if_new(black_box(&array), |_| {}, |_| {}, |_| {}); | ||
black_box(&map); | ||
}); | ||
}, | ||
); | ||
|
||
let mut map = ArrowBytesMap::<i32, u32>::new(OutputType::Utf8); | ||
map.insert_if_new(&array, |_| 1u32, |_| {}, |_| {}); | ||
|
||
c.bench_function( | ||
&format!( | ||
"ArrowBytesMap get_payloads - items: {}, null_density: {:.1}, str_len: {}", | ||
num_items, null_density, str_len | ||
), | ||
|b| { | ||
b.iter(|| { | ||
let payloads = map.take().get_payloads(black_box(&array)); | ||
black_box(payloads); | ||
}); | ||
}, | ||
); | ||
} | ||
} | ||
} | ||
} | ||
|
||
// Criterion entry points: define the `benches` group from the benchmark
// function above and let `criterion_main!` generate the harness for it.
criterion_group!(benches, benchmark_arrow_bytes_map); | ||
criterion_main!(benches); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ArrowBytesViewMap insert_if_new - items: 1000000, null_density: 0.1, str_len: 20 ArrowBytesViewMap get_payloads - items: 1000000, null_density: 0.1, str_len: 20 ArrowBytesViewMap insert_if_new - items: 1000000, null_density: 0.1, str_len: 50 ArrowBytesViewMap get_payloads - items: 1000000, null_density: 0.1, str_len: 50 ArrowBytesViewMap insert_if_new - items: 1000000, null_density: 0.5, str_len: 20 ArrowBytesViewMap get_payloads - items: 1000000, null_density: 0.5, str_len: 20 ArrowBytesViewMap insert_if_new - items: 1000000, null_density: 0.5, str_len: 50 ArrowBytesViewMap get_payloads - items: 1000000, null_density: 0.5, str_len: 50 |
||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
use std::sync::Arc; | ||
|
||
use arrow::array::ArrayRef; | ||
use arrow::util::bench_util::create_string_view_array_with_len; | ||
use criterion::{black_box, criterion_group, criterion_main, Criterion}; | ||
use datafusion_physical_expr_common::{ | ||
binary_map::OutputType, binary_view_map::ArrowBytesViewMap, | ||
}; | ||
|
||
fn benchmark_arrow_bytes_view_map(c: &mut Criterion) { | ||
let sizes = [100_000, 1_000_000]; | ||
let null_densities = [0.1, 0.5]; | ||
let string_lengths = [20, 50]; | ||
|
||
for &num_items in &sizes { | ||
for &null_density in &null_densities { | ||
for &str_len in &string_lengths { | ||
let array: ArrayRef = Arc::new(create_string_view_array_with_len( | ||
num_items, | ||
null_density, | ||
str_len, | ||
false, | ||
)); | ||
|
||
c.bench_function( | ||
&format!( | ||
"ArrowBytesViewMap insert_if_new - items: {}, null_density: {:.1}, str_len: {}", | ||
num_items, null_density, str_len | ||
), | ||
|b| { | ||
b.iter(|| { | ||
let mut map = ArrowBytesViewMap::<()>::new(OutputType::Utf8View); | ||
map.insert_if_new(black_box(&array), |_| {}, |_| {}, |_| {}); | ||
black_box(&map); | ||
}); | ||
}, | ||
); | ||
|
||
let mut map = ArrowBytesViewMap::<i32>::new(OutputType::Utf8View); | ||
map.insert_if_new(&array, |_| 1i32, |_| {}, |_| {}); | ||
|
||
c.bench_function( | ||
&format!( | ||
"ArrowBytesViewMap get_payloads - items: {}, null_density: {:.1}, str_len: {}", | ||
num_items, null_density, str_len | ||
), | ||
|b| { | ||
b.iter(|| { | ||
let payloads = map.take().get_payloads(black_box(&array)); | ||
black_box(payloads); | ||
}); | ||
}, | ||
); | ||
} | ||
} | ||
} | ||
} | ||
|
||
// Criterion entry points: define the `benches` group from the benchmark
// function above and let `criterion_main!` generate the harness for it.
criterion_group!(benches, benchmark_arrow_bytes_view_map); | ||
criterion_main!(benches); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ArrowBytesMap insert_if_new - items: 1000000, null_density: 0.1, str_len: 20
time: [26.761 ms 27.106 ms 27.472 ms]
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high mild
ArrowBytesMap get_payloads - items: 1000000, null_density: 0.1, str_len: 20
time: [7.1595 ms 7.1766 ms 7.1950 ms]
Found 6 outliers among 100 measurements (6.00%)
1 (1.00%) low mild
3 (3.00%) high mild
2 (2.00%) high severe
ArrowBytesMap insert_if_new - items: 1000000, null_density: 0.1, str_len: 50
time: [31.591 ms 31.749 ms 31.929 ms]
Found 11 outliers among 100 measurements (11.00%)
6 (6.00%) high mild
5 (5.00%) high severe
ArrowBytesMap get_payloads - items: 1000000, null_density: 0.1, str_len: 50
time: [8.2213 ms 8.2395 ms 8.2596 ms]
Found 9 outliers among 100 measurements (9.00%)
4 (4.00%) high mild
5 (5.00%) high severe
ArrowBytesMap insert_if_new - items: 1000000, null_density: 0.5, str_len: 20
time: [22.149 ms 22.258 ms 22.378 ms]
Found 13 outliers among 100 measurements (13.00%)
6 (6.00%) high mild
7 (7.00%) high severe
ArrowBytesMap get_payloads - items: 1000000, null_density: 0.5, str_len: 20
time: [10.727 ms 10.783 ms 10.842 ms]
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high mild
ArrowBytesMap insert_if_new - items: 1000000, null_density: 0.5, str_len: 50
time: [23.929 ms 24.083 ms 24.252 ms]
Found 17 outliers among 100 measurements (17.00%)
10 (10.00%) high mild
7 (7.00%) high severe
ArrowBytesMap get_payloads - items: 1000000, null_density: 0.5, str_len: 50
time: [11.141 ms 11.165 ms 11.194 ms]
Found 10 outliers among 100 measurements (10.00%)
3 (3.00%) high mild
7 (7.00%) high severe