Skip to content

Add insert_or_update and get_payloads to map #12701

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
323 changes: 221 additions & 102 deletions datafusion-cli/Cargo.lock

Large diffs are not rendered by default.

11 changes: 10 additions & 1 deletion datafusion/physical-expr-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,17 @@ path = "src/lib.rs"

[dependencies]
ahash = { workspace = true }
arrow = { workspace = true }
arrow = { workspace = true, features = ["test_utils"] }
criterion = { version = "0.5.1", default-features = false }
datafusion-common = { workspace = true, default-features = true }
datafusion-expr-common = { workspace = true }
hashbrown = { workspace = true }
rand = { workspace = true }

[[bench]]
harness = false
name = "binary_map"

[[bench]]
harness = false
name = "binary_view_map"
74 changes: 74 additions & 0 deletions datafusion/physical-expr-common/benches/binary_map.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Licensed to the Apache Software Foundation (ASF) under one
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ArrowBytesMap insert_if_new - items: 1000000, null_density: 0.1, str_len: 20
time: [26.761 ms 27.106 ms 27.472 ms]
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high mild

ArrowBytesMap get_payloads - items: 1000000, null_density: 0.1, str_len: 20
time: [7.1595 ms 7.1766 ms 7.1950 ms]
Found 6 outliers among 100 measurements (6.00%)
1 (1.00%) low mild
3 (3.00%) high mild
2 (2.00%) high severe

ArrowBytesMap insert_if_new - items: 1000000, null_density: 0.1, str_len: 50
time: [31.591 ms 31.749 ms 31.929 ms]
Found 11 outliers among 100 measurements (11.00%)
6 (6.00%) high mild
5 (5.00%) high severe

ArrowBytesMap get_payloads - items: 1000000, null_density: 0.1, str_len: 50
time: [8.2213 ms 8.2395 ms 8.2596 ms]
Found 9 outliers among 100 measurements (9.00%)
4 (4.00%) high mild
5 (5.00%) high severe

ArrowBytesMap insert_if_new - items: 1000000, null_density: 0.5, str_len: 20
time: [22.149 ms 22.258 ms 22.378 ms]
Found 13 outliers among 100 measurements (13.00%)
6 (6.00%) high mild
7 (7.00%) high severe

ArrowBytesMap get_payloads - items: 1000000, null_density: 0.5, str_len: 20
time: [10.727 ms 10.783 ms 10.842 ms]
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high mild

ArrowBytesMap insert_if_new - items: 1000000, null_density: 0.5, str_len: 50
time: [23.929 ms 24.083 ms 24.252 ms]
Found 17 outliers among 100 measurements (17.00%)
10 (10.00%) high mild
7 (7.00%) high severe

ArrowBytesMap get_payloads - items: 1000000, null_density: 0.5, str_len: 50
time: [11.141 ms 11.165 ms 11.194 ms]
Found 10 outliers among 100 measurements (10.00%)
3 (3.00%) high mild
7 (7.00%) high severe

// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use std::sync::Arc;

use arrow::array::ArrayRef;
use arrow::util::bench_util::create_string_array_with_len;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_physical_expr_common::binary_map::{ArrowBytesMap, OutputType};

fn benchmark_arrow_bytes_map(c: &mut Criterion) {
let sizes = [100_000, 1_000_000];
let null_densities = [0.1, 0.5];
let string_lengths = [20, 50];

for &num_items in &sizes {
for &null_density in &null_densities {
for &str_len in &string_lengths {
let array: ArrayRef = Arc::new(create_string_array_with_len::<i32>(
num_items,
null_density,
str_len,
));

c.bench_function(
&format!(
"ArrowBytesMap insert_if_new - items: {}, null_density: {:.1}, str_len: {}",
num_items, null_density, str_len
),
|b| {
b.iter(|| {
let mut map = ArrowBytesMap::<i32, ()>::new(OutputType::Utf8);
map.insert_if_new(black_box(&array), |_| {}, |_| {}, |_| {});
black_box(&map);
});
},
);

let mut map = ArrowBytesMap::<i32, u32>::new(OutputType::Utf8);
map.insert_if_new(&array, |_| 1u32, |_| {}, |_| {});

c.bench_function(
&format!(
"ArrowBytesMap get_payloads - items: {}, null_density: {:.1}, str_len: {}",
num_items, null_density, str_len
),
|b| {
b.iter(|| {
let payloads = map.take().get_payloads(black_box(&array));
black_box(payloads);
});
},
);
}
}
}
}

// Collect the benchmark function into the `benches` group and hand that group
// to criterion's generated entry point.
criterion_group!(benches, benchmark_arrow_bytes_map);
criterion_main!(benches);
77 changes: 77 additions & 0 deletions datafusion/physical-expr-common/benches/binary_view_map.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Licensed to the Apache Software Foundation (ASF) under one
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ArrowBytesViewMap insert_if_new - items: 1000000, null_density: 0.1, str_len: 20
time: [29.995 ms 30.207 ms 30.447 ms]
Found 16 outliers among 100 measurements (16.00%)
3 (3.00%) high mild
13 (13.00%) high severe

ArrowBytesViewMap get_payloads - items: 1000000, null_density: 0.1, str_len: 20
time: [7.6807 ms 7.7203 ms 7.7624 ms]
Found 4 outliers among 100 measurements (4.00%)
4 (4.00%) high mild

ArrowBytesViewMap insert_if_new - items: 1000000, null_density: 0.1, str_len: 50
time: [35.002 ms 35.305 ms 35.637 ms]
Found 7 outliers among 100 measurements (7.00%)
6 (6.00%) high mild
1 (1.00%) high severe

ArrowBytesViewMap get_payloads - items: 1000000, null_density: 0.1, str_len: 50
time: [8.7820 ms 8.8271 ms 8.8757 ms]
Found 7 outliers among 100 measurements (7.00%)
7 (7.00%) high mild

ArrowBytesViewMap insert_if_new - items: 1000000, null_density: 0.5, str_len: 20
time: [23.549 ms 23.660 ms 23.784 ms]
Found 11 outliers among 100 measurements (11.00%)
5 (5.00%) high mild
6 (6.00%) high severe

ArrowBytesViewMap get_payloads - items: 1000000, null_density: 0.5, str_len: 20
time: [11.704 ms 11.746 ms 11.793 ms]
Found 10 outliers among 100 measurements (10.00%)
6 (6.00%) high mild
4 (4.00%) high severe

ArrowBytesViewMap insert_if_new - items: 1000000, null_density: 0.5, str_len: 50
time: [26.712 ms 27.083 ms 27.475 ms]

ArrowBytesViewMap get_payloads - items: 1000000, null_density: 0.5, str_len: 50
time: [12.457 ms 12.516 ms 12.581 ms]
Found 5 outliers among 100 measurements (5.00%)
3 (3.00%) high mild
2 (2.00%) high severe

// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use std::sync::Arc;

use arrow::array::ArrayRef;
use arrow::util::bench_util::create_string_view_array_with_len;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use datafusion_physical_expr_common::{
binary_map::OutputType, binary_view_map::ArrowBytesViewMap,
};

fn benchmark_arrow_bytes_view_map(c: &mut Criterion) {
let sizes = [100_000, 1_000_000];
let null_densities = [0.1, 0.5];
let string_lengths = [20, 50];

for &num_items in &sizes {
for &null_density in &null_densities {
for &str_len in &string_lengths {
let array: ArrayRef = Arc::new(create_string_view_array_with_len(
num_items,
null_density,
str_len,
false,
));

c.bench_function(
&format!(
"ArrowBytesViewMap insert_if_new - items: {}, null_density: {:.1}, str_len: {}",
num_items, null_density, str_len
),
|b| {
b.iter(|| {
let mut map = ArrowBytesViewMap::<()>::new(OutputType::Utf8View);
map.insert_if_new(black_box(&array), |_| {}, |_| {}, |_| {});
black_box(&map);
});
},
);

let mut map = ArrowBytesViewMap::<i32>::new(OutputType::Utf8View);
map.insert_if_new(&array, |_| 1i32, |_| {}, |_| {});

c.bench_function(
&format!(
"ArrowBytesViewMap get_payloads - items: {}, null_density: {:.1}, str_len: {}",
num_items, null_density, str_len
),
|b| {
b.iter(|| {
let payloads = map.take().get_payloads(black_box(&array));
black_box(payloads);
});
},
);
}
}
}
}

// Collect the benchmark function into the `benches` group and hand that group
// to criterion's generated entry point.
criterion_group!(benches, benchmark_arrow_bytes_view_map);
criterion_main!(benches);
Loading