Skip to content
This repository was archived by the owner on Oct 17, 2022. It is now read-only.

Commit 1496561

Browse files
committed
Benchmark this implementation against the standard library's UTF-8 validator
Two sets of benchmarks are now run: one with this validator and one with `std::str::from_utf8` (a thin, inlined wrapper around the internal `run_utf8_validation` function). This commit also changes the benchmark to use functions instead of macros, slightly improving readability.
1 parent 9419ade commit 1496561

File tree

2 files changed

+39
-112
lines changed

2 files changed

+39
-112
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@ core_affinity = "*"
1515

1616
[[bench]]
1717
name = "criterion_bench"
18-
harness = false
18+
harness = false

benches/criterion_bench.rs

Lines changed: 38 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -6,119 +6,46 @@ use mimalloc::MiMalloc;
66
#[global_allocator]
77
static GLOBAL: MiMalloc = MiMalloc;
88

9-
use criterion::{BatchSize, Criterion, ParameterizedBenchmark, Throughput};
10-
use std::fs::File;
11-
use std::io::Read;
12-
13-
macro_rules! bench_file {
14-
($name:ident) => {
15-
fn $name(c: &mut Criterion) {
16-
let core_ids = core_affinity::get_core_ids().unwrap();
17-
core_affinity::set_for_current(core_ids[0]);
18-
19-
let mut vec = Vec::new();
20-
File::open(concat!("data/", stringify!($name), ".data"))
21-
.unwrap()
22-
.read_to_end(&mut vec)
23-
.unwrap();
24-
25-
let b = ParameterizedBenchmark::new(
26-
"faster_utf8_validator",
27-
|b, data| {
28-
b.iter_batched(
29-
|| data,
30-
|bytes| {
31-
assert!(faster_utf8_validator::validate(&bytes));
32-
},
33-
BatchSize::SmallInput,
34-
)
35-
},
36-
vec![vec],
37-
);
38-
c.bench(
39-
stringify!($name),
40-
b.throughput(|data| Throughput::Bytes(data.len() as u64)),
41-
);
42-
}
43-
};
9+
use criterion::{measurement::Measurement, Criterion, Throughput};
10+
use std::{fs, str};
11+
12+
fn bench_file<T: Measurement>(c: &mut Criterion<T>, name: &str, is_valid: bool) {
13+
let buf = fs::read(format!("data/{}.data", name)).unwrap();
14+
15+
let mut group = c.benchmark_group(name);
16+
group.throughput(Throughput::Bytes(buf.len() as u64));
17+
group.bench_function("std_utf8_validator", |b| {
18+
b.iter(|| assert!(str::from_utf8(&buf).is_ok() == is_valid))
19+
});
20+
group.bench_function("faster_utf8_validator", |b| {
21+
b.iter(|| assert!(faster_utf8_validator::validate(&buf) == is_valid))
22+
});
23+
24+
group.finish();
4425
}
4526

46-
macro_rules! bench_file_bad {
47-
($name:ident) => {
48-
fn $name(c: &mut Criterion) {
49-
let core_ids = core_affinity::get_core_ids().unwrap();
50-
core_affinity::set_for_current(core_ids[0]);
51-
52-
let mut vec = Vec::new();
53-
File::open(concat!("data/", stringify!($name), ".data"))
54-
.unwrap()
55-
.read_to_end(&mut vec)
56-
.unwrap();
57-
58-
let b = ParameterizedBenchmark::new(
59-
"faster_utf8_validator",
60-
|b, data| {
61-
b.iter_batched(
62-
|| data,
63-
|bytes| {
64-
assert!(!faster_utf8_validator::validate(&bytes));
65-
},
66-
BatchSize::SmallInput,
67-
)
68-
},
69-
vec![vec],
70-
);
71-
c.bench(
72-
stringify!($name),
73-
b.throughput(|data| Throughput::Bytes(data.len() as u64)),
74-
);
75-
}
76-
};
27+
fn bench_all<T: Measurement>(c: &mut Criterion<T>) {
28+
bench_file(c, "apache_builds", true);
29+
bench_file(c, "canada", true);
30+
bench_file(c, "citm_catalog", true);
31+
bench_file(c, "github_events", true);
32+
bench_file(c, "gsoc_2018", true);
33+
bench_file(c, "instruments", true);
34+
bench_file(c, "log", true);
35+
bench_file(c, "marine_ik", true);
36+
bench_file(c, "mesh", true);
37+
bench_file(c, "numbers", true);
38+
bench_file(c, "random", true);
39+
bench_file(c, "twitterescaped", true);
40+
bench_file(c, "twitter", true);
41+
bench_file(c, "update_center", true);
42+
bench_file(c, "mostly_ascii_sample_ok", true);
43+
bench_file(c, "random_bytes", false);
44+
bench_file(c, "utf8_characters_0_0x10ffff", true);
45+
bench_file(c, "utf8_characters_0_0x10ffff_with_garbage", false);
46+
bench_file(c, "utf8_sample_ok", true);
47+
bench_file(c, "ascii_sample_ok", true);
7748
}
7849

79-
bench_file!(apache_builds);
80-
bench_file!(canada);
81-
bench_file!(citm_catalog);
82-
bench_file!(github_events);
83-
bench_file!(gsoc_2018);
84-
bench_file!(instruments);
85-
bench_file!(log);
86-
bench_file!(marine_ik);
87-
bench_file!(mesh);
88-
bench_file!(numbers);
89-
bench_file!(random);
90-
bench_file!(twitterescaped);
91-
bench_file!(twitter);
92-
bench_file!(update_center);
93-
bench_file!(mostly_ascii_sample_ok);
94-
bench_file_bad!(random_bytes);
95-
bench_file!(utf8_characters_0_0x10ffff);
96-
bench_file_bad!(utf8_characters_0_0x10ffff_with_garbage);
97-
bench_file!(utf8_sample_ok);
98-
bench_file!(ascii_sample_ok);
99-
100-
criterion_group!(
101-
benches,
102-
mostly_ascii_sample_ok,
103-
ascii_sample_ok,
104-
random_bytes,
105-
utf8_characters_0_0x10ffff,
106-
utf8_characters_0_0x10ffff_with_garbage,
107-
utf8_sample_ok,
108-
apache_builds,
109-
canada,
110-
citm_catalog,
111-
github_events,
112-
gsoc_2018,
113-
instruments,
114-
log,
115-
marine_ik,
116-
mesh,
117-
numbers,
118-
random,
119-
twitterescaped,
120-
twitter,
121-
update_center
122-
);
123-
50+
criterion_group!(benches, bench_all);
12451
criterion_main!(benches);

0 commit comments

Comments
 (0)