
Commit 0635724

Merge pull request #1691 from Kobzol/runtime-benchmark-profile
Add a command to profile a runtime benchmark
2 parents 9ffdb25 + def606c

9 files changed: +216 −36 lines

collector/README.md

Lines changed: 9 additions & 0 deletions

````diff
@@ -475,6 +475,15 @@ profilers whose results are not affected by system noise (e.g. `callgrind` or `e
 `RUST_LOG=debug` can be specified to enable verbose logging, which is useful
 for debugging `collector` itself.
 
+## Profiling runtime benchmarks
+It is also possible to profile runtime benchmarks using the following command:
+
+```
+./target/release/collector profile_runtime <PROFILER> <RUSTC> <BENCHMARK_NAME>
+```
+
+Currently, a `<PROFILER>` can be `cachegrind`, which will run the runtime benchmark under
+`Cachegrind`.
 
 ## How `rustc` wrapping works
 When a crate is benchmarked or profiled, the real `rustc` is replaced with the `rustc-fake` binary,
````
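
For concreteness, an invocation could look like the following (the toolchain path and the `hello-world` benchmark name are illustrative placeholders, not taken from this commit):

```
./target/release/collector profile_runtime cachegrind /path/to/rustc/bin/rustc hello-world
```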

collector/benchlib/src/benchmark.rs

Lines changed: 36 additions & 9 deletions

```diff
@@ -1,9 +1,11 @@
-use crate::cli::{parse_cli, Args, BenchmarkArgs};
+use crate::cli::{parse_cli, Args, BenchmarkArgs, ProfileArgs};
 use crate::comm::messages::{BenchmarkMessage, BenchmarkResult, BenchmarkStats};
 use crate::comm::output_message;
 use crate::measure::benchmark_function;
 use crate::process::raise_process_priority;
+use crate::profile::profile_function;
 use std::collections::HashMap;
+use std::rc::Rc;
 
 /// Create and run a new benchmark group. Use the closure argument to register
 /// the individual benchmarks.
@@ -18,12 +20,21 @@ where
     group.run().expect("Benchmark group execution has failed");
 }
 
-/// Type-erased function that executes a single benchmark.
+/// Type-erased function that executes a single benchmark and measures counter and wall-time
+/// metrics.
 type BenchmarkFn<'a> = Box<dyn Fn() -> anyhow::Result<BenchmarkStats> + 'a>;
 
+/// Type-erased function that executes a single benchmark once.
+type ProfileFn<'a> = Box<dyn Fn() + 'a>;
+
+struct BenchmarkProfileFns<'a> {
+    benchmark_fn: BenchmarkFn<'a>,
+    profile_fn: ProfileFn<'a>,
+}
+
 #[derive(Default)]
 pub struct BenchmarkGroup<'a> {
-    benchmarks: HashMap<&'static str, BenchmarkFn<'a>>,
+    benchmarks: HashMap<&'static str, BenchmarkProfileFns<'a>>,
 }
 
 impl<'a> BenchmarkGroup<'a> {
@@ -40,8 +51,13 @@ impl<'a> BenchmarkGroup<'a> {
         Bench: FnOnce() -> R,
     {
         // We want to type-erase the target `func` by wrapping it in a Box.
-        let benchmark_fn = Box::new(move || benchmark_function(&constructor));
-        if self.benchmarks.insert(name, benchmark_fn).is_some() {
+        let constructor = Rc::new(constructor);
+        let constructor2 = constructor.clone();
+        let benchmark_fns = BenchmarkProfileFns {
+            benchmark_fn: Box::new(move || benchmark_function(constructor.as_ref())),
+            profile_fn: Box::new(move || profile_function(constructor2.as_ref())),
+        };
+        if self.benchmarks.insert(name, benchmark_fns).is_some() {
             panic!("Benchmark '{}' was registered twice", name);
         }
     }
@@ -56,14 +72,15 @@ impl<'a> BenchmarkGroup<'a> {
             Args::Run(args) => {
                 self.run_benchmarks(args)?;
             }
+            Args::Profile(args) => self.profile_benchmark(args)?,
             Args::List => self.list_benchmarks()?,
         }
 
         Ok(())
     }
 
     fn run_benchmarks(self, args: BenchmarkArgs) -> anyhow::Result<()> {
-        let mut items: Vec<(&'static str, BenchmarkFn)> = self
+        let mut items: Vec<(&'static str, BenchmarkProfileFns)> = self
             .benchmarks
             .into_iter()
             .filter(|(name, _)| {
@@ -74,17 +91,17 @@ impl<'a> BenchmarkGroup<'a> {
 
         let mut stdout = std::io::stdout().lock();
 
-        for (name, benchmark_fn) in items {
+        for (name, benchmark_fns) in items {
             let mut stats: Vec<BenchmarkStats> = Vec::with_capacity(args.iterations as usize);
             // Warm-up
             for _ in 0..3 {
-                let benchmark_stats = benchmark_fn()?;
+                let benchmark_stats = (benchmark_fns.benchmark_fn)()?;
                 black_box(benchmark_stats);
             }
 
             // Actual measurement
             for i in 0..args.iterations {
-                let benchmark_stats = benchmark_fn()?;
+                let benchmark_stats = (benchmark_fns.benchmark_fn)()?;
                 log::info!("Benchmark (run {i}) `{name}` completed: {benchmark_stats:?}");
                 stats.push(benchmark_stats);
             }
@@ -100,6 +117,16 @@ impl<'a> BenchmarkGroup<'a> {
         Ok(())
     }
 
+    fn profile_benchmark(self, args: ProfileArgs) -> anyhow::Result<()> {
+        let Some(benchmark) = self.benchmarks.get(args.benchmark.as_str()) else {
+            return Err(anyhow::anyhow!("Benchmark `{}` not found. Available benchmarks: {}", args.benchmark,
+                self.benchmarks.keys().map(|s| s.to_string()).collect::<Vec<_>>().join(", ")));
+        };
+        (benchmark.profile_fn)();
+
+        Ok(())
+    }
+
     fn list_benchmarks(self) -> anyhow::Result<()> {
         let benchmark_list: Vec<&str> = self.benchmarks.into_keys().collect();
         serde_json::to_writer(std::io::stdout(), &benchmark_list)?;
```
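
The notable detail above is the `Rc` in `register`: a single user-supplied benchmark constructor now backs two type-erased closures, and a plain `move` would let only one of them own it, so the constructor is reference-counted and cloned once. A minimal standalone sketch of that sharing pattern (names and workload are illustrative, not from this commit):

```rust
use std::rc::Rc;

fn main() {
    // The constructor builds a fresh benchmark closure for every execution,
    // so each run starts from identical inputs.
    let constructor = Rc::new(|| {
        let input: Vec<u64> = (1..=3).collect();
        move || -> u64 { input.iter().sum() }
    });
    let constructor2 = Rc::clone(&constructor);

    // Two boxed closures share the same constructor: one for repeated
    // measurement, one for a single profiled execution.
    let benchmark_fn: Box<dyn Fn() -> u64> = Box::new(move || {
        let bench = (constructor.as_ref())(); // build a fresh workload
        bench() // this call is what gets measured
    });
    let profile_fn: Box<dyn Fn()> = Box::new(move || {
        let bench = (constructor2.as_ref())();
        bench(); // run exactly once, e.g. under a profiler
    });

    assert_eq!(benchmark_fn(), 6);
    profile_fn();
}
```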

collector/benchlib/src/cli.rs

Lines changed: 8 additions & 0 deletions

```diff
@@ -4,6 +4,8 @@ use clap::{CommandFactory, FromArgMatches};
 pub enum Args {
     /// Benchmark all benchmarks in this benchmark group and print the results as JSON.
     Run(BenchmarkArgs),
+    /// Profile a single benchmark execution.
+    Profile(ProfileArgs),
     /// List benchmarks that are defined in the current group as a JSON array.
     List,
 }
@@ -23,6 +25,12 @@
     pub include: Option<String>,
 }
 
+#[derive(clap::Parser, Debug)]
+pub struct ProfileArgs {
+    /// Name of the benchmark that should be profiled.
+    pub benchmark: String,
+}
+
 #[test]
 fn verify_cli() {
     // By default, clap lazily checks subcommands. This provides eager testing
```
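
As a sketch of what the new variant means at the command line, assuming clap's derive defaults (where a variant named `Profile` becomes a `profile` subcommand; this is not the full benchlib CLI):

```rust
use clap::Parser;

/// Each variant becomes a subcommand of the benchmark group binary.
#[derive(clap::Parser, Debug)]
enum Args {
    /// Profile a single benchmark execution.
    Profile(ProfileArgs),
    /// List benchmarks that are defined in the current group as a JSON array.
    List,
}

#[derive(clap::Parser, Debug)]
struct ProfileArgs {
    /// Name of the benchmark that should be profiled.
    benchmark: String,
}

fn main() {
    // `<binary> profile some-benchmark` parses into Args::Profile(ProfileArgs { .. }).
    println!("{:?}", Args::parse());
}
```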

collector/benchlib/src/lib.rs

Lines changed: 1 addition & 0 deletions

```diff
@@ -18,6 +18,7 @@ mod cli;
 pub mod comm;
 pub mod measure;
 pub mod process;
+mod profile;
 mod utils;
 
 #[cfg(feature = "compression")]
```

collector/benchlib/src/profile.rs

Lines changed: 4 additions & 0 deletions

```diff
@@ -0,0 +1,4 @@
+pub fn profile_function<F: Fn() -> Bench, R, Bench: FnOnce() -> R>(benchmark_constructor: &F) {
+    let func = benchmark_constructor();
+    func();
+}
```
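
The shape of `profile_function` is deliberate: the constructor builds a fresh workload, and the closure it returns is executed exactly once, so the profiler sees a single clean pass over the benchmark body instead of the warm-up and iteration loop used for measurement. A hypothetical call site, purely for illustration:

```rust
fn example() {
    let constructor = || {
        // Setup: build the input; this runs before the profiled call.
        let data: Vec<u64> = (0..1_000_000).collect();
        // Workload: the closure that profile_function will run once.
        move || data.iter().sum::<u64>()
    };
    profile_function(&constructor);
}
```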

collector/src/bin/collector.rs

Lines changed: 61 additions & 15 deletions

```diff
@@ -10,7 +10,7 @@ use collector::compile::benchmark::scenario::Scenario;
 use collector::compile::benchmark::{
     compile_benchmark_dir, get_compile_benchmarks, ArtifactType, Benchmark, BenchmarkName,
 };
-use collector::{runtime, utils, CollectorCtx, CollectorStepBuilder};
+use collector::{utils, CollectorCtx, CollectorStepBuilder};
 use database::{ArtifactId, ArtifactIdNumber, Commit, CommitType, Connection, Pool};
 use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
 use std::cmp::Ordering;
@@ -31,9 +31,11 @@ use tokio::runtime::Runtime;
 use collector::compile::execute::bencher::BenchProcessor;
 use collector::compile::execute::profiler::{ProfileProcessor, Profiler};
 use collector::runtime::{
-    bench_runtime, runtime_benchmark_dir, BenchmarkFilter, BenchmarkSuite,
-    BenchmarkSuiteCompilation, CargoIsolationMode, DEFAULT_RUNTIME_ITERATIONS,
+    bench_runtime, prepare_runtime_benchmark_suite, runtime_benchmark_dir, BenchmarkFilter,
+    BenchmarkSuite, BenchmarkSuiteCompilation, CargoIsolationMode, RuntimeProfiler,
+    DEFAULT_RUNTIME_ITERATIONS,
 };
+use collector::runtime::{profile_runtime, RuntimeCompilationOpts};
 use collector::toolchain::{
     create_toolchain_from_published_version, get_local_toolchain, Sysroot, Toolchain,
 };
@@ -255,7 +257,7 @@ fn cg_annotate(cgout: &Path, path: &Path) -> anyhow::Result<()> {
 }
 
 #[allow(clippy::too_many_arguments)]
-fn profile(
+fn profile_compile(
     toolchain: &Toolchain,
     profiler: Profiler,
     out_dir: &Path,
@@ -502,6 +504,19 @@
         #[arg(long = "no-isolate")]
         no_isolate: bool,
     },
+
+    /// Profiles a runtime benchmark.
+    ProfileRuntime {
+        /// Profiler to use
+        profiler: RuntimeProfiler,
+
+        /// The path to the local rustc used to compile the runtime benchmark
+        rustc: String,
+
+        /// Name of the benchmark that should be profiled
+        benchmark: String,
+    },
+
     /// Benchmarks a local rustc
     BenchLocal {
         #[command(flatten)]
@@ -650,15 +665,7 @@ fn main_result() -> anyhow::Result<i32> {
             no_isolate,
         } => {
             log_db(&db);
-            let toolchain = get_local_toolchain(
-                &[Profile::Opt],
-                &local.rustc,
-                None,
-                local.cargo.as_deref(),
-                local.id.as_deref(),
-                "",
-                target_triple,
-            )?;
+            let toolchain = get_local_toolchain_for_runtime_benchmarks(&local, &target_triple)?;
             let pool = Pool::open(&db.db);
 
             let isolation_mode = if no_isolate {
@@ -689,6 +696,25 @@
             run_benchmarks(&mut rt, conn, shared, None, Some(config))?;
             Ok(0)
         }
+        Commands::ProfileRuntime {
+            profiler,
+            rustc,
+            benchmark,
+        } => {
+            let toolchain =
+                get_local_toolchain(&[Profile::Opt], &rustc, None, None, None, "", target_triple)?;
+            let suite = prepare_runtime_benchmark_suite(
+                &toolchain,
+                &runtime_benchmark_dir,
+                CargoIsolationMode::Cached,
+                // Compile with debuginfo to have filenames and line numbers available in the
+                // generated profiles.
+                RuntimeCompilationOpts::default().debug_info("1"),
+            )?
+            .suite;
+            profile_runtime(profiler, suite, &benchmark)?;
+            Ok(0)
+        }
         Commands::BenchLocal {
             local,
             opts,
@@ -904,7 +930,7 @@ fn main_result() -> anyhow::Result<i32> {
                 target_triple.clone(),
             )?;
             let id = toolchain.id.clone();
-            profile(
+            profile_compile(
                 &toolchain,
                 profiler,
                 &out_dir,
@@ -1005,6 +1031,21 @@ Make sure to modify `{dir}/perf-config.json` if the category/artifact don't matc
     }
 }
 
+fn get_local_toolchain_for_runtime_benchmarks(
+    local: &LocalOptions,
+    target_triple: &str,
+) -> anyhow::Result<Toolchain> {
+    get_local_toolchain(
+        &[Profile::Opt],
+        &local.rustc,
+        None,
+        local.cargo.as_deref(),
+        local.id.as_deref(),
+        "",
+        target_triple.to_string(),
+    )
+}
+
 async fn load_runtime_benchmarks(
     conn: &mut dyn Connection,
     benchmark_dir: &Path,
@@ -1015,7 +1056,12 @@ async fn load_runtime_benchmarks(
     let BenchmarkSuiteCompilation {
         suite,
         failed_to_compile,
-    } = runtime::prepare_runtime_benchmark_suite(toolchain, benchmark_dir, isolation_mode)?;
+    } = prepare_runtime_benchmark_suite(
+        toolchain,
+        benchmark_dir,
+        isolation_mode,
+        RuntimeCompilationOpts::default(),
+    )?;
 
     record_runtime_compilation_errors(conn, artifact_id, failed_to_compile).await;
     Ok(suite)
```

collector/src/runtime/benchmark.rs

Lines changed: 28 additions & 1 deletion

```diff
@@ -80,6 +80,15 @@ impl BenchmarkSuite {
             .iter()
             .flat_map(|suite| suite.benchmark_names.iter().map(|n| n.as_ref()))
     }
+
+    pub fn get_group_by_benchmark(&self, benchmark: &str) -> Option<&BenchmarkGroup> {
+        self.groups.iter().find(|group| {
+            group
+                .benchmark_names
+                .iter()
+                .any(|b| b.as_str() == benchmark)
+        })
+    }
 }
 
 pub struct BenchmarkFilter {
@@ -118,6 +127,18 @@ pub struct BenchmarkSuiteCompilation {
     pub failed_to_compile: HashMap<String, String>,
 }
 
+#[derive(Default)]
+pub struct RuntimeCompilationOpts {
+    debug_info: Option<String>,
+}
+
+impl RuntimeCompilationOpts {
+    pub fn debug_info(mut self, debug_info: &str) -> Self {
+        self.debug_info = Some(debug_info.to_string());
+        self
+    }
+}
+
 /// Find all runtime benchmark crates in `benchmark_dir` and compile them.
 /// We assume that each binary defines a benchmark suite using `benchlib`.
 /// We then execute each benchmark suite with the `list-benchmarks` command to find out its
@@ -126,6 +147,7 @@ pub fn prepare_runtime_benchmark_suite(
     toolchain: &Toolchain,
     benchmark_dir: &Path,
     isolation_mode: CargoIsolationMode,
+    opts: RuntimeCompilationOpts,
 ) -> anyhow::Result<BenchmarkSuiteCompilation> {
     let benchmark_crates = get_runtime_benchmark_groups(benchmark_dir)?;
 
@@ -158,7 +180,7 @@ pub fn prepare_runtime_benchmark_suite(
 
         let target_dir = temp_dir.as_ref().map(|d| d.path());
 
-        let result = start_cargo_build(toolchain, &benchmark_crate.path, target_dir)
+        let result = start_cargo_build(toolchain, &benchmark_crate.path, target_dir, &opts)
             .with_context(|| {
                 anyhow::anyhow!("Cannot start compilation of {}", benchmark_crate.name)
             })
@@ -288,6 +310,7 @@ fn start_cargo_build(
     toolchain: &Toolchain,
     benchmark_dir: &Path,
     target_dir: Option<&Path>,
+    opts: &RuntimeCompilationOpts,
 ) -> anyhow::Result<Child> {
     let mut command = Command::new(&toolchain.components.cargo);
     command
@@ -301,6 +324,10 @@
         .stdout(Stdio::piped())
         .stderr(Stdio::null());
 
+    if let Some(ref debug_info) = opts.debug_info {
+        command.env("CARGO_PROFILE_RELEASE_DEBUG", debug_info);
+    }
+
     if let Some(target_dir) = target_dir {
         command.arg("--target-dir");
         command.arg(target_dir);
```
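
`CARGO_PROFILE_RELEASE_DEBUG` is Cargo's environment-variable override for the `debug` setting of the release profile, so `debug_info("1")` has the same effect as building with `[profile.release] debug = 1` (limited debuginfo, which is enough for the filenames and line numbers the profiles need). A rough standalone sketch of the same mechanism (the function name and paths are hypothetical):

```rust
use std::process::{Command, ExitStatus};

// Hypothetical, minimal version of what start_cargo_build does with the
// debug_info option: Cargo reads CARGO_PROFILE_RELEASE_DEBUG as an override
// for `[profile.release] debug`.
fn build_with_line_info(cargo: &str, crate_dir: &str) -> std::io::Result<ExitStatus> {
    Command::new(cargo)
        .args(["build", "--release"])
        .env("CARGO_PROFILE_RELEASE_DEBUG", "1") // "1" = limited debuginfo
        .current_dir(crate_dir)
        .status()
}
```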
