Skip to content

Commit c9abb13

Browse files
feat: parse perf file for memmap events instead of relying on /proc/pid/maps
1 parent 45dd7d7 commit c9abb13

File tree

4 files changed

+170
-114
lines changed

4 files changed

+170
-114
lines changed

Cargo.lock

Lines changed: 11 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ path = "src/main.rs"
1111

1212

1313
[dependencies]
14-
anyhow = "1.0.75"
14+
anyhow = { workspace = true }
1515
clap = { workspace = true }
1616
itertools = "0.11.0"
1717
lazy_static = "1.4.0"
@@ -26,8 +26,8 @@ reqwest = { version = "0.11.22", features = [
2626
] }
2727
reqwest-middleware = "0.2.4"
2828
reqwest-retry = "0.3.0"
29-
serde = { version = "1.0.192", features = ["derive"] }
30-
serde_json = { version = "1.0.108", features = ["preserve_order"] }
29+
serde = { workspace = true }
30+
serde_json = { workspace = true }
3131
url = "2.4.1"
3232
sha256 = "1.4.0"
3333
tokio = { version = "1", features = ["macros", "rt"] }
@@ -51,10 +51,11 @@ async-trait = "0.1.82"
5151
libc = "0.2.171"
5252
bincode = "1.3.3"
5353
object = "0.36.7"
54-
linux-perf-data = "0.11.0"
54+
# TODO: Make this repo public
55+
linux-perf-data = { git = "ssh://git@github.com/CodSpeedHQ/linux-perf-data.git", branch = "feat/support-perf-pipe-data-parsing" }
5556
debugid = "0.8.0"
5657
memmap2 = "0.9.5"
57-
nix = { version = "0.29.0", features = ["fs", "time", "user"] }
58+
nix = { workspace = true, features = ["fs", "time", "user"] }
5859
futures = "0.3.31"
5960
runner-shared = { path = "crates/runner-shared" }
6061
shellexpand = { version = "3.1.1", features = ["tilde"] }
@@ -79,7 +80,11 @@ shell-quote = "0.7.2"
7980
members = ["crates/exec-harness", "crates/runner-shared"]
8081

8182
[workspace.dependencies]
83+
anyhow = "1.0.75"
8284
clap = { version = "4.4.8", features = ["derive", "env", "color"] }
85+
nix = "0.29.0"
86+
serde = { version = "1.0.192", features = ["derive"] }
87+
serde_json = { version = "1.0.108", features = ["preserve_order"] }
8388

8489
[workspace.metadata.release]
8590
sign-tag = true

src/executor/wall_time/perf/mod.rs

Lines changed: 28 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,26 @@ use crate::executor::valgrind::helpers::perf_maps::harvest_perf_maps_for_pids;
1111
use crate::executor::wall_time::perf::debug_info::ProcessDebugInfo;
1212
use crate::executor::wall_time::perf::jit_dump::harvest_perf_jit_for_pids;
1313
use crate::executor::wall_time::perf::perf_executable::get_working_perf_executable;
14-
use crate::executor::wall_time::perf::unwind_data::UnwindDataExt;
1514
use crate::prelude::*;
1615
use crate::run::UnwindingMode;
1716
use anyhow::Context;
1817
use fifo::{PerfFifo, RunnerFifo};
1918
use libc::pid_t;
2019
use nix::sys::time::TimeValLike;
2120
use nix::time::clock_gettime;
22-
use perf_map::ProcessSymbols;
21+
use parse_perf_file::MemmapRecordsOutput;
2322
use runner_shared::debug_info::ModuleDebugInfo;
2423
use runner_shared::fifo::Command as FifoCommand;
2524
use runner_shared::fifo::MarkerType;
2625
use runner_shared::metadata::PerfMetadata;
27-
use runner_shared::unwind_data::UnwindData;
2826
use std::collections::HashSet;
2927
use std::path::Path;
28+
use std::path::PathBuf;
3029
use std::time::Duration;
3130
use std::{cell::OnceCell, collections::HashMap, process::ExitStatus};
3231

3332
mod jit_dump;
33+
mod parse_perf_file;
3434
mod setup;
3535

3636
pub mod debug_info;
@@ -146,8 +146,8 @@ impl PerfRunner {
146146
]);
147147
cmd_builder.wrap_with(perf_wrapper_builder);
148148

149-
// Copy the perf data to the profile folder
150-
let perf_data_file_path = profile_folder.join(PERF_DATA_FILE_NAME);
149+
// Output the perf data to the profile folder
150+
let perf_data_file_path = get_perf_file_path(profile_folder);
151151

152152
let raw_command = format!(
153153
"set -o pipefail && {} | cat > {}",
@@ -203,86 +203,12 @@ impl PerfRunner {
203203
Ok(())
204204
}
205205

206-
#[cfg(target_os = "linux")]
207-
fn process_memory_mappings(
208-
pid: pid_t,
209-
symbols_by_pid: &mut HashMap<pid_t, ProcessSymbols>,
210-
unwind_data_by_pid: &mut HashMap<pid_t, Vec<UnwindData>>,
211-
) -> anyhow::Result<()> {
212-
use procfs::process::MMPermissions;
213-
214-
let bench_proc =
215-
procfs::process::Process::new(pid as _).expect("Failed to find benchmark process");
216-
let exe_maps = bench_proc.maps().expect("Failed to read /proc/{pid}/maps");
217-
218-
if is_codspeed_debug_enabled() {
219-
debug!("Process memory mappings for PID {pid}:");
220-
for map in exe_maps.iter().sorted_by_key(|m| m.address.0) {
221-
let (base_addr, end_addr) = map.address;
222-
debug!(
223-
" {:016x}-{:016x} {:08x} {:?} {:?} ",
224-
base_addr, end_addr, map.offset, map.pathname, map.perms,
225-
);
226-
}
227-
}
228-
229-
for map in &exe_maps {
230-
let page_offset = map.offset;
231-
let (base_addr, end_addr) = map.address;
232-
let path = match &map.pathname {
233-
procfs::process::MMapPath::Path(path) => Some(path.clone()),
234-
_ => None,
235-
};
236-
237-
let Some(path) = &path else {
238-
if map.perms.contains(MMPermissions::EXECUTE) {
239-
debug!("Found executable mapping without path: {base_addr:x} - {end_addr:x}");
240-
}
241-
continue;
242-
};
243-
244-
if !map.perms.contains(MMPermissions::EXECUTE) {
245-
continue;
246-
}
247-
248-
symbols_by_pid
249-
.entry(pid)
250-
.or_insert(ProcessSymbols::new(pid))
251-
.add_mapping(pid, path, base_addr, end_addr, map.offset);
252-
debug!("Added mapping for module {path:?}");
253-
254-
match UnwindData::new(
255-
path.to_string_lossy().as_bytes(),
256-
page_offset,
257-
base_addr,
258-
end_addr,
259-
None,
260-
) {
261-
Ok(unwind_data) => {
262-
unwind_data_by_pid.entry(pid).or_default().push(unwind_data);
263-
debug!("Added unwind data for {path:?} ({base_addr:x} - {end_addr:x})");
264-
}
265-
Err(error) => {
266-
debug!(
267-
"Failed to create unwind data for module {}: {}",
268-
path.display(),
269-
error
270-
);
271-
}
272-
}
273-
}
274-
275-
Ok(())
276-
}
277-
278206
async fn handle_fifo(
279207
mut runner_fifo: RunnerFifo,
280208
mut perf_fifo: PerfFifo,
281209
) -> anyhow::Result<BenchmarkData> {
282210
let mut bench_order_by_timestamp = Vec::<(u64, String)>::new();
283211
let mut bench_pids = HashSet::<pid_t>::new();
284-
let mut symbols_by_pid = HashMap::<pid_t, ProcessSymbols>::new();
285-
let mut unwind_data_by_pid = HashMap::<pid_t, Vec<UnwindData>>::new();
286212
let mut markers = Vec::<MarkerType>::new();
287213

288214
let mut integration = None;
@@ -319,19 +245,9 @@ impl PerfRunner {
319245

320246
match cmd {
321247
FifoCommand::CurrentBenchmark { pid, uri } => {
322-
bench_order_by_timestamp.push((current_time(), uri));
248+
bench_order_by_timestamp.push((current_time(), uri.clone()));
323249
bench_pids.insert(pid);
324250

325-
#[cfg(target_os = "linux")]
326-
if !symbols_by_pid.contains_key(&pid) && !unwind_data_by_pid.contains_key(&pid)
327-
{
328-
Self::process_memory_mappings(
329-
pid,
330-
&mut symbols_by_pid,
331-
&mut unwind_data_by_pid,
332-
)?;
333-
}
334-
335251
runner_fifo.send_cmd(FifoCommand::Ack).await?;
336252
}
337253
FifoCommand::StartBenchmark => {
@@ -400,8 +316,6 @@ impl PerfRunner {
400316
integration,
401317
uri_by_ts: bench_order_by_timestamp,
402318
bench_pids,
403-
symbols_by_pid,
404-
unwind_data_by_pid,
405319
markers,
406320
})
407321
}
@@ -413,35 +327,45 @@ pub struct BenchmarkData {
413327

414328
uri_by_ts: Vec<(u64, String)>,
415329
bench_pids: HashSet<pid_t>,
416-
symbols_by_pid: HashMap<pid_t, ProcessSymbols>,
417-
unwind_data_by_pid: HashMap<pid_t, Vec<UnwindData>>,
418330
markers: Vec<MarkerType>,
419331
}
420332

421333
#[derive(Debug)]
422334
pub enum BenchmarkDataSaveError {
423335
MissingIntegration,
336+
FailedToParsePerfFile,
424337
}
425338

426339
impl BenchmarkData {
427340
pub fn save_to<P: AsRef<std::path::Path>>(
428341
&self,
429342
path: P,
430343
) -> Result<(), BenchmarkDataSaveError> {
431-
for proc_sym in self.symbols_by_pid.values() {
344+
debug!("Reading perf data from file for mmap extraction");
345+
let perf_file_path = get_perf_file_path(&path);
346+
347+
let MemmapRecordsOutput {
348+
symbols_by_pid,
349+
unwind_data_by_pid,
350+
} = parse_perf_file::parse_for_memmap2(&perf_file_path).map_err(|e| {
351+
error!("Failed to parse perf file: {e}");
352+
BenchmarkDataSaveError::FailedToParsePerfFile
353+
})?;
354+
355+
for proc_sym in symbols_by_pid.values() {
432356
proc_sym.save_to(&path).unwrap();
433357
}
434358

435359
// Collect debug info for each process by looking up file/line for symbols
436360
let mut debug_info_by_pid = HashMap::<i32, Vec<ModuleDebugInfo>>::new();
437-
for (pid, proc_sym) in &self.symbols_by_pid {
361+
for (pid, proc_sym) in &symbols_by_pid {
438362
debug_info_by_pid
439363
.entry(*pid)
440364
.or_default()
441365
.extend(ProcessDebugInfo::new(proc_sym).modules());
442366
}
443367

444-
for (pid, modules) in &self.unwind_data_by_pid {
368+
for (pid, modules) in &unwind_data_by_pid {
445369
for module in modules {
446370
module.save_to(&path, *pid).unwrap();
447371
}
@@ -459,7 +383,7 @@ impl BenchmarkData {
459383

460384
// Check if any of the ignored modules has been loaded in the process
461385
for ignore_path in get_objects_path_to_ignore() {
462-
for proc in self.symbols_by_pid.values() {
386+
for proc in symbols_by_pid.values() {
463387
if let Some(mapping) = proc.module_mapping(&ignore_path) {
464388
let (Some((base_addr, _)), Some((_, end_addr))) = (
465389
mapping.iter().min_by_key(|(base_addr, _)| base_addr),
@@ -474,16 +398,15 @@ impl BenchmarkData {
474398
}
475399

476400
// When python is statically linked, we'll not find it in the ignored modules. Add it manually:
477-
let python_modules = self.symbols_by_pid.values().filter_map(|proc| {
401+
let python_modules = symbols_by_pid.values().filter_map(|proc| {
478402
proc.loaded_modules().find(|path| {
479403
path.file_name()
480404
.map(|name| name.to_string_lossy().starts_with("python"))
481405
.unwrap_or(false)
482406
})
483407
});
484408
for path in python_modules {
485-
if let Some(mapping) = self
486-
.symbols_by_pid
409+
if let Some(mapping) = symbols_by_pid
487410
.values()
488411
.find_map(|proc| proc.module_mapping(path))
489412
{
@@ -507,3 +430,7 @@ impl BenchmarkData {
507430
Ok(())
508431
}
509432
}
433+
434+
fn get_perf_file_path<P: AsRef<Path>>(profile_folder: P) -> PathBuf {
435+
profile_folder.as_ref().join(PERF_DATA_FILE_NAME)
436+
}

0 commit comments

Comments
 (0)