Skip to content

Commit f39e236

Browse files
feat: parse perf file for memmap events instead of relying on /proc/pid/maps
1 parent 6424703 commit f39e236

File tree

4 files changed

+153
-103
lines changed

4 files changed

+153
-103
lines changed

Cargo.lock

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ async-trait = "0.1.82"
5151
libc = "0.2.171"
5252
bincode = "1.3.3"
5353
object = "0.36.7"
54-
linux-perf-data = "0.11.0"
54+
# TODO: Make this repo public
55+
linux-perf-data = { git = "ssh://git@github.com/CodSpeedHQ/linux-perf-data.git", branch = "feat/support-perf-pipe-data-parsing" }
5556
debugid = "0.8.0"
5657
memmap2 = "0.9.5"
5758
nix = { version = "0.29.0", features = ["fs", "time", "user"] }

src/executor/wall_time/perf/mod.rs

Lines changed: 28 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,26 @@ use crate::executor::valgrind::helpers::perf_maps::harvest_perf_maps_for_pids;
1111
use crate::executor::wall_time::perf::debug_info::ProcessDebugInfo;
1212
use crate::executor::wall_time::perf::jit_dump::harvest_perf_jit_for_pids;
1313
use crate::executor::wall_time::perf::perf_executable::get_working_perf_executable;
14-
use crate::executor::wall_time::perf::unwind_data::UnwindDataExt;
1514
use crate::prelude::*;
1615
use crate::run::UnwindingMode;
1716
use anyhow::Context;
1817
use fifo::{PerfFifo, RunnerFifo};
1918
use libc::pid_t;
2019
use nix::sys::time::TimeValLike;
2120
use nix::time::clock_gettime;
22-
use perf_map::ProcessSymbols;
21+
use parse_perf_file::MemmapRecordsOutput;
2322
use runner_shared::debug_info::ModuleDebugInfo;
2423
use runner_shared::fifo::Command as FifoCommand;
2524
use runner_shared::fifo::MarkerType;
2625
use runner_shared::metadata::PerfMetadata;
27-
use runner_shared::unwind_data::UnwindData;
2826
use std::collections::HashSet;
2927
use std::path::Path;
28+
use std::path::PathBuf;
3029
use std::time::Duration;
3130
use std::{cell::OnceCell, collections::HashMap, process::ExitStatus};
3231

3332
mod jit_dump;
33+
mod parse_perf_file;
3434
mod setup;
3535

3636
pub mod debug_info;
@@ -146,8 +146,8 @@ impl PerfRunner {
146146
]);
147147
cmd_builder.wrap_with(perf_wrapper_builder);
148148

149-
// Copy the perf data to the profile folder
150-
let perf_data_file_path = profile_folder.join(PERF_DATA_FILE_NAME);
149+
// Output the perf data to the profile folder
150+
let perf_data_file_path = get_perf_file_path(profile_folder);
151151

152152
let raw_command = format!(
153153
"set -o pipefail && {} | cat > {}",
@@ -203,84 +203,12 @@ impl PerfRunner {
203203
Ok(())
204204
}
205205

206-
#[cfg(target_os = "linux")]
207-
fn process_memory_mappings(
208-
pid: pid_t,
209-
symbols_by_pid: &mut HashMap<pid_t, ProcessSymbols>,
210-
unwind_data_by_pid: &mut HashMap<pid_t, Vec<UnwindData>>,
211-
) -> anyhow::Result<()> {
212-
use procfs::process::MMPermissions;
213-
214-
let bench_proc =
215-
procfs::process::Process::new(pid as _).expect("Failed to find benchmark process");
216-
let exe_maps = bench_proc.maps().expect("Failed to read /proc/{pid}/maps");
217-
218-
debug!("Process memory mappings for PID {pid}:");
219-
for map in exe_maps.iter().sorted_by_key(|m| m.address.0) {
220-
let (base_addr, end_addr) = map.address;
221-
debug!(
222-
" {:016x}-{:016x} {:08x} {:?} {:?} ",
223-
base_addr, end_addr, map.offset, map.pathname, map.perms,
224-
);
225-
}
226-
227-
for map in &exe_maps {
228-
let page_offset = map.offset;
229-
let (base_addr, end_addr) = map.address;
230-
let path = match &map.pathname {
231-
procfs::process::MMapPath::Path(path) => Some(path.clone()),
232-
_ => None,
233-
};
234-
235-
let Some(path) = &path else {
236-
if map.perms.contains(MMPermissions::EXECUTE) {
237-
debug!("Found executable mapping without path: {base_addr:x} - {end_addr:x}");
238-
}
239-
continue;
240-
};
241-
242-
if !map.perms.contains(MMPermissions::EXECUTE) {
243-
continue;
244-
}
245-
246-
symbols_by_pid
247-
.entry(pid)
248-
.or_insert(ProcessSymbols::new(pid))
249-
.add_mapping(pid, path, base_addr, end_addr, map.offset);
250-
debug!("Added mapping for module {path:?}");
251-
252-
match UnwindData::new(
253-
path.to_string_lossy().as_bytes(),
254-
page_offset,
255-
base_addr,
256-
end_addr,
257-
None,
258-
) {
259-
Ok(unwind_data) => {
260-
unwind_data_by_pid.entry(pid).or_default().push(unwind_data);
261-
debug!("Added unwind data for {path:?} ({base_addr:x} - {end_addr:x})");
262-
}
263-
Err(error) => {
264-
debug!(
265-
"Failed to create unwind data for module {}: {}",
266-
path.display(),
267-
error
268-
);
269-
}
270-
}
271-
}
272-
273-
Ok(())
274-
}
275-
276206
async fn handle_fifo(
277207
mut runner_fifo: RunnerFifo,
278208
mut perf_fifo: PerfFifo,
279209
) -> anyhow::Result<BenchmarkData> {
280210
let mut bench_order_by_timestamp = Vec::<(u64, String)>::new();
281211
let mut bench_pids = HashSet::<pid_t>::new();
282-
let mut symbols_by_pid = HashMap::<pid_t, ProcessSymbols>::new();
283-
let mut unwind_data_by_pid = HashMap::<pid_t, Vec<UnwindData>>::new();
284212
let mut markers = Vec::<MarkerType>::new();
285213

286214
let mut integration = None;
@@ -317,19 +245,9 @@ impl PerfRunner {
317245

318246
match cmd {
319247
FifoCommand::CurrentBenchmark { pid, uri } => {
320-
bench_order_by_timestamp.push((current_time(), uri));
248+
bench_order_by_timestamp.push((current_time(), uri.clone()));
321249
bench_pids.insert(pid);
322250

323-
#[cfg(target_os = "linux")]
324-
if !symbols_by_pid.contains_key(&pid) && !unwind_data_by_pid.contains_key(&pid)
325-
{
326-
Self::process_memory_mappings(
327-
pid,
328-
&mut symbols_by_pid,
329-
&mut unwind_data_by_pid,
330-
)?;
331-
}
332-
333251
runner_fifo.send_cmd(FifoCommand::Ack).await?;
334252
}
335253
FifoCommand::StartBenchmark => {
@@ -398,8 +316,6 @@ impl PerfRunner {
398316
integration,
399317
uri_by_ts: bench_order_by_timestamp,
400318
bench_pids,
401-
symbols_by_pid,
402-
unwind_data_by_pid,
403319
markers,
404320
})
405321
}
@@ -411,35 +327,45 @@ pub struct BenchmarkData {
411327

412328
uri_by_ts: Vec<(u64, String)>,
413329
bench_pids: HashSet<pid_t>,
414-
symbols_by_pid: HashMap<pid_t, ProcessSymbols>,
415-
unwind_data_by_pid: HashMap<pid_t, Vec<UnwindData>>,
416330
markers: Vec<MarkerType>,
417331
}
418332

419333
/// Reasons why persisting benchmark data to the profile folder can fail.
#[derive(Debug)]
420334
pub enum BenchmarkDataSaveError {
421335
/// NOTE(review): presumably returned when no integration was reported over the
/// FIFO before saving — the producing code is not visible here, confirm.
MissingIntegration,
336+
/// The perf data file could not be parsed for its memory-map records.
FailedToParsePerfFile,
422337
}
423338

424339
impl BenchmarkData {
425340
pub fn save_to<P: AsRef<std::path::Path>>(
426341
&self,
427342
path: P,
428343
) -> Result<(), BenchmarkDataSaveError> {
429-
for proc_sym in self.symbols_by_pid.values() {
344+
debug!("Reading perf data from file for mmap extraction");
345+
let perf_file_path = get_perf_file_path(&path);
346+
347+
let MemmapRecordsOutput {
348+
symbols_by_pid,
349+
unwind_data_by_pid,
350+
} = parse_perf_file::parse_for_memmap2(&perf_file_path).map_err(|e| {
351+
error!("Failed to parse perf file: {e}");
352+
BenchmarkDataSaveError::FailedToParsePerfFile
353+
})?;
354+
355+
for proc_sym in symbols_by_pid.values() {
430356
proc_sym.save_to(&path).unwrap();
431357
}
432358

433359
// Collect debug info for each process by looking up file/line for symbols
434360
let mut debug_info_by_pid = HashMap::<i32, Vec<ModuleDebugInfo>>::new();
435-
for (pid, proc_sym) in &self.symbols_by_pid {
361+
for (pid, proc_sym) in &symbols_by_pid {
436362
debug_info_by_pid
437363
.entry(*pid)
438364
.or_default()
439365
.extend(ProcessDebugInfo::new(proc_sym).modules());
440366
}
441367

442-
for (pid, modules) in &self.unwind_data_by_pid {
368+
for (pid, modules) in &unwind_data_by_pid {
443369
for module in modules {
444370
module.save_to(&path, *pid).unwrap();
445371
}
@@ -457,7 +383,7 @@ impl BenchmarkData {
457383

458384
// Check if any of the ignored modules has been loaded in the process
459385
for ignore_path in get_objects_path_to_ignore() {
460-
for proc in self.symbols_by_pid.values() {
386+
for proc in symbols_by_pid.values() {
461387
if let Some(mapping) = proc.module_mapping(&ignore_path) {
462388
let (Some((base_addr, _)), Some((_, end_addr))) = (
463389
mapping.iter().min_by_key(|(base_addr, _)| base_addr),
@@ -472,16 +398,15 @@ impl BenchmarkData {
472398
}
473399

474400
// When python is statically linked, we'll not find it in the ignored modules. Add it manually:
475-
let python_modules = self.symbols_by_pid.values().filter_map(|proc| {
401+
let python_modules = symbols_by_pid.values().filter_map(|proc| {
476402
proc.loaded_modules().find(|path| {
477403
path.file_name()
478404
.map(|name| name.to_string_lossy().starts_with("python"))
479405
.unwrap_or(false)
480406
})
481407
});
482408
for path in python_modules {
483-
if let Some(mapping) = self
484-
.symbols_by_pid
409+
if let Some(mapping) = symbols_by_pid
485410
.values()
486411
.find_map(|proc| proc.module_mapping(path))
487412
{
@@ -505,3 +430,7 @@ impl BenchmarkData {
505430
Ok(())
506431
}
507432
}
433+
434+
fn get_perf_file_path<P: AsRef<Path>>(profile_folder: P) -> PathBuf {
435+
profile_folder.as_ref().join(PERF_DATA_FILE_NAME)
436+
}
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
use super::perf_map::ProcessSymbols;
2+
use super::unwind_data::UnwindDataExt;
3+
use libc::pid_t;
4+
use linux_perf_data::PerfFileReader;
5+
use linux_perf_data::PerfFileRecord;
6+
use linux_perf_data::linux_perf_event_reader::EventRecord;
7+
use runner_shared::unwind_data::UnwindData;
8+
use std::collections::HashMap;
9+
use std::path::Path;
10+
11+
use crate::prelude::*;
12+
13+
/// Per-process data rebuilt from the memory-map records of a perf data file.
pub struct MemmapRecordsOutput {
14+
/// Symbol information for each process, keyed by process id.
pub symbols_by_pid: HashMap<pid_t, ProcessSymbols>,
15+
/// Unwind data entries collected for each process, keyed by process id.
pub unwind_data_by_pid: HashMap<pid_t, Vec<UnwindData>>,
16+
}
17+
18+
pub(super) fn parse_for_memmap2(perf_file_path: &Path) -> Result<MemmapRecordsOutput> {
19+
let reader = std::fs::File::open(perf_file_path).unwrap();
20+
let mut symbols_by_pid = HashMap::<pid_t, ProcessSymbols>::new();
21+
let mut unwind_data_by_pid = HashMap::<pid_t, Vec<UnwindData>>::new();
22+
23+
let PerfFileReader {
24+
mut perf_file,
25+
mut record_iter,
26+
} = PerfFileReader::parse_pipe(reader)?;
27+
28+
const PROT_EXEC: u32 = 0x4;
29+
30+
while let Some(record) = record_iter.next_record(&mut perf_file).unwrap() {
31+
let PerfFileRecord::EventRecord { record, .. } = record else {
32+
continue;
33+
};
34+
35+
let Ok(parsed_record) = record.parse() else {
36+
continue;
37+
};
38+
39+
let EventRecord::Mmap2(record) = parsed_record else {
40+
continue;
41+
};
42+
43+
let record_path_string = {
44+
let path_slice = record.path.as_slice();
45+
String::from_utf8_lossy(&path_slice).into_owned()
46+
};
47+
48+
let end_addr = record.address + record.length;
49+
50+
if record_path_string == "//anon" {
51+
// Skip anonymous mappings
52+
trace!(
53+
"Skipping anonymous mapping: {:x}-{:x}",
54+
record.address, end_addr
55+
);
56+
continue;
57+
}
58+
59+
if record_path_string.starts_with("[") && record_path_string.ends_with("]") {
60+
// Skip special mappings
61+
debug!(
62+
"Skipping special mapping: {} - {:x}-{:x}",
63+
record_path_string, record.address, end_addr
64+
);
65+
continue;
66+
}
67+
68+
debug!(
69+
"Pid {}: {:016x}-{:016x} {:08x} {:?} (Prot {:?})",
70+
record.pid,
71+
record.address,
72+
end_addr,
73+
record.page_offset,
74+
record_path_string,
75+
record.protection,
76+
);
77+
78+
if record.protection & PROT_EXEC == 0 {
79+
continue;
80+
}
81+
82+
symbols_by_pid
83+
.entry(record.pid)
84+
.or_insert(ProcessSymbols::new(record.pid))
85+
.add_mapping(
86+
record.pid,
87+
&record_path_string,
88+
record.address,
89+
end_addr,
90+
record.page_offset,
91+
);
92+
debug!("Added symbols mapping for module {record_path_string:?}");
93+
94+
match UnwindData::new(
95+
record_path_string.as_bytes(),
96+
record.page_offset,
97+
record.address,
98+
end_addr,
99+
None,
100+
) {
101+
Ok(unwind_data) => {
102+
unwind_data_by_pid
103+
.entry(record.pid)
104+
.or_default()
105+
.push(unwind_data);
106+
debug!(
107+
"Added unwind data for {record_path_string} ({:x} - {:x})",
108+
record.address, end_addr
109+
);
110+
}
111+
Err(error) => {
112+
debug!("Failed to create unwind data for module {record_path_string}: {error}");
113+
}
114+
}
115+
}
116+
117+
Ok(MemmapRecordsOutput {
118+
symbols_by_pid,
119+
unwind_data_by_pid,
120+
})
121+
}

0 commit comments

Comments
 (0)