Skip to content

Commit 5cad97d

Browse files
feat: use bench_pids filters when harvesting symbols from perf file
This fixes a regression introduced in 8b37208. We now filter pids using `bench_pids`, except for `exec-harness` integrations, where we take all pids.
1 parent ad6011b commit 5cad97d

3 files changed

Lines changed: 168 additions & 70 deletions

File tree

src/executor/shared/fifo.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,15 @@ impl GenericFifo {
6969
pub struct FifoBenchmarkData {
7070
/// Name and version of the integration
7171
pub integration: Option<(String, String)>,
72+
pub bench_pids: HashSet<pid_t>,
73+
}
74+
75+
impl FifoBenchmarkData {
76+
pub fn is_exec_harness(&self) -> bool {
77+
self.integration
78+
.as_ref()
79+
.is_some_and(|(name, _)| name == "exec-harness")
80+
}
7281
}
7382

7483
pub struct RunnerFifo {
@@ -254,7 +263,10 @@ impl RunnerFifo {
254263
);
255264
let marker_result =
256265
ExecutionTimestamps::new(&bench_order_by_timestamp, &markers);
257-
let fifo_data = FifoBenchmarkData { integration };
266+
let fifo_data = FifoBenchmarkData {
267+
integration,
268+
bench_pids,
269+
};
258270
return Ok((marker_result, fifo_data, exit_status));
259271
}
260272
Err(e) => return Err(anyhow::Error::from(e)),

src/executor/wall_time/perf/mod.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -283,11 +283,17 @@ impl BenchmarkData {
283283
let path_ref = path.as_ref();
284284

285285
debug!("Reading perf data from file for mmap extraction");
286+
let pid_filter = if self.fifo_data.is_exec_harness() {
287+
parse_perf_file::PidFilter::All
288+
} else {
289+
parse_perf_file::PidFilter::TrackedPids(self.fifo_data.bench_pids.clone())
290+
};
286291
let MemmapRecordsOutput {
287292
symbols_by_pid,
288293
unwind_data_by_pid,
294+
tracked_pids,
289295
} = {
290-
parse_perf_file::parse_for_memmap2(perf_file_path).map_err(|e| {
296+
parse_perf_file::parse_for_memmap2(perf_file_path, pid_filter).map_err(|e| {
291297
error!("Failed to parse perf file: {e}");
292298
BenchmarkDataSaveError::FailedToParsePerfFile
293299
})?
@@ -296,15 +302,14 @@ impl BenchmarkData {
296302
// Harvest the perf maps generated by python. This will copy the perf
297303
// maps from /tmp to the profile folder. We have to write our own perf
298304
// maps to these files AFTERWARDS, otherwise it'll be overwritten!
299-
let bench_pids = symbols_by_pid.keys().copied().collect();
300-
debug!("Harvesting perf maps and jit dumps for pids: {bench_pids:?}");
301-
harvest_perf_maps_for_pids(path_ref, &bench_pids)
305+
debug!("Harvesting perf maps and jit dumps for pids: {tracked_pids:?}");
306+
harvest_perf_maps_for_pids(path_ref, &tracked_pids)
302307
.await
303308
.map_err(|e| {
304309
error!("Failed to harvest perf maps: {e}");
305310
BenchmarkDataSaveError::FailedToHarvestPerfMaps
306311
})?;
307-
harvest_perf_jit_for_pids(path_ref, &bench_pids)
312+
harvest_perf_jit_for_pids(path_ref, &tracked_pids)
308313
.await
309314
.map_err(|e| {
310315
error!("Failed to harvest jit dumps: {e}");

src/executor/wall_time/perf/parse_perf_file.rs

Lines changed: 145 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,23 @@ use linux_perf_data::linux_perf_event_reader::EventRecord;
88
use linux_perf_data::linux_perf_event_reader::RecordType;
99
use runner_shared::unwind_data::UnwindData;
1010
use std::collections::HashMap;
11+
use std::collections::HashSet;
1112
use std::path::Path;
1213

1314
pub struct MemmapRecordsOutput {
1415
pub symbols_by_pid: HashMap<pid_t, ProcessSymbols>,
1516
pub unwind_data_by_pid: HashMap<pid_t, Vec<UnwindData>>,
17+
pub tracked_pids: HashSet<pid_t>,
1618
}
1719

18-
pub(super) fn parse_for_memmap2<P: AsRef<Path>>(perf_file_path: P) -> Result<MemmapRecordsOutput> {
20+
/// Parse the perf file at `perf_file_path` and look for MMAP2 records for the given `pids`.
21+
/// If the pids filter is empty, all MMAP2 records will be parsed.
22+
///
23+
/// Returns process symbols and unwind data for the executable mappings found in the perf file.
24+
pub fn parse_for_memmap2<P: AsRef<Path>>(
25+
perf_file_path: P,
26+
mut pid_filter: PidFilter,
27+
) -> Result<MemmapRecordsOutput> {
1928
let mut symbols_by_pid = HashMap::<pid_t, ProcessSymbols>::new();
2029
let mut unwind_data_by_pid = HashMap::<pid_t, Vec<UnwindData>>::new();
2130

@@ -37,85 +46,157 @@ pub(super) fn parse_for_memmap2<P: AsRef<Path>>(perf_file_path: P) -> Result<Mem
3746

3847
// Check the type from the raw record to avoid parsing overhead since we do not care about
3948
// most records.
40-
if record.record_type != RecordType::MMAP2 {
41-
continue;
49+
match record.record_type {
50+
RecordType::FORK => {
51+
// Process fork events to track children (and children of children) of filtered PIDs
52+
let Ok(parsed_record) = record.parse() else {
53+
continue;
54+
};
55+
56+
let EventRecord::Fork(fork_record) = parsed_record else {
57+
continue;
58+
};
59+
60+
if pid_filter.add_child_if_parent_tracked(fork_record.ppid, fork_record.pid) {
61+
trace!(
62+
"Fork: Tracking child PID {} from parent PID {}",
63+
fork_record.pid, fork_record.ppid
64+
);
65+
}
66+
}
67+
RecordType::MMAP2 => {
68+
let Ok(parsed_record) = record.parse() else {
69+
continue;
70+
};
71+
72+
// Should never fail since we already checked the type in the raw record
73+
let EventRecord::Mmap2(mmap2_record) = parsed_record else {
74+
continue;
75+
};
76+
77+
// Filter on pid early to avoid string allocation for unwanted records
78+
if !pid_filter.should_include(mmap2_record.pid) {
79+
continue;
80+
}
81+
82+
process_mmap2_record(mmap2_record, &mut symbols_by_pid, &mut unwind_data_by_pid);
83+
}
84+
_ => continue,
4285
}
86+
}
4387

44-
let Ok(parsed_record) = record.parse() else {
45-
continue;
46-
};
47-
48-
// Should never fail since we already checked the type in the raw record
49-
let EventRecord::Mmap2(record) = parsed_record else {
50-
continue;
51-
};
88+
// Retrieve the set of PIDs we ended up tracking after processing all records
89+
let tracked_pids: HashSet<pid_t> = match pid_filter {
90+
PidFilter::All => symbols_by_pid.keys().copied().collect(),
91+
PidFilter::TrackedPids(tracked) => tracked,
92+
};
5293

53-
// Check PROT_EXEC early to avoid string allocation for non-executable mappings
54-
if record.protection as i32 & libc::PROT_EXEC == 0 {
55-
continue;
56-
}
94+
Ok(MemmapRecordsOutput {
95+
symbols_by_pid,
96+
unwind_data_by_pid,
97+
tracked_pids,
98+
})
99+
}
57100

58-
// Filter on raw bytes before allocating a String
59-
let path_slice: &[u8] = &record.path.as_slice();
101+
/// PID filter for parsing perf records
102+
pub enum PidFilter {
103+
/// Parse records for all PIDs
104+
All,
105+
/// Parse records only for specific PIDs and their children
106+
TrackedPids(HashSet<pid_t>),
107+
}
60108

61-
// Skip anonymous mappings
62-
if path_slice == b"//anon" {
63-
continue;
109+
impl PidFilter {
110+
/// Check if a PID should be included in parsing
111+
fn should_include(&self, pid: pid_t) -> bool {
112+
match self {
113+
PidFilter::All => true,
114+
PidFilter::TrackedPids(tracked_pids) => tracked_pids.contains(&pid),
64115
}
116+
}
65117

66-
// Skip special mappings like [vdso], [heap], etc.
67-
if path_slice.first() == Some(&b'[') && path_slice.last() == Some(&b']') {
68-
continue;
118+
/// Add a child PID to the filter if we're tracking its parent
119+
/// Returns true if the child was added
120+
fn add_child_if_parent_tracked(&mut self, parent_pid: pid_t, child_pid: pid_t) -> bool {
121+
match self {
122+
PidFilter::All => false, // Already tracking all PIDs
123+
PidFilter::TrackedPids(tracked_pids) => {
124+
if tracked_pids.contains(&parent_pid) {
125+
tracked_pids.insert(child_pid)
126+
} else {
127+
false
128+
}
129+
}
69130
}
131+
}
132+
}
133+
134+
/// Process a single MMAP2 record and add it to the symbols and unwind data maps
135+
fn process_mmap2_record(
136+
record: linux_perf_data::linux_perf_event_reader::Mmap2Record,
137+
symbols_by_pid: &mut HashMap<pid_t, ProcessSymbols>,
138+
unwind_data_by_pid: &mut HashMap<pid_t, Vec<UnwindData>>,
139+
) {
140+
// Check PROT_EXEC early to avoid string allocation for non-executable mappings
141+
if record.protection as i32 & libc::PROT_EXEC == 0 {
142+
return;
143+
}
70144

71-
let record_path_string = String::from_utf8_lossy(path_slice).into_owned();
72-
let end_addr = record.address + record.length;
145+
// Filter on raw bytes before allocating a String
146+
let path_slice: &[u8] = &record.path.as_slice();
73147

74-
trace!(
75-
"Mapping: Pid {}: {:016x}-{:016x} {:08x} {:?} (Prot {:?})",
148+
// Skip anonymous mappings
149+
if path_slice == b"//anon" {
150+
return;
151+
}
152+
153+
// Skip special mappings like [vdso], [heap], etc.
154+
if path_slice.first() == Some(&b'[') && path_slice.last() == Some(&b']') {
155+
return;
156+
}
157+
158+
let record_path_string = String::from_utf8_lossy(path_slice).into_owned();
159+
let end_addr = record.address + record.length;
160+
161+
trace!(
162+
"Mapping: Pid {}: {:016x}-{:016x} {:08x} {:?} (Prot {:?})",
163+
record.pid,
164+
record.address,
165+
end_addr,
166+
record.page_offset,
167+
record_path_string,
168+
record.protection,
169+
);
170+
symbols_by_pid
171+
.entry(record.pid)
172+
.or_insert(ProcessSymbols::new(record.pid))
173+
.add_mapping(
76174
record.pid,
175+
&record_path_string,
77176
record.address,
78177
end_addr,
79178
record.page_offset,
80-
record_path_string,
81-
record.protection,
82179
);
83-
symbols_by_pid
84-
.entry(record.pid)
85-
.or_insert(ProcessSymbols::new(record.pid))
86-
.add_mapping(
87-
record.pid,
88-
&record_path_string,
89-
record.address,
90-
end_addr,
91-
record.page_offset,
92-
);
93180

94-
match UnwindData::new(
95-
record_path_string.as_bytes(),
96-
record.page_offset,
97-
record.address,
98-
end_addr,
99-
None,
100-
) {
101-
Ok(unwind_data) => {
102-
unwind_data_by_pid
103-
.entry(record.pid)
104-
.or_default()
105-
.push(unwind_data);
106-
trace!(
107-
"Added unwind data for {record_path_string} ({:x} - {:x})",
108-
record.address, end_addr
109-
);
110-
}
111-
Err(error) => {
112-
debug!("Failed to create unwind data for module {record_path_string}: {error}");
113-
}
181+
match UnwindData::new(
182+
record_path_string.as_bytes(),
183+
record.page_offset,
184+
record.address,
185+
end_addr,
186+
None,
187+
) {
188+
Ok(unwind_data) => {
189+
unwind_data_by_pid
190+
.entry(record.pid)
191+
.or_default()
192+
.push(unwind_data);
193+
trace!(
194+
"Added unwind data for {record_path_string} ({:x} - {:x})",
195+
record.address, end_addr
196+
);
197+
}
198+
Err(error) => {
199+
debug!("Failed to create unwind data for module {record_path_string}: {error}");
114200
}
115201
}
116-
117-
Ok(MemmapRecordsOutput {
118-
symbols_by_pid,
119-
unwind_data_by_pid,
120-
})
121202
}

0 commit comments

Comments
 (0)