-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathparse_perf_file.rs
More file actions
282 lines (249 loc) · 9.73 KB
/
parse_perf_file.rs
File metadata and controls
282 lines (249 loc) · 9.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
use super::module_symbols::ModuleSymbols;
use super::unwind_data::unwind_data_from_elf;
use crate::prelude::*;
use libc::pid_t;
use linux_perf_data::PerfFileReader;
use linux_perf_data::PerfFileRecord;
use linux_perf_data::linux_perf_event_reader::EventRecord;
use linux_perf_data::linux_perf_event_reader::RecordType;
use runner_shared::unwind_data::ProcessUnwindData;
use runner_shared::unwind_data::UnwindData;
use std::collections::HashMap;
use std::collections::HashSet;
use std::path::Path;
use std::path::PathBuf;
#[derive(Default)]
/// Data gathered for a single module path: what was extracted from the ELF file itself, plus
/// one entry per process (PID) recorded for that path.
pub struct LoadedModule {
/// Symbols extracted from the mapped ELF file (`None` when no symbols have been loaded)
pub module_symbols: Option<ModuleSymbols>,
/// Unwind data extracted from the mapped ELF file (`None` when no unwind data has been loaded)
pub unwind_data: Option<UnwindData>,
/// Per-process information (load bias and process-specific unwind data), keyed by PID
pub process_loaded_modules: HashMap<pid_t, ProcessLoadedModule>,
}
#[derive(Default)]
/// Per-process state of a loaded module. Both fields default to `None` and are filled in
/// independently.
pub struct ProcessLoadedModule {
/// Load bias used to adjust declared ELF addresses to their actual runtime addresses.
/// The bias is the difference between where the segment *actually* is in memory versus where
/// the ELF file *preferred* it to be.
pub symbols_load_bias: Option<u64>,
/// Unwind data specific to this process, derived from both the load bias and the module's
/// unwind data
pub process_unwind_data: Option<ProcessUnwindData>,
}
impl LoadedModule {
    /// Yields the PID of every process that has an entry in `process_loaded_modules`.
    ///
    /// PIDs are returned by value; the order is that of the underlying `HashMap`.
    pub fn pids(&self) -> impl Iterator<Item = pid_t> {
        self.process_loaded_modules.iter().map(|(&pid, _)| pid)
    }
}
/// Everything `parse_for_memmap2` collects from a perf file.
pub struct MemmapRecordsOutput {
/// Module symbols and the computed load bias for each pid that maps the ELF path.
pub loaded_modules_by_path: HashMap<PathBuf, LoadedModule>,
/// PIDs tracked once all records were processed: the filter's PID set (including children
/// added from FORK records) when a PID set was given, otherwise every PID that has an entry
/// in `loaded_modules_by_path`.
pub tracked_pids: HashSet<pid_t>,
/// Jitdump file paths discovered from MMAP2 records, keyed by PID.
pub jit_dump_paths_by_pid: HashMap<pid_t, Vec<PathBuf>>,
}
/// Parse the perf file at `perf_file_path` and collect what its FORK and MMAP2 records reveal
/// about the processes selected by `pid_filter`.
///
/// - FORK records extend a [`PidFilter::TrackedPids`] filter: a child is added whenever its
///   parent is already tracked, so descendants are picked up as the file is read.
/// - MMAP2 records of included PIDs are first checked for jitdump files (`jit-<pid>.dump`,
///   kept only if the path exists on disk) and then passed to `process_mmap2_record`, which
///   fills in symbols, load bias and unwind data for executable file mappings.
///
/// With [`PidFilter::All`] every MMAP2 record is considered. Note that an *empty*
/// [`PidFilter::TrackedPids`] set matches no PID at all.
///
/// Records whose payload fails to parse are skipped silently; all other record types are
/// ignored.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, if `PerfFileReader::parse_pipe` fails, or
/// if reading the next record from the file fails.
pub fn parse_for_memmap2<P: AsRef<Path>>(
    perf_file_path: P,
    mut pid_filter: PidFilter,
) -> Result<MemmapRecordsOutput> {
    let mut loaded_modules_by_path = HashMap::<PathBuf, LoadedModule>::new();
    let mut jit_dump_paths_by_pid = HashMap::<pid_t, Vec<PathBuf>>::new();

    // 1MiB buffer
    let reader = std::io::BufReader::with_capacity(
        1024 * 1024,
        std::fs::File::open(perf_file_path.as_ref())?,
    );

    let PerfFileReader {
        mut perf_file,
        mut record_iter,
    } = PerfFileReader::parse_pipe(reader)?;

    // A corrupt or truncated file makes `next_record` fail; report that to the caller instead
    // of panicking.
    while let Some(record) = record_iter.next_record(&mut perf_file)? {
        let PerfFileRecord::EventRecord { record, .. } = record else {
            continue;
        };

        // Check the type from the raw record to avoid parsing overhead since we do not care about
        // most records.
        match record.record_type {
            RecordType::FORK => {
                // Process fork events to track children (and children of children) of filtered PIDs
                let Ok(parsed_record) = record.parse() else {
                    continue;
                };

                let EventRecord::Fork(fork_record) = parsed_record else {
                    continue;
                };

                if pid_filter.add_child_if_parent_tracked(fork_record.ppid, fork_record.pid) {
                    trace!(
                        "Fork: Tracking child PID {} from parent PID {}",
                        fork_record.pid, fork_record.ppid
                    );
                }
            }
            RecordType::MMAP2 => {
                let Ok(parsed_record) = record.parse() else {
                    continue;
                };

                // Should never fail since we already checked the type in the raw record
                let EventRecord::Mmap2(mmap2_record) = parsed_record else {
                    continue;
                };

                // Filter on pid early to avoid string allocation for unwanted records
                if !pid_filter.should_include(mmap2_record.pid) {
                    continue;
                }

                // Collect jitdump file paths before the PROT_EXEC filter in process_mmap2_record
                // skips them. JIT runtimes mmap the jitdump file so perf records it.
                // Match perf's jit_detect(): basename must be `jit-<pid>.dump`.
                if is_jit_dump_path(&mmap2_record.path.as_slice(), mmap2_record.pid) {
                    let path = PathBuf::from(
                        String::from_utf8_lossy(&mmap2_record.path.as_slice()).into_owned(),
                    );
                    if path.exists() {
                        debug!("Found jitdump path from MMAP2 record: {path:?}");
                        jit_dump_paths_by_pid
                            .entry(mmap2_record.pid)
                            .or_default()
                            .push(path);
                    }
                }

                process_mmap2_record(mmap2_record, &mut loaded_modules_by_path);
            }
            _ => continue,
        }
    }

    // Retrieve the set of PIDs we ended up tracking after processing all records
    let tracked_pids: HashSet<pid_t> = match pid_filter {
        // Without an explicit PID set, report every PID that ended up with a loaded module.
        PidFilter::All => loaded_modules_by_path
            .iter()
            .flat_map(|(_, loaded)| loaded.pids())
            .collect(),
        PidFilter::TrackedPids(tracked) => tracked,
    };

    Ok(MemmapRecordsOutput {
        loaded_modules_by_path,
        tracked_pids,
        jit_dump_paths_by_pid,
    })
}
/// PID filter for parsing perf records
#[derive(Debug)]
pub enum PidFilter {
/// Parse records for all PIDs
All,
/// Parse records only for the PIDs in this set. Children are added to the set through
/// `add_child_if_parent_tracked` when their parent is already present; an empty set
/// therefore matches no PID.
TrackedPids(HashSet<pid_t>),
}
impl PidFilter {
/// Check if a PID should be included in parsing
fn should_include(&self, pid: pid_t) -> bool {
match self {
PidFilter::All => true,
PidFilter::TrackedPids(tracked_pids) => tracked_pids.contains(&pid),
}
}
/// Add a child PID to the filter if we're tracking its parent
/// Returns true if the child was added
fn add_child_if_parent_tracked(&mut self, parent_pid: pid_t, child_pid: pid_t) -> bool {
match self {
PidFilter::All => false, // Already tracking all PIDs
PidFilter::TrackedPids(tracked_pids) => {
if tracked_pids.contains(&parent_pid) {
tracked_pids.insert(child_pid)
} else {
false
}
}
}
}
}
/// Checks whether `path` names a jitdump file belonging to `pid`.
///
/// Mirrors perf's `jit_detect()` naming rule: the path must contain a `/`, and the component
/// after the last `/` must be exactly `jit-<pid>.dump`, with `<pid>` equal to the MMAP2
/// record's PID.
fn is_jit_dump_path(path: &[u8], pid: pid_t) -> bool {
    // Splitting from the right yields the basename first, then everything before the last `/`.
    // A missing second piece means the path holds no `/` at all.
    let mut pieces = path.rsplitn(2, |&byte| byte == b'/');
    let (Some(file_name), Some(_parent)) = (pieces.next(), pieces.next()) else {
        return false;
    };

    let wanted = format!("jit-{pid}.dump");
    file_name == wanted.as_bytes()
}
/// Process a single MMAP2 record and store its symbols, load bias and unwind data in
/// `loaded_modules_by_path`.
///
/// The record is ignored (nothing is stored) when:
/// - the mapping is not executable (`PROT_EXEC` unset),
/// - the path is `//anon` or a bracketed special mapping such as `[vdso]`,
/// - `address + length` does not fit in a `u64` (malformed record),
/// - the load bias cannot be computed for the mapped file.
///
/// Otherwise an entry for the path and for the record's PID is created if missing, then:
/// - module symbols are loaded from the ELF file unless the path already has some,
/// - the load bias is stored for this PID,
/// - unwind data is extracted and stored both at module level and for this PID.
///
/// Failures to load symbols or unwind data are logged at debug level and leave the
/// corresponding fields untouched.
fn process_mmap2_record(
    record: linux_perf_data::linux_perf_event_reader::Mmap2Record,
    loaded_modules_by_path: &mut HashMap<PathBuf, LoadedModule>,
) {
    // Check PROT_EXEC early to avoid string allocation for non-executable mappings
    if record.protection as i32 & libc::PROT_EXEC == 0 {
        return;
    }

    // Filter on raw bytes before allocating a String
    let path_slice: &[u8] = &record.path.as_slice();

    // Skip anonymous mappings
    if path_slice == b"//anon" {
        return;
    }

    // Skip special mappings like [vdso], [heap], etc.
    if path_slice.first() == Some(&b'[') && path_slice.last() == Some(&b']') {
        return;
    }

    let record_path_string = String::from_utf8_lossy(path_slice).into_owned();
    let record_path = PathBuf::from(&record_path_string);

    // Address and length come straight from the perf file. A plain `+` would panic in debug
    // builds and silently wrap in release builds on a malformed record, so reject it instead.
    let Some(end_addr) = record.address.checked_add(record.length) else {
        debug!(
            "Skipping mapping of {record_path_string} for pid {}: address range overflows",
            record.pid
        );
        return;
    };

    trace!(
        "Mapping: Pid {}: {:016x}-{:016x} {:08x} {:?} (Prot {:?})",
        record.pid,
        record.address,
        end_addr,
        record.page_offset,
        record_path_string,
        record.protection,
    );

    let load_bias = match ModuleSymbols::compute_load_bias(
        &record_path,
        record.address,
        end_addr,
        record.page_offset,
    ) {
        Ok(load_bias) => load_bias,
        Err(e) => {
            debug!("Failed to compute load bias for {record_path_string}: {e}");
            return;
        }
    };

    // The path is cloned for the map key because `record_path` is still needed below.
    let loaded_module = loaded_modules_by_path
        .entry(record_path.clone())
        .or_default();

    let process_loaded_module = loaded_module
        .process_loaded_modules
        .entry(record.pid)
        .or_default();

    // Extract module symbols only if none have been loaded for this path yet
    if loaded_module.module_symbols.is_none() {
        match ModuleSymbols::from_elf(&record_path) {
            Ok(symbols) => loaded_module.module_symbols = Some(symbols),
            Err(error) => {
                debug!("Failed to load symbols for module {record_path_string}: {error}");
            }
        }
    }

    // Store the load bias for this process's mapping of the module
    process_loaded_module.symbols_load_bias = Some(load_bias);

    // Extract unwind_data
    match unwind_data_from_elf(
        record_path_string.as_bytes(),
        record.address,
        end_addr,
        None,
        load_bias,
    ) {
        Ok((unwind_data, process_unwind_data)) => {
            loaded_module.unwind_data = Some(unwind_data);
            process_loaded_module.process_unwind_data = Some(process_unwind_data);
        }
        Err(error) => {
            debug!("Failed to load unwind data for module {record_path_string}: {error}");
        }
    };
}