Skip to content

Commit 18aec79

Browse files
committed
feat: add perf metadata; add python support
1 parent 03e29aa commit 18aec79

4 files changed

Lines changed: 103 additions & 32 deletions

File tree

src/run/runner/valgrind/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
pub mod executor;
2-
mod helpers;
2+
pub mod helpers;
33
mod measure;
44
mod setup;
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// !!!!!!!!!!!!!!!!!!!!!!!!
2+
// !! DO NOT TOUCH BELOW !!
3+
// !!!!!!!!!!!!!!!!!!!!!!!!
4+
// Has to be in sync with `perf-parser`.
5+
//
6+
7+
use std::{collections::HashMap, path::Path};
8+
9+
use serde::{Deserialize, Serialize};
10+
11+
#[derive(Serialize, Deserialize)]
12+
pub struct PerfMetadata {
13+
/// The URIs of the benchmarks in the order they were executed.
14+
pub bench_order_by_pid: HashMap<u32, Vec<String>>,
15+
16+
/// Modules that should be ignored and removed from the folded trace and callgraph (e.g. python interpreter)
17+
pub ignored_modules: Vec<(String, u64, u64)>,
18+
}
19+
20+
impl PerfMetadata {
21+
pub fn save_to<P: AsRef<Path>>(&self, path: P) -> anyhow::Result<()> {
22+
let file = std::fs::File::create(path.as_ref().join("perf.metadata"))?;
23+
serde_json::to_writer(file, self)?;
24+
Ok(())
25+
}
26+
}

src/run/runner/wall_time/perf/mod.rs

Lines changed: 61 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,24 @@
11
use crate::prelude::*;
22
use crate::run::runner::helpers::run_command_with_log_pipe::run_command_with_log_pipe_and_callback;
33
use crate::run::runner::helpers::setup::run_with_sudo;
4+
use crate::run::runner::valgrind::helpers::ignored_objects_path::get_objects_path_to_ignore;
5+
use crate::run::runner::valgrind::helpers::perf_maps::harvest_perf_maps_for_pids;
46
use anyhow::Context;
57
use fifo::{PerfFifo, RunnerFifo};
68
use futures::stream::FuturesUnordered;
9+
use metadata::PerfMetadata;
710
use perf_map::ProcessSymbols;
811
use procfs::process::MMPermissions;
912
use shared::Command as FifoCommand;
13+
use std::collections::HashSet;
1014
use std::path::PathBuf;
1115
use std::process::Command;
1216
use std::time::Duration;
1317
use std::{cell::OnceCell, collections::HashMap, process::ExitStatus};
1418
use tempfile::TempDir;
1519
use unwind_data::UnwindData;
1620

21+
mod metadata;
1722
mod shared;
1823
pub use shared::*;
1924

@@ -74,10 +79,21 @@ impl PerfRunner {
7479
.prefix(PERF_DATA_PREFIX)
7580
.tempfile_in(&self.perf_dir)?;
7681

82+
// Detect the perf call graph mode based on the command to be executed
83+
let cg_mode = if bench_cmd.contains("cargo") {
84+
"dwarf"
85+
} else if bench_cmd.contains("pytest") {
86+
"fp"
87+
} else {
88+
warn!("Couldn't detect call graph mode for command: {}", bench_cmd);
89+
"dwarf"
90+
};
91+
debug!("Using call graph mode: {}", cg_mode);
92+
7793
cmd.args([
7894
"-c",
7995
&format!(
80-
"perf record --quiet --user-callchains --freq=999 --switch-output --control=fifo:{},{} --delay=-1 -g --call-graph=dwarf --output={} -- {bench_cmd}",
96+
"perf record --quiet --user-callchains --freq=999 --switch-output --control=fifo:{},{} --delay=-1 -g --call-graph={cg_mode} --output={} -- {bench_cmd}",
8197
perf_fifo.ctl_fifo_path.to_string_lossy(),
8298
perf_fifo.ack_fifo_path.to_string_lossy(),
8399
perf_file.path().to_string_lossy()
@@ -109,12 +125,6 @@ impl PerfRunner {
109125
pub async fn save_files_to(&self, profile_folder: &PathBuf) -> anyhow::Result<()> {
110126
let start = std::time::Instant::now();
111127

112-
let bench_data = self
113-
.benchmark_data
114-
.get()
115-
.expect("Benchmark order is not available");
116-
bench_data.save_to(profile_folder).unwrap();
117-
118128
// Copy the perf data files to the profile folder
119129
let copy_tasks = std::fs::read_dir(&self.perf_dir)?
120130
.filter_map(|entry| entry.ok())
@@ -141,16 +151,33 @@ impl PerfRunner {
141151
let dst_path = profile_folder.join(dst_file_name);
142152
tokio::fs::copy(src_path, dst_path).await?;
143153

144-
Ok::<_, anyhow::Error>(())
154+
Ok::<_, anyhow::Error>(pid)
145155
})
146156
})
147157
.collect::<FuturesUnordered<_>>();
158+
159+
let bench_data = self
160+
.benchmark_data
161+
.get()
162+
.expect("Benchmark order is not available");
148163
assert_eq!(
149164
copy_tasks.len(),
150165
bench_data.bench_count(),
151166
"Benchmark count mismatch"
152167
);
153-
futures::future::try_join_all(copy_tasks).await?;
168+
169+
// Harvest the perf maps generated by Python. This will copy the perf
170+
// maps from /tmp to the profile folder. We have to write our own perf
171+
// maps to these files AFTERWARDS, otherwise they'll be overwritten!
172+
let perf_map_pids = futures::future::try_join_all(copy_tasks)
173+
.await?
174+
.into_iter()
175+
.filter_map(Result::ok)
176+
.collect::<HashSet<_>>();
177+
harvest_perf_maps_for_pids(profile_folder, &perf_map_pids).await?;
178+
179+
// Append perf maps, unwind info and other metadata
180+
bench_data.save_to(profile_folder).unwrap();
154181

155182
let elapsed = start.elapsed();
156183
debug!("Perf teardown took: {:?}", elapsed);
@@ -260,11 +287,31 @@ impl BenchmarkData {
260287
}
261288
}
262289

263-
for (pid, orders) in &self.bench_order_by_pid {
264-
let dst_file_name = format!("{}_.bench_order", pid);
265-
let dst_path = path.as_ref().join(dst_file_name);
266-
std::fs::write(dst_path, orders.join("\n"))?;
267-
}
290+
let metadata = PerfMetadata {
291+
bench_order_by_pid: self.bench_order_by_pid.clone(),
292+
ignored_modules: {
293+
let mut to_ignore = vec![];
294+
295+
// Check whether any of the ignored modules has been loaded in any of the tracked processes
296+
for ignore_path in get_objects_path_to_ignore() {
297+
for proc in self.symbols_by_pid.values() {
298+
if let Some(mapping) = proc.module_mapping(&ignore_path) {
299+
let (Some((base_addr, _)), Some((_, end_addr))) = (
300+
mapping.iter().min_by_key(|(base_addr, _)| base_addr),
301+
mapping.iter().max_by_key(|(_, end_addr)| end_addr),
302+
) else {
303+
continue;
304+
};
305+
306+
to_ignore.push((ignore_path.clone(), *base_addr, *end_addr));
307+
}
308+
}
309+
}
310+
311+
to_ignore
312+
},
313+
};
314+
metadata.save_to(&path).unwrap();
268315

269316
Ok(())
270317
}

src/run/runner/wall_time/perf/perf_map.rs

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -79,31 +79,20 @@ impl ModuleSymbols {
7979

8080
/// Represents all the modules inside a process and their symbols.
8181
pub struct ProcessSymbols {
82-
modules_bounds: HashMap<PathBuf, Vec<(u64, u64)>>,
83-
modules: HashMap<PathBuf, ModuleSymbols>,
8482
pid: u32,
83+
module_mappings: HashMap<PathBuf, Vec<(u64, u64)>>,
84+
modules: HashMap<PathBuf, ModuleSymbols>,
8585
}
8686

8787
impl ProcessSymbols {
8888
pub fn new(pid: u32) -> Self {
8989
Self {
90-
modules_bounds: HashMap::new(),
91-
modules: HashMap::new(),
9290
pid,
91+
module_mappings: HashMap::new(),
92+
modules: HashMap::new(),
9393
}
9494
}
9595

96-
#[allow(dead_code)] // TODO: Needed for python, remove allow later
97-
pub fn remove_module<P: AsRef<Path>>(&mut self, path: P) {
98-
self.modules.remove(path.as_ref());
99-
}
100-
101-
#[allow(dead_code)] // TODO: Needed for python, remove allow later
102-
pub fn add_module(&mut self, module: ModuleSymbols) {
103-
let entry = self.modules.entry(module.path.clone());
104-
entry.or_insert(module);
105-
}
106-
10796
pub fn add_mapping<P: AsRef<Path>>(
10897
&mut self,
10998
pid: u32,
@@ -130,12 +119,21 @@ impl ProcessSymbols {
130119
}
131120
}
132121

133-
self.modules_bounds
122+
self.module_mappings
134123
.entry(path.clone())
135124
.or_default()
136125
.push((start_addr, end_addr));
137126
}
138127

128+
pub fn module_mapping<P: AsRef<std::path::Path>>(
129+
&self,
130+
module_path: P,
131+
) -> Option<&[(u64, u64)]> {
132+
self.module_mappings
133+
.get(module_path.as_ref())
134+
.map(|bounds| bounds.as_slice())
135+
}
136+
139137
pub fn save_to<P: AsRef<std::path::Path>>(&self, folder: P) -> anyhow::Result<()> {
140138
if self.modules.is_empty() {
141139
return Ok(());
@@ -144,7 +142,7 @@ impl ProcessSymbols {
144142
let symbols_path = folder.as_ref().join(format!("perf-{}.map", self.pid));
145143
for module in self.modules.values() {
146144
let Some((base_addr, _)) = self
147-
.modules_bounds
145+
.module_mappings
148146
.get(&module.path)
149147
.and_then(|bounds| bounds.iter().min_by_key(|(start, _)| start))
150148
else {

0 commit comments

Comments
 (0)