Skip to content

Commit a784a39

Browse files
feat: deduplicate symbol maps, unwind_data and debug info
- Store pid-agnostic data in a file or JSON map under a mapped `path_key` for each ELF.
- For each pid, store pid-specific data (mostly the `load_bias` computed from where each module was loaded into memory at runtime), alongside a key to retrieve the pid-agnostic data.

This way, we only write the relevant parts of the information to disk.
1 parent 2d950cd commit a784a39

24 files changed

Lines changed: 788 additions & 585 deletions

.gitattributes

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
testdata/perf_map/* filter=lfs diff=lfs merge=lfs -text
22
*.gif binary filter=lfs diff=lfs merge=lfs -text
33
src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__debug_info__tests__ruff_debug_info.snap filter=lfs diff=lfs merge=lfs -text
4-
src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__perf_map__tests__ruff_symbols.snap filter=lfs diff=lfs merge=lfs -text
4+
src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__module_symbols__tests__ruff_symbols.snap filter=lfs diff=lfs merge=lfs -text

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ repos:
1111
- id: check-yaml
1212
- id: check-toml
1313
- id: check-added-large-files
14+
args: ["--maxkb=1000"]
1415
- repo: https://github.com/doublify/pre-commit-rust
1516
rev: v1.0
1617
hooks:

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ default-run = "codspeed"
1010
name = "codspeed"
1111
path = "src/main.rs"
1212

13-
1413
[dependencies]
1514
anyhow = { workspace = true }
1615
clap = { workspace = true, features = ["env", "color"] }

crates/runner-shared/src/unwind_data.rs

Lines changed: 3 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,8 @@ use std::{hash::DefaultHasher, ops::Range};
88

99
pub const UNWIND_FILE_EXT: &str = "unwind_data";
1010

11-
pub type UnwindData = UnwindDataV2;
12-
13-
impl UnwindDataV3 {
11+
pub type UnwindData = UnwindDataV3;
12+
impl UnwindData {
1413
pub fn parse(reader: &[u8]) -> anyhow::Result<Self> {
1514
let compat: UnwindDataCompat = bincode::deserialize(reader)?;
1615

@@ -93,46 +92,6 @@ impl UnwindDataV2 {
9392
}
9493
}
9594
}
96-
97-
/// Will be removed once the backend has been deployed and we can merge the changes in the
98-
/// runner
99-
pub fn save_to<P: AsRef<std::path::Path>>(&self, folder: P, pid: i32) -> anyhow::Result<()> {
100-
let unwind_data_path = folder.as_ref().join(format!(
101-
"{}_{:x}_{:x}_{}.{UNWIND_FILE_EXT}",
102-
pid,
103-
self.avma_range.start,
104-
self.avma_range.end,
105-
self.timestamp.unwrap_or_default()
106-
));
107-
self.to_file(unwind_data_path)?;
108-
109-
Ok(())
110-
}
111-
112-
pub fn to_file<P: AsRef<std::path::Path>>(&self, path: P) -> anyhow::Result<()> {
113-
if let Ok(true) = std::fs::exists(path.as_ref()) {
114-
// This happens in CI for the root `systemd-run` process which execs into bash which
115-
// also execs into bash, each process reloading common libraries like `ld-linux.so`.
116-
// We detect this when we harvest unwind_data by parsing the perf data (exec-harness).
117-
// Until we properly handle the process tree and deduplicate unwind data, just debug
118-
// log here
119-
// Any relevant occurence should have other symptoms reported by users.
120-
log::debug!(
121-
"{} already exists, file will be truncated",
122-
path.as_ref().display()
123-
);
124-
log::debug!("{} {:x?}", self.path, self.avma_range);
125-
}
126-
127-
let compat = UnwindDataCompat::V2(self.clone());
128-
let file = std::fs::File::create(path.as_ref())?;
129-
const BUFFER_SIZE: usize = 256 * 1024 /* 256 KB */;
130-
131-
let writer = BufWriter::with_capacity(BUFFER_SIZE, file);
132-
bincode::serialize_into(writer, &compat)?;
133-
134-
Ok(())
135-
}
13695
}
13796

13897
impl From<UnwindDataV1> for UnwindDataV2 {
@@ -293,7 +252,7 @@ mod tests {
293252
#[test]
294253
fn test_parse_v3_as_v3() {
295254
// Parse V3 binary artifact as V3 using UnwindData::parse
296-
let parsed_v3 = UnwindDataV3::parse(V3_BINARY).expect("Failed to parse V3 data as V3");
255+
let parsed_v3 = UnwindData::parse(V3_BINARY).expect("Failed to parse V3 data as V3");
297256

298257
// Should match expected V3 data
299258
let expected_v3 = create_sample_v3();

src/executor/wall_time/perf/debug_info.rs

Lines changed: 58 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,19 @@
1-
use crate::executor::wall_time::perf::perf_map::ModuleSymbols;
1+
use crate::executor::wall_time::perf::module_symbols::ModuleSymbols;
22
use crate::prelude::*;
33
use addr2line::{fallible_iterator::FallibleIterator, gimli};
44
use object::{Object, ObjectSection};
5+
use rayon::prelude::*;
56
use runner_shared::debug_info::{DebugInfo, ModuleDebugInfo};
67
use std::path::Path;
78

89
type EndianRcSlice = gimli::EndianRcSlice<gimli::RunTimeEndian>;
910

1011
pub trait ModuleDebugInfoExt {
11-
fn from_symbols<P: AsRef<Path>>(path: P, symbols: &ModuleSymbols) -> anyhow::Result<Self>
12+
fn from_symbols<P: AsRef<Path>>(
13+
path: P,
14+
symbols: &ModuleSymbols,
15+
load_bias: u64,
16+
) -> anyhow::Result<Self>
1217
where
1318
Self: Sized;
1419

@@ -36,12 +41,15 @@ pub trait ModuleDebugInfoExt {
3641

3742
impl ModuleDebugInfoExt for ModuleDebugInfo {
3843
/// Create debug info from existing symbols by looking up file/line in DWARF
39-
fn from_symbols<P: AsRef<Path>>(path: P, symbols: &ModuleSymbols) -> anyhow::Result<Self> {
44+
fn from_symbols<P: AsRef<Path>>(
45+
path: P,
46+
symbols: &ModuleSymbols,
47+
load_bias: u64,
48+
) -> anyhow::Result<Self> {
4049
let content = std::fs::read(path.as_ref())?;
4150
let object = object::File::parse(&*content)?;
4251

4352
let ctx = Self::create_dwarf_context(&object).context("Failed to create DWARF context")?;
44-
let load_bias = symbols.load_bias();
4553
let (mut min_addr, mut max_addr) = (None, None);
4654
let debug_infos = symbols
4755
.symbols()
@@ -96,34 +104,27 @@ impl ModuleDebugInfoExt for ModuleDebugInfo {
96104
}
97105
}
98106

99-
/// Represents all the modules inside a process and their debug info.
100-
pub struct ProcessDebugInfo {
101-
modules: Vec<ModuleDebugInfo>,
102-
}
103-
104-
impl ProcessDebugInfo {
105-
pub fn new(
106-
process_symbols: &crate::executor::wall_time::perf::perf_map::ProcessSymbols,
107-
) -> Self {
108-
let mut modules = Vec::new();
109-
for (path, module_symbols) in process_symbols.modules_with_symbols() {
110-
match ModuleDebugInfo::from_symbols(path, module_symbols) {
111-
Ok(module_debug_info) => {
112-
modules.push(module_debug_info);
113-
}
107+
/// Compute debug info once per unique ELF path from deduplicated symbols.
108+
/// Returns a map of path -> ModuleDebugInfo with `load_bias: 0` (load bias is per-pid).
109+
pub fn debug_info_by_path(
110+
mounted_modules_by_path: &std::collections::HashMap<
111+
std::path::PathBuf,
112+
crate::executor::wall_time::perf::parse_perf_file::MountedModule,
113+
>,
114+
) -> std::collections::HashMap<std::path::PathBuf, ModuleDebugInfo> {
115+
mounted_modules_by_path
116+
.par_iter()
117+
.filter_map(|(path, mounted_module)| {
118+
let module_symbols = mounted_module.module_symbols.as_ref()?;
119+
match ModuleDebugInfo::from_symbols(path, module_symbols, 0) {
120+
Ok(module_debug_info) => Some((path.clone(), module_debug_info)),
114121
Err(error) => {
115122
trace!("Failed to load debug info for module {path:?}: {error}");
123+
None
116124
}
117125
}
118-
}
119-
120-
Self { modules }
121-
}
122-
123-
/// Returns the debug info modules for this process
124-
pub fn modules(self) -> Vec<ModuleDebugInfo> {
125-
self.modules
126-
}
126+
})
127+
.collect()
127128
}
128129

129130
#[cfg(test)]
@@ -134,23 +135,30 @@ mod tests {
134135
fn test_golang_debug_info() {
135136
let (start_addr, end_addr, file_offset) =
136137
(0x0000000000402000_u64, 0x000000000050f000_u64, 0x2000);
137-
let module_symbols = ModuleSymbols::new(
138+
let module_symbols = ModuleSymbols::from_elf("testdata/perf_map/go_fib.bin").unwrap();
139+
let load_bias = ModuleSymbols::compute_load_bias(
138140
"testdata/perf_map/go_fib.bin",
139141
start_addr,
140142
end_addr,
141143
file_offset,
142144
)
143145
.unwrap();
144-
let module_debug_info =
145-
ModuleDebugInfo::from_symbols("testdata/perf_map/go_fib.bin", &module_symbols).unwrap();
146+
let module_debug_info = ModuleDebugInfo::from_symbols(
147+
"testdata/perf_map/go_fib.bin",
148+
&module_symbols,
149+
load_bias,
150+
)
151+
.unwrap();
146152
insta::assert_debug_snapshot!(module_debug_info.debug_infos);
147153
}
148154

149155
#[test]
150156
fn test_cpp_debug_info() {
151157
let (start_addr, end_addr, file_offset) =
152158
(0x0000000000400000_u64, 0x0000000000459000_u64, 0x0);
153-
let module_symbols = ModuleSymbols::new(
159+
let module_symbols =
160+
ModuleSymbols::from_elf("testdata/perf_map/cpp_my_benchmark.bin").unwrap();
161+
let load_bias = ModuleSymbols::compute_load_bias(
154162
"testdata/perf_map/cpp_my_benchmark.bin",
155163
start_addr,
156164
end_addr,
@@ -160,6 +168,7 @@ mod tests {
160168
let mut module_debug_info = ModuleDebugInfo::from_symbols(
161169
"testdata/perf_map/cpp_my_benchmark.bin",
162170
&module_symbols,
171+
load_bias,
163172
)
164173
.unwrap();
165174

@@ -172,27 +181,33 @@ mod tests {
172181
fn test_rust_divan_debug_info() {
173182
const MODULE_PATH: &str = "testdata/perf_map/divan_sleep_benches.bin";
174183

175-
let module_symbols =
176-
ModuleSymbols::new(MODULE_PATH, 0x00005555555a2000, 0x0000555555692000, 0x4d000)
177-
.unwrap();
184+
let module_symbols = ModuleSymbols::from_elf(MODULE_PATH).unwrap();
185+
let load_bias = ModuleSymbols::compute_load_bias(
186+
MODULE_PATH,
187+
0x00005555555a2000,
188+
0x0000555555692000,
189+
0x4d000,
190+
)
191+
.unwrap();
178192
let module_debug_info =
179-
ModuleDebugInfo::from_symbols(MODULE_PATH, &module_symbols).unwrap();
193+
ModuleDebugInfo::from_symbols(MODULE_PATH, &module_symbols, load_bias).unwrap();
180194
insta::assert_debug_snapshot!(module_debug_info.debug_infos);
181195
}
182196

183197
#[test]
184198
fn test_the_algorithms_debug_info() {
185199
const MODULE_PATH: &str = "testdata/perf_map/the_algorithms.bin";
186200

187-
let module_symbols = ModuleSymbols::new(
201+
let module_symbols = ModuleSymbols::from_elf(MODULE_PATH).unwrap();
202+
let load_bias = ModuleSymbols::compute_load_bias(
188203
MODULE_PATH,
189204
0x00005573e59fe000,
190205
0x00005573e5b07000,
191206
0x00052000,
192207
)
193208
.unwrap();
194209
let module_debug_info =
195-
ModuleDebugInfo::from_symbols(MODULE_PATH, &module_symbols).unwrap();
210+
ModuleDebugInfo::from_symbols(MODULE_PATH, &module_symbols, load_bias).unwrap();
196211
insta::assert_debug_snapshot!(module_debug_info.debug_infos);
197212
}
198213

@@ -202,10 +217,12 @@ mod tests {
202217

203218
let (start_addr, end_addr, file_offset) =
204219
(0x0000555555e6d000_u64, 0x0000555556813000_u64, 0x918000);
205-
let module_symbols =
206-
ModuleSymbols::new(MODULE_PATH, start_addr, end_addr, file_offset).unwrap();
220+
let module_symbols = ModuleSymbols::from_elf(MODULE_PATH).unwrap();
221+
let load_bias =
222+
ModuleSymbols::compute_load_bias(MODULE_PATH, start_addr, end_addr, file_offset)
223+
.unwrap();
207224
let module_debug_info =
208-
ModuleDebugInfo::from_symbols(MODULE_PATH, &module_symbols).unwrap();
225+
ModuleDebugInfo::from_symbols(MODULE_PATH, &module_symbols, load_bias).unwrap();
209226
insta::assert_debug_snapshot!(module_debug_info.debug_infos);
210227
}
211228
}

src/executor/wall_time/perf/elf_helper.rs

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -185,18 +185,6 @@ pub fn relative_address_base(object_file: &object::File) -> u64 {
185185
object_file.relative_address_base()
186186
}
187187

188-
pub fn compute_base_avma(
189-
runtime_start_addr: u64,
190-
runtime_end_addr: u64,
191-
runtime_file_offset: u64,
192-
object: &object::File,
193-
) -> anyhow::Result<u64> {
194-
let bias = compute_load_bias(
195-
runtime_start_addr,
196-
runtime_end_addr,
197-
runtime_file_offset,
198-
object,
199-
)?;
200-
let base_svma = relative_address_base(object);
201-
Ok(base_svma.wrapping_add(bias))
188+
pub fn compute_base_avma(base_svma: u64, load_bias: u64) -> u64 {
189+
base_svma.wrapping_add(load_bias)
202190
}

0 commit comments

Comments
 (0)