Skip to content

Commit 3d7bfa1

Browse files
feat: deduplicate symbol maps, unwind_data and debug info
- Store pid-agnostic data in a file or JSON map under a mapped `path_key` for each ELF.
- For each pid, store pid-specific data (mostly the `load_bias` computed from where each module was loaded into memory at runtime), alongside a key to retrieve the pid-agnostic data.

This way, only the relevant parts of the information are written to disk.
1 parent 5cad97d commit 3d7bfa1

30 files changed

Lines changed: 1034 additions & 610 deletions

.gitattributes

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
testdata/perf_map/* filter=lfs diff=lfs merge=lfs -text
22
*.gif binary filter=lfs diff=lfs merge=lfs -text
33
src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__debug_info__tests__ruff_debug_info.snap filter=lfs diff=lfs merge=lfs -text
4-
src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__perf_map__tests__ruff_symbols.snap filter=lfs diff=lfs merge=lfs -text
4+
src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__module_symbols__tests__ruff_symbols.snap filter=lfs diff=lfs merge=lfs -text

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ repos:
1111
- id: check-yaml
1212
- id: check-toml
1313
- id: check-added-large-files
14+
args: ["--maxkb=1000"]
1415
- repo: https://github.com/doublify/pre-commit-rust
1516
rev: v1.0
1617
hooks:

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ default-run = "codspeed"
1010
name = "codspeed"
1111
path = "src/main.rs"
1212

13+
[[bin]]
14+
name = "compare_walltime_output"
15+
path = "src/bin/compare_walltime_output.rs"
16+
1317

1418
[dependencies]
1519
anyhow = { workspace = true }

crates/runner-shared/src/debug_info.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@ impl std::fmt::Debug for DebugInfo {
2121
}
2222
}
2323

24+
/// Per-pid mounting info referencing a deduplicated debug info entry.
25+
#[derive(Serialize, Deserialize, Clone, Debug)]
26+
pub struct MappedProcessDebugInfo {
27+
pub debug_info_key: String,
28+
pub load_bias: u64,
29+
}
30+
2431
#[derive(Debug, Clone, Serialize, Deserialize)]
2532
pub struct ModuleDebugInfo {
2633
/// The path to the object file on disk (e.g. `/usr/lib/libc.so.6`)

crates/runner-shared/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@ pub mod debug_info;
33
pub mod fifo;
44
pub mod metadata;
55
pub mod perf_event;
6+
pub mod perf_map;
67
pub mod unwind_data;
78
pub mod walltime_results;

crates/runner-shared/src/metadata.rs

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
use anyhow::Context;
2+
use libc::pid_t;
23
use serde::{Deserialize, Serialize};
4+
use std::collections::HashMap;
35
use std::io::BufWriter;
46
use std::path::Path;
7+
use std::path::PathBuf;
58

6-
use crate::debug_info::ModuleDebugInfo;
9+
use crate::debug_info::{MappedProcessDebugInfo, ModuleDebugInfo};
710
use crate::fifo::MarkerType;
11+
use crate::perf_map::MappedProcessModuleSymbols;
12+
use crate::unwind_data::MappedProcessUnwindData;
813

914
#[derive(Serialize, Deserialize)]
1015
pub struct PerfMetadata {
@@ -25,9 +30,31 @@ pub struct PerfMetadata {
2530
#[deprecated(note = "Use ExecutionTimestamps in the 'artifacts' module instead")]
2631
pub markers: Vec<MarkerType>,
2732

28-
/// Debug info for all modules across all processes, mapping PID to module debug info
29-
#[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
30-
pub debug_info_by_pid: std::collections::HashMap<i32, Vec<ModuleDebugInfo>>,
33+
/// Kept for backward compatibility; this was used before debug info entries were deduplicated.
34+
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
35+
#[deprecated(note = "Use 'debug_info' + 'mapped_process_debug_info_by_pid' instead")]
36+
pub debug_info_by_pid: HashMap<pid_t, Vec<ModuleDebugInfo>>,
37+
38+
/// Deduplicated debug info entries, keyed by semantic key
39+
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
40+
pub debug_info: HashMap<String, ModuleDebugInfo>,
41+
42+
/// Per-pid debug info references, mapping PID to a list of debug info key + load bias
43+
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
44+
pub mapped_process_debug_info_by_pid: HashMap<pid_t, Vec<MappedProcessDebugInfo>>,
45+
46+
/// Per-pid unwind data references, mapping PID to list of unwind data index + mounting info
47+
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
48+
pub mapped_process_unwind_data_by_pid: HashMap<pid_t, Vec<MappedProcessUnwindData>>,
49+
50+
/// Per-pid symbol references, mapping PID to a list of perf map key + load bias
51+
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
52+
pub mapped_process_module_symbols: HashMap<pid_t, Vec<MappedProcessModuleSymbols>>,
53+
54+
/// Mapping from semantic `path_key` to the original binary path on the host disk.
55+
/// Kept for traceability, and in case we ever need to reconstruct the original paths from the keys.
56+
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
57+
pub path_key_to_path: HashMap<String, PathBuf>,
3158
}
3259

3360
impl PerfMetadata {
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
use serde::{Deserialize, Serialize};
2+
3+
/// File suffix used when registering module symbols in a PID-agnostic way.
4+
pub const SYMBOLS_MAP_SUFFIX: &str = "symbols.map";
5+
6+
/// Per-pid mounting info referencing a deduplicated perf map entry.
7+
#[derive(Serialize, Deserialize, Clone, Debug)]
8+
pub struct MappedProcessModuleSymbols {
9+
pub perf_map_key: String,
10+
pub load_bias: u64,
11+
}

0 commit comments

Comments
 (0)