Skip to content

Commit ad25e2a

Browse files
committed
feat: extract symbols from separate debug files via .gnu_debuglink
Move find_debug_file to elf_helper.rs so it can be shared, and use it in ModuleSymbols::from_elf to merge symbols from the .debug file when the main binary has an incomplete symbol table (e.g. only .dynsym). This fixes "Unknown symbol" entries in flamegraphs for internal functions in stripped system libraries like ld-linux, where the full .symtab is only available in the debug package.
1 parent e615aee commit ad25e2a

3 files changed

Lines changed: 98 additions & 40 deletions

File tree

src/executor/wall_time/perf/debug_info.rs

Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use super::elf_helper::find_debug_file;
12
use super::parse_perf_file::LoadedModule;
23
use crate::executor::wall_time::perf::module_symbols::ModuleSymbols;
34
use crate::prelude::*;
@@ -11,41 +12,6 @@ use std::path::PathBuf;
1112

1213
type EndianRcSlice = gimli::EndianRcSlice<gimli::RunTimeEndian>;
1314

14-
/// Search for a separate debug info file using `.gnu_debuglink`.
15-
///
16-
/// Follows the standard GDB search order:
17-
/// 1. `<binary_dir>/<debuglink>`
18-
/// 2. `<binary_dir>/.debug/<debuglink>`
19-
/// 3. `/usr/lib/debug/<binary_dir>/<debuglink>`
20-
fn find_debug_file(object: &object::File, binary_path: &Path) -> Option<PathBuf> {
21-
let (debuglink, expected_crc) = object.gnu_debuglink().ok()??;
22-
let debuglink = std::str::from_utf8(debuglink).ok()?;
23-
let dir = binary_path.parent()?;
24-
25-
let candidates = [
26-
dir.join(debuglink),
27-
dir.join(".debug").join(debuglink),
28-
Path::new("/usr/lib/debug")
29-
.join(dir.strip_prefix("/").unwrap_or(dir))
30-
.join(debuglink),
31-
];
32-
33-
candidates.into_iter().find(|p| {
34-
let Ok(content) = std::fs::read(p) else {
35-
return false;
36-
};
37-
let actual_crc = crc32fast::hash(&content);
38-
if actual_crc != expected_crc {
39-
trace!(
40-
"CRC mismatch for {}: expected {expected_crc:#x}, got {actual_crc:#x}",
41-
p.display()
42-
);
43-
return false;
44-
}
45-
true
46-
})
47-
}
48-
4915
pub trait ModuleDebugInfoExt {
5016
fn from_symbols<P: AsRef<Path>>(
5117
path: P,

src/executor/wall_time/perf/elf_helper.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
//! Based on this: https://github.com/mstange/samply/blob/4a5afec57b7c68b37ecde12b5a258de523e89463/samply/src/linux_shared/svma_file_range.rs#L8
22
33
use anyhow::Context;
4+
use log::trace;
45
use object::Object;
56
use object::ObjectSegment;
7+
use std::path::{Path, PathBuf};
68

79
// A file range in an object file, such as a segment or a section,
810
// for which we know the corresponding Stated Virtual Memory Address (SVMA).
@@ -188,3 +190,40 @@ pub fn relative_address_base(object_file: &object::File) -> u64 {
188190
pub fn compute_base_avma(base_svma: u64, load_bias: u64) -> u64 {
189191
base_svma.wrapping_add(load_bias)
190192
}
193+
194+
/// Search for a separate debug info file using `.gnu_debuglink`.
195+
///
196+
/// Follows the standard GDB search order:
197+
/// 1. `<binary_dir>/<debuglink>`
198+
/// 2. `<binary_dir>/.debug/<debuglink>`
199+
/// 3. `/usr/lib/debug/<binary_dir>/<debuglink>`
200+
///
201+
/// Validates the CRC32 checksum to avoid using stale debug files.
202+
pub fn find_debug_file(object: &object::File, binary_path: &Path) -> Option<PathBuf> {
203+
let (debuglink, expected_crc) = object.gnu_debuglink().ok()??;
204+
let debuglink = std::str::from_utf8(debuglink).ok()?;
205+
let dir = binary_path.parent()?;
206+
207+
let candidates = [
208+
dir.join(debuglink),
209+
dir.join(".debug").join(debuglink),
210+
Path::new("/usr/lib/debug")
211+
.join(dir.strip_prefix("/").unwrap_or(dir))
212+
.join(debuglink),
213+
];
214+
215+
candidates.into_iter().find(|p| {
216+
let Ok(content) = std::fs::read(p) else {
217+
return false;
218+
};
219+
let actual_crc = crc32fast::hash(&content);
220+
if actual_crc != expected_crc {
221+
trace!(
222+
"CRC mismatch for {}: expected {expected_crc:#x}, got {actual_crc:#x}",
223+
p.display()
224+
);
225+
return false;
226+
}
227+
true
228+
})
229+
}

src/executor/wall_time/perf/module_symbols.rs

Lines changed: 58 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
use crate::executor::wall_time::perf::elf_helper;
2+
use log::trace;
23
use object::{Object, ObjectSymbol, ObjectSymbolTable};
34
use runner_shared::module_symbols::SYMBOLS_MAP_SUFFIX;
45
use std::{
6+
collections::HashSet,
57
fmt::Debug,
68
io::{BufWriter, Write},
79
path::Path,
@@ -55,11 +57,8 @@ impl ModuleSymbols {
5557
)
5658
}
5759

58-
/// Extract symbols from an ELF file (pid-agnostic, load_bias = 0).
59-
pub fn from_elf<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
60-
let content = std::fs::read(path.as_ref())?;
61-
let object = object::File::parse(&*content)?;
62-
60+
/// Extract raw symbols from an object file's `.symtab` and `.dynsym` tables.
61+
fn extract_symbols_from_object(object: &object::File) -> Vec<Symbol> {
6362
let mut symbols = Vec::new();
6463

6564
if let Some(symbol_table) = object.symbol_table() {
@@ -82,6 +81,44 @@ impl ModuleSymbols {
8281
}));
8382
}
8483

84+
symbols
85+
}
86+
87+
/// Extract symbols from an ELF file (pid-agnostic, load_bias = 0).
88+
///
89+
/// If the binary has a `.gnu_debuglink` pointing to a separate debug file,
90+
/// symbols from that file are merged in. This provides full symbol coverage
91+
/// for stripped system libraries when debug packages are installed.
92+
pub fn from_elf<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
93+
let content = std::fs::read(path.as_ref())?;
94+
let object = object::File::parse(&*content)?;
95+
96+
let mut symbols = Self::extract_symbols_from_object(&object);
97+
98+
// Merge symbols from a separate debug file if available
99+
if let Some(debug_path) = elf_helper::find_debug_file(&object, path.as_ref()) {
100+
trace!(
101+
"Merging symbols from debug file {:?} for {:?}",
102+
debug_path,
103+
path.as_ref()
104+
);
105+
let debug_symbols = std::fs::read(&debug_path).ok().and_then(|c| {
106+
object::File::parse(&*c)
107+
.ok()
108+
.map(|o| Self::extract_symbols_from_object(&o))
109+
});
110+
111+
if let Some(debug_symbols) = debug_symbols {
112+
let existing: HashSet<(u64, String)> =
113+
symbols.iter().map(|s| (s.addr, s.name.clone())).collect();
114+
symbols.extend(
115+
debug_symbols
116+
.into_iter()
117+
.filter(|s| !existing.contains(&(s.addr, s.name.clone()))),
118+
);
119+
}
120+
}
121+
85122
// Filter out
86123
// - ARM ELF "mapping symbols" (https://github.com/torvalds/linux/blob/9448598b22c50c8a5bb77a9103e2d49f134c9578/tools/perf/util/symbol-elf.c#L1591C1-L1598C4)
87124
// - symbols that have an empty name
@@ -227,4 +264,20 @@ mod tests {
227264
let module_symbols = ModuleSymbols::from_elf(MODULE_PATH).unwrap();
228265
insta::assert_debug_snapshot!(module_symbols);
229266
}
267+
268+
#[test]
269+
fn test_stripped_binary_merges_debug_file_symbols() {
270+
// The stripped binary has only .dynsym; the .debug file has the full .symtab.
271+
// from_elf should merge both via .gnu_debuglink.
272+
let stripped_only =
273+
ModuleSymbols::from_elf("testdata/perf_map/cpp_my_benchmark_stripped.bin").unwrap();
274+
let full = ModuleSymbols::from_elf("testdata/perf_map/cpp_my_benchmark.bin").unwrap();
275+
276+
assert!(
277+
stripped_only.symbols().len() == full.symbols().len(),
278+
"stripped+debug ({}) should have the same number of symbols as the original ({})",
279+
stripped_only.symbols().len(),
280+
full.symbols().len(),
281+
);
282+
}
230283
}

0 commit comments

Comments
 (0)