Skip to content

Commit 9b26b2b

Browse files
committed
feat: resolve debug info and symbols from separate debug files
Stripped system libraries (e.g. libc.so.6) ship without DWARF and with only .dynsym. Installing debug packages like libc6-dbg drops a companion .debug file, but the runner had no way to find it — leaving "Unknown symbol" entries and missing file/line info in flamegraphs. Resolve debug files via two mechanisms: 1. Build-ID lookup at /usr/lib/debug/.build-id/XX/YYYYYY.debug, which is how Ubuntu/Debian debug packages actually install files. 2. .gnu_debuglink with the standard GDB search order (same dir, .debug/ subdir, /usr/lib/debug/<path>/) and CRC32 validation to avoid using stale debug files after binary upgrades. The resolved debug file is used both for DWARF (file/line lookup in ModuleDebugInfo) and for merging the full .symtab into ModuleSymbols.
1 parent f3f1d11 commit 9b26b2b

5 files changed

Lines changed: 308 additions & 7 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ shell-words = "1.1.0"
7373
rmp-serde = "1.3.0"
7474
uuid = { version = "1.21.0", features = ["v4"] }
7575
which = "8.0.2"
76+
crc32fast = "1.5.0"
7677

7778
[target.'cfg(target_os = "linux")'.dependencies]
7879
procfs = "0.17.0"

src/executor/wall_time/perf/debug_info.rs

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use super::elf_helper::find_debug_file;
12
use super::parse_perf_file::LoadedModule;
23
use crate::executor::wall_time::perf::module_symbols::ModuleSymbols;
34
use crate::prelude::*;
@@ -43,7 +44,10 @@ pub trait ModuleDebugInfoExt {
4344
}
4445

4546
impl ModuleDebugInfoExt for ModuleDebugInfo {
46-
/// Create debug info from existing symbols by looking up file/line in DWARF
47+
/// Create debug info from existing symbols by looking up file/line in DWARF.
48+
///
49+
/// If the binary has no DWARF sections, tries to find a separate debug file
50+
/// via its build-id or `.gnu_debuglink` (e.g. installed by `libc6-dbg`).
4751
fn from_symbols<P: AsRef<Path>>(
4852
path: P,
4953
symbols: &ModuleSymbols,
@@ -52,7 +56,25 @@ impl ModuleDebugInfoExt for ModuleDebugInfo {
5256
let content = std::fs::read(path.as_ref())?;
5357
let object = object::File::parse(&*content)?;
5458

55-
let ctx = Self::create_dwarf_context(&object).context("Failed to create DWARF context")?;
59+
// If the binary has no DWARF, try a separate debug file (build-id or .gnu_debuglink)
60+
let ctx = if object.section_by_name(".debug_info").is_some() {
61+
Self::create_dwarf_context(&object).context("Failed to create DWARF context")?
62+
} else {
63+
let debug_path = find_debug_file(&object, path.as_ref()).with_context(|| {
64+
format!(
65+
"No DWARF in {:?} and no separate debug file found",
66+
path.as_ref()
67+
)
68+
})?;
69+
trace!(
70+
"Using separate debug file {debug_path:?} for {:?}",
71+
path.as_ref()
72+
);
73+
let debug_content = std::fs::read(&debug_path)?;
74+
let debug_object = object::File::parse(&*debug_content)?;
75+
Self::create_dwarf_context(&debug_object)
76+
.context("Failed to create DWARF context from debug file")?
77+
};
5678
let (mut min_addr, mut max_addr) = (None, None);
5779
let debug_infos = symbols
5880
.symbols()
@@ -213,6 +235,31 @@ mod tests {
213235
insta::assert_debug_snapshot!(module_debug_info.debug_infos);
214236
}
215237

238+
#[rstest::rstest]
239+
#[case::cpp(
240+
"testdata/perf_map/cpp_my_benchmark_stripped.bin",
241+
"testdata/perf_map/cpp_my_benchmark.debug"
242+
)]
243+
#[case::libc("testdata/perf_map/libc.so.6", "testdata/perf_map/libc.so.6.debug")]
244+
fn test_stripped_binary_with_debuglink_resolves_debug_info(
245+
#[case] binary: &str,
246+
#[case] debug_file: &str,
247+
) {
248+
let (_dir, binary, _debug_file) = super::super::elf_helper::setup_debuglink_tmpdir(
249+
Path::new(binary),
250+
Path::new(debug_file),
251+
);
252+
253+
let module_symbols = ModuleSymbols::from_elf(&binary).unwrap();
254+
assert!(!module_symbols.symbols().is_empty());
255+
256+
let module_debug_info = ModuleDebugInfo::from_symbols(&binary, &module_symbols, 0).unwrap();
257+
assert!(
258+
!module_debug_info.debug_infos.is_empty(),
259+
"DWARF should resolve via .gnu_debuglink"
260+
);
261+
}
262+
216263
#[test]
217264
fn test_ruff_debug_info() {
218265
const MODULE_PATH: &str = "testdata/perf_map/ty_walltime";

src/executor/wall_time/perf/elf_helper.rs

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
//! Based on this: https://github.com/mstange/samply/blob/4a5afec57b7c68b37ecde12b5a258de523e89463/samply/src/linux_shared/svma_file_range.rs#L8
22
33
use anyhow::Context;
4+
use log::trace;
45
use object::Object;
56
use object::ObjectSegment;
7+
use std::path::{Path, PathBuf};
68

79
// A file range in an object file, such as a segment or a section,
810
// for which we know the corresponding Stated Virtual Memory Address (SVMA).
@@ -188,3 +190,175 @@ pub fn relative_address_base(object_file: &object::File) -> u64 {
188190
pub fn compute_base_avma(base_svma: u64, load_bias: u64) -> u64 {
189191
base_svma.wrapping_add(load_bias)
190192
}
193+
194+
const DEFAULT_DEBUG_DIR: &str = "/usr/lib/debug";
195+
196+
/// Search for a separate debug info file.
197+
///
198+
/// Tries two mechanisms in order:
199+
/// 1. **Build-ID path**: `<debug_dir>/.build-id/<XX>/<YYYYYY...>.debug`
200+
/// 2. **`.gnu_debuglink`** with GDB search order and CRC32 validation
201+
///
202+
/// This is the same order GDB uses (see [Separate Debug Files]). Build-ID is
203+
/// preferred because it's a cryptographic hash of the binary contents, so a
204+
/// match cannot be a false positive — whereas `.gnu_debuglink` matches by
205+
/// filename and relies on a CRC32 check. On Debian/Ubuntu, `*-dbg` and
206+
/// `*-dbgsym` packages install their files under `/usr/lib/debug/.build-id/`,
207+
/// so this path is what actually resolves stripped system libraries in
208+
/// practice.
209+
///
210+
/// [Separate Debug Files]: https://sourceware.org/gdb/current/onlinedocs/gdb.html/Separate-Debug-Files.html
211+
pub fn find_debug_file(object: &object::File, binary_path: &Path) -> Option<PathBuf> {
212+
find_debug_file_in(object, binary_path, Path::new(DEFAULT_DEBUG_DIR))
213+
}
214+
215+
fn find_debug_file_in(
216+
object: &object::File,
217+
binary_path: &Path,
218+
debug_dir: &Path,
219+
) -> Option<PathBuf> {
220+
if let Some(path) = find_debug_file_by_build_id(object, debug_dir) {
221+
return Some(path);
222+
}
223+
find_debug_file_by_debuglink(object, binary_path, debug_dir)
224+
}
225+
226+
/// Tries to find a debug file using the build-id.
227+
///
228+
/// ## How it works
229+
///
230+
/// For build-id a05cfb6313fe06a13c9b4b5cb86c2069faa3951f, the debug file lives at:
231+
/// ```text
232+
/// /usr/lib/debug/.build-id/a0/5cfb6313fe06a13c9b4b5cb86c2069faa3951f.debug
233+
/// ^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
234+
/// first byte (2 hex chars) as subdir
235+
/// rest as the filename
236+
/// ```
237+
fn find_debug_file_by_build_id(object: &object::File, debug_dir: &Path) -> Option<PathBuf> {
238+
let build_id = object.build_id().ok()??;
239+
if build_id.is_empty() {
240+
return None;
241+
}
242+
243+
let hex = build_id
244+
.iter()
245+
.map(|b| format!("{b:02x}"))
246+
.collect::<String>();
247+
let path = debug_dir
248+
.join(".build-id")
249+
.join(&hex[..2])
250+
.join(format!("{}.debug", &hex[2..]));
251+
252+
if path.exists() {
253+
return Some(path);
254+
}
255+
256+
None
257+
}
258+
259+
fn find_debug_file_by_debuglink(
260+
object: &object::File,
261+
binary_path: &Path,
262+
debug_dir: &Path,
263+
) -> Option<PathBuf> {
264+
let (debuglink, expected_crc) = object.gnu_debuglink().ok()??;
265+
let debuglink = std::str::from_utf8(debuglink).ok()?;
266+
let dir = binary_path.parent()?;
267+
268+
let candidates = [
269+
dir.join(debuglink),
270+
dir.join(".debug").join(debuglink),
271+
debug_dir
272+
.join(dir.strip_prefix("/").unwrap_or(dir))
273+
.join(debuglink),
274+
];
275+
276+
candidates.into_iter().find(|p| {
277+
let Ok(content) = std::fs::read(p) else {
278+
return false;
279+
};
280+
let actual_crc = crc32fast::hash(&content);
281+
if actual_crc != expected_crc {
282+
trace!(
283+
"CRC mismatch for {}: expected {expected_crc:#x}, got {actual_crc:#x}",
284+
p.display()
285+
);
286+
return false;
287+
}
288+
true
289+
})
290+
}
291+
292+
/// Stage `binary` and `debug_file` into a fresh tempdir, naming the debug
/// file after the binary's `.gnu_debuglink` entry so `find_debug_file`
/// resolves the pair. The binary itself is staged under the name `binary`.
///
/// Returns `(TempDir, staged_binary, staged_debug_file)`. Keep the `TempDir`
/// alive for the duration of the test — dropping it removes the files.
///
/// Panics if the binary cannot be read or parsed, has no `.gnu_debuglink`,
/// or if either copy fails.
#[cfg(all(test, target_os = "linux"))]
pub(super) fn setup_debuglink_tmpdir(
    binary: &Path,
    debug_file: &Path,
) -> (tempfile::TempDir, PathBuf, PathBuf) {
    let binary_bytes = std::fs::read(binary).unwrap();
    let parsed = object::File::parse(&*binary_bytes).unwrap();
    let (link_name, _crc) = parsed
        .gnu_debuglink()
        .unwrap()
        .expect("binary has no .gnu_debuglink");
    let link_name = std::str::from_utf8(link_name).unwrap();

    let tmp = tempfile::tempdir().unwrap();
    let staged_binary = tmp.path().join("binary");
    let staged_debug = tmp.path().join(link_name);

    for (from, to) in [(binary, &staged_binary), (debug_file, &staged_debug)] {
        std::fs::copy(from, to).unwrap();
    }

    (tmp, staged_binary, staged_debug)
}
319+
320+
#[cfg(all(test, target_os = "linux"))]
mod tests {
    use super::*;

    // Fixtures: the stripped libc and its separate debug file, taken from
    // Ubuntu 22.04's `libc6` and `libc6-dbg` packages.
    const LIBC_PATH: &str = "testdata/perf_map/libc.so.6";
    const LIBC_DEBUG_PATH: &str = "testdata/perf_map/libc.so.6.debug";

    /// Recreates the `<debug_dir>/.build-id/<xx>/<rest>.debug` layout that
    /// Ubuntu's `libc6-dbg` uses inside a tempdir and checks that the lookup
    /// resolves it through the build-id note.
    #[test]
    fn test_find_debug_file_by_build_id() {
        let libc = Path::new(LIBC_PATH);
        let bytes = std::fs::read(libc).unwrap();
        let object = object::File::parse(&*bytes).unwrap();

        let hex = object
            .build_id()
            .unwrap()
            .unwrap()
            .iter()
            .map(|b| format!("{b:02x}"))
            .collect::<String>();
        let (prefix, rest) = hex.split_at(2);

        let debug_root = tempfile::tempdir().unwrap();
        let bucket = debug_root.path().join(".build-id").join(prefix);
        std::fs::create_dir_all(&bucket).unwrap();

        let expected = bucket.join(format!("{rest}.debug"));
        std::fs::copy(LIBC_DEBUG_PATH, &expected).unwrap();

        assert_eq!(
            find_debug_file_in(&object, libc, debug_root.path()),
            Some(expected)
        );
    }

    /// With an empty debug directory the build-id lookup finds nothing, so
    /// the debug file staged next to the binary must be found through
    /// `.gnu_debuglink`.
    #[test]
    fn test_find_debug_file_by_debuglink() {
        let (_staging, staged_binary, staged_debug) =
            setup_debuglink_tmpdir(Path::new(LIBC_PATH), Path::new(LIBC_DEBUG_PATH));
        let bytes = std::fs::read(&staged_binary).unwrap();
        let object = object::File::parse(&*bytes).unwrap();

        let empty_debug_dir = tempfile::tempdir().unwrap();
        let found = find_debug_file_in(&object, &staged_binary, empty_debug_dir.path());
        assert_eq!(found, Some(staged_debug));
    }
}

src/executor/wall_time/perf/module_symbols.rs

Lines changed: 83 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
use crate::executor::wall_time::perf::elf_helper;
2+
use log::trace;
23
use object::{Object, ObjectSymbol, ObjectSymbolTable};
34
use runner_shared::module_symbols::SYMBOLS_MAP_SUFFIX;
45
use std::{
6+
collections::HashSet,
57
fmt::Debug,
68
io::{BufWriter, Write},
79
path::Path,
@@ -55,11 +57,8 @@ impl ModuleSymbols {
5557
)
5658
}
5759

58-
/// Extract symbols from an ELF file (pid-agnostic, load_bias = 0).
59-
pub fn from_elf<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
60-
let content = std::fs::read(path.as_ref())?;
61-
let object = object::File::parse(&*content)?;
62-
60+
/// Extract raw symbols from an object file's `.symtab` and `.dynsym` tables.
61+
fn extract_symbols_from_object(object: &object::File) -> Vec<Symbol> {
6362
let mut symbols = Vec::new();
6463

6564
if let Some(symbol_table) = object.symbol_table() {
@@ -82,6 +81,44 @@ impl ModuleSymbols {
8281
}));
8382
}
8483

84+
symbols
85+
}
86+
87+
/// Extract symbols from an ELF file (pid-agnostic, load_bias = 0).
88+
///
89+
/// If a separate debug file is found (via build-id or `.gnu_debuglink`),
90+
/// symbols from that file are merged in. This provides full symbol coverage
91+
/// for stripped system libraries when debug packages are installed.
92+
pub fn from_elf<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
93+
let content = std::fs::read(path.as_ref())?;
94+
let object = object::File::parse(&*content)?;
95+
96+
let mut symbols = Self::extract_symbols_from_object(&object);
97+
98+
// Merge symbols from a separate debug file if available
99+
if let Some(debug_path) = elf_helper::find_debug_file(&object, path.as_ref()) {
100+
trace!(
101+
"Merging symbols from debug file {:?} for {:?}",
102+
debug_path,
103+
path.as_ref()
104+
);
105+
let debug_symbols = std::fs::read(&debug_path).ok().and_then(|c| {
106+
object::File::parse(&*c)
107+
.ok()
108+
.map(|o| Self::extract_symbols_from_object(&o))
109+
});
110+
111+
if let Some(debug_symbols) = debug_symbols {
112+
let existing: HashSet<(u64, String)> =
113+
symbols.iter().map(|s| (s.addr, s.name.clone())).collect();
114+
symbols.extend(
115+
debug_symbols
116+
.into_iter()
117+
.filter(|s| !existing.contains(&(s.addr, s.name.clone()))),
118+
);
119+
}
120+
}
121+
85122
// Filter out
86123
// - ARM ELF "mapping symbols" (https://github.com/torvalds/linux/blob/9448598b22c50c8a5bb77a9103e2d49f134c9578/tools/perf/util/symbol-elf.c#L1591C1-L1598C4)
87124
// - symbols that have an empty name
@@ -227,4 +264,45 @@ mod tests {
227264
let module_symbols = ModuleSymbols::from_elf(MODULE_PATH).unwrap();
228265
insta::assert_debug_snapshot!(module_symbols);
229266
}
267+
268+
/// The stripped binary has only `.dynsym`, while its `.debug` file has the
/// full `.symtab`. `from_elf` should merge the two, giving the same symbol
/// count as the unstripped original.
#[test]
fn test_stripped_binary_merges_debug_file_symbols() {
    let merged =
        ModuleSymbols::from_elf("testdata/perf_map/cpp_my_benchmark_stripped.bin").unwrap();
    let original = ModuleSymbols::from_elf("testdata/perf_map/cpp_my_benchmark.bin").unwrap();

    let merged_count = merged.symbols().len();
    let original_count = original.symbols().len();

    assert!(
        merged_count == original_count,
        "stripped+debug ({}) should have the same number of symbols as the original ({})",
        merged_count,
        original_count,
    );
}
283+
284+
/// libc.so.6 ships with a populated `.dynsym`, so a fallback that consults
/// the debug file only when the binary has no symbols would never read it.
/// Merging must add `.symtab`-only symbols such as `_int_malloc` from the
/// debug file while keeping dynsym symbols such as `malloc`.
#[test]
fn test_libc_symbols_merge_with_debug_file() {
    let libc = Path::new("testdata/perf_map/libc.so.6");
    let libc_debug = Path::new("testdata/perf_map/libc.so.6.debug");

    // `_tmp` must stay bound: dropping the tempdir deletes the staged files.
    let (_tmp, staged_binary, _staged_debug) =
        elf_helper::setup_debuglink_tmpdir(libc, libc_debug);

    let module_symbols = ModuleSymbols::from_elf(&staged_binary).unwrap();
    let has_symbol = |wanted: &str| {
        module_symbols
            .symbols()
            .iter()
            .any(|symbol| symbol.name == wanted)
    };

    assert!(
        has_symbol("malloc"),
        "libc dynsym symbol `malloc` should be present"
    );
    assert!(
        has_symbol("_int_malloc"),
        "internal libc symbol `_int_malloc` should be merged in from the debug file"
    );
}
230308
}

0 commit comments

Comments
 (0)