Skip to content

Commit a89ef28

Browse files
avrabe and claude authored
feat(dwarf): emit DWARF debug sections behind --debug-line (VCR-DBG-001, #242, #394) (#429)
* feat(dwarf): emit DWARF debug sections behind --debug-line (VCR-DBG-001, #242, #394) PR B of v0.12.0: with `--debug-line`, synth emits a full DWARF unit (`.debug_info`/`.debug_abbrev`/`.debug_str`/`.debug_line`) into the relocatable ELF, mapping ARM `.text` addresses back to the input wasm's source lines via PR A's per-instruction line_map. Pipeline: read the input wasm's `.debug_line` (gimli, now a production dep of synth-core — auto-resolved by Bazel via from_cargo), compose `func_offset + machine_offset → op_offsets[op_idx] → op_offsets_to_source → source line`, emit a real `DW_TAG_compile_unit` whose `DW_AT_stmt_list` points at `.debug_line`, and add the sections as NON-ALLOC trailing PROGBITS (after `.text`/`.data`/`.bss`, so the hardcoded `with_section(4/5/6)` symbol indices are undisturbed). Frozen-safe: the entire emit is behind `if debug_line` (default off), so the default build is byte-identical. Oracles (dwarf_debug_line_emit_394.rs): - additivity: on a DWARF input (msgq) `.text`/`.data`/`.bss` are byte-identical with/without the flag and the `.debug_*` sections appear only under it; on a no-DWARF input (gust_kernel) the whole `.o` is byte-identical. - reachability: the emitted DWARF is walked via the NORMAL gimli `dwarf.units()` → CU `DW_AT_stmt_list` line-program path (the path a debugger uses), resolving 110 in-range `.text` addresses to non-zero source lines. EXPERIMENTAL scope: addresses are object-relative (`.text` base 0) with no `.rela.debug_*` yet, so they are correct for the unlinked object but shift by the load base once linked. PR C (VCR-DBG-002) generalizes the elf_builder's `.rel.text`-only relocation machinery to emit `.rela.debug_*` against the `.text` symbol — required before v0.12.0 is tagged. ARM only; RISC-V is a follow-up. 
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * fix(bazel): list @crates//:gimli in synth-core deps (VCR-DBG-001, #394) The hand-written rust_library `deps` in crates/BUILD.bazel are not generated from Cargo.toml, so moving gimli to a production dependency of synth-core (which cargo honours) left the Bazel target compiling synth-core without it → `error[E0433]: unresolved crate gimli` in "Bazel Build & Proofs". from_cargo makes the `@crates//:gimli` alias available; the manual target must still list it. Verified `bazel build //crates:synth` green. --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent b267d54 commit a89ef28

7 files changed

Lines changed: 564 additions & 9 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/BUILD.bazel

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ rust_library(
1414
edition = "2024",
1515
deps = [
1616
"@crates//:anyhow",
17+
# VCR-DBG-001 (#394): DWARF `.debug_line` read+emit. The hand-written
18+
# Bazel deps are not generated from Cargo.toml, so a new production dep
19+
# must be listed here as well as in synth-core/Cargo.toml.
20+
"@crates//:gimli",
1721
"@crates//:serde",
1822
"@crates//:serde_json",
1923
"@crates//:sha2",

crates/synth-cli/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ wast.workspace = true
5454

5555
[dev-dependencies]
5656
object.workspace = true
57+
# VCR-DBG-001 step 4 (#394) — oracle B parses the EMITTED `.debug_line` back with
58+
# gimli::read to prove the section is real debugger-readable DWARF (addresses in
59+
# `.text` range, lines non-zero). Test-only; the production read+emit lives in
60+
# synth-core. Matches synth-core's gimli pin.
61+
gimli = { version = "0.31", default-features = false, features = ["read", "std"] }
5762
# VCR-MEM-001 (#383) layer-2 substrate: scry's sound shadow-stack-depth analysis,
5863
# verified in-tree against a real module. DEV-dependency only — the production
5964
# binary does not pull scry until the gated consumption step (the .bss shrink /

crates/synth-cli/src/main.rs

Lines changed: 117 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,19 @@ enum Commands {
252252
/// above B will mis-address. Only meaningful with `--native-pointer-abi`.
253253
#[arg(long, value_name = "BYTES")]
254254
shadow_stack_size: Option<u32>,
255+
256+
/// VCR-DBG-001 (#394): emit DWARF debug sections (`.debug_info`/
257+
/// `.debug_abbrev`/`.debug_str`/`.debug_line`) mapping ARM `.text`
258+
/// addresses back to the input wasm's source lines. Requires the input to
259+
/// carry DWARF (`.debug_line` custom section) and the ARM backend (RISC-V
260+
/// carries no line_map). Purely additive: `.text`/`.data`/`.bss` stay
261+
/// byte-identical; off by default. Wired on the relocatable-object
262+
/// (host-link) path. EXPERIMENTAL: addresses are object-relative (`.text`
263+
/// base 0) and carry no relocations yet, so they are correct for the
264+
/// unlinked object but shift by the load base once linked — linked-binary
265+
/// debugging needs the `.rela.debug_*` follow-up (VCR-DBG-002).
266+
#[arg(long)]
267+
debug_line: bool,
255268
},
256269

257270
/// Disassemble an ARM ELF file (e.g., synth disasm output.elf)
@@ -369,6 +382,7 @@ fn main() -> Result<()> {
369382
sbom,
370383
sign_output,
371384
shadow_stack_size,
385+
debug_line,
372386
} => {
373387
// Resolve target spec: --target overrides, --cortex-m is backwards compat
374388
let target_spec = resolve_target_spec(target.as_deref(), cortex_m, &backend)?;
@@ -409,6 +423,7 @@ fn main() -> Result<()> {
409423
sbom_path,
410424
sign_output,
411425
shadow_stack_size,
426+
debug_line,
412427
)?;
413428

414429
// If --link requested, invoke the cross-linker
@@ -716,6 +731,15 @@ struct ElfFunction {
716731
code: Vec<u8>,
717732
/// Relocations targeting external symbols (from import dispatch stubs)
718733
relocations: Vec<synth_core::backend::CodeRelocation>,
734+
/// VCR-DBG-001 step 4 (#394): per-op wasm code BYTE offsets (decoder side
735+
/// table, `FunctionOps.op_offsets`) — module-relative, parallel to the wasm
736+
/// ops. Threaded here so the `--debug-line` emitter can normalize against
737+
/// `code_base` and compose with `line_map`. Empty unless DWARF emission is on.
738+
op_offsets: Vec<u32>,
739+
/// VCR-DBG-001 step 4 (#394): `(machine_offset_within_function → wasm_op_index)`
740+
/// captured by the ARM backend (`CompiledFunction.line_map`). Empty for the
741+
/// RISC-V backend. Composed with `op_offsets` to map ARM text address → source.
742+
line_map: synth_core::backend::LineMap,
719743
}
720744

721745
/// Resolve --target / --cortex-m into a TargetSpec
@@ -959,6 +983,8 @@ fn compile_command(
959983
sbom_path: Option<PathBuf>,
960984
sign_output: bool,
961985
shadow_stack_size: Option<u32>,
986+
// VCR-DBG-001 step 4 (#394): `--debug-line` — emit `.debug_line` DWARF.
987+
debug_line: bool,
962988
) -> Result<()> {
963989
// Validate backend exists
964990
let registry = build_backend_registry();
@@ -1006,6 +1032,7 @@ fn compile_command(
10061032
sbom_path,
10071033
sign_output,
10081034
shadow_stack_size,
1035+
debug_line,
10091036
);
10101037
}
10111038

@@ -1761,6 +1788,9 @@ fn compile_all_exports(
17611788
sbom_path: Option<PathBuf>,
17621789
sign_output: bool,
17631790
shadow_stack_size: Option<u32>,
1791+
// VCR-DBG-001 step 4 (#394): emit the DWARF `.debug_*` sections from the input
1792+
// wasm's DWARF + the ARM line_maps. Default off ⇒ output byte-identical.
1793+
debug_line: bool,
17641794
) -> Result<()> {
17651795
let path = input.context("--all-exports requires an input file")?;
17661796

@@ -2106,6 +2136,10 @@ fn compile_all_exports(
21062136
wasm_index: func.index,
21072137
code: compiled.code,
21082138
relocations: compiled.relocations,
2139+
// VCR-DBG-001 step 4: carry the op-offset side table + the backend's
2140+
// line_map so `--debug-line` can compose ARM text addr → source.
2141+
op_offsets: func.op_offsets.clone(),
2142+
line_map: compiled.line_map,
21092143
});
21102144

21112145
// Run verification if requested
@@ -2184,6 +2218,20 @@ fn compile_all_exports(
21842218
// Tracks whether we emitted an ET_REL object (needs linking) vs a standalone
21852219
// executable, so the summary below reports the right type and link hint.
21862220
let produced_relocatable = is_riscv || has_external_relocations || relocatable;
2221+
2222+
// VCR-DBG-001 step 4 (#394): when `--debug-line` is set, parse the input
2223+
// wasm's `.debug_line` from the bytes synth actually compiled
2224+
// (`sbom_wasm_bytes` = post-WAT/post-loom). A DWARF-free input yields empty
2225+
// rows ⇒ the emitter no-ops ⇒ the object stays byte-identical. Default
2226+
// (flag off) ⇒ `None` ⇒ zero new work, zero output change.
2227+
let input_dwarf = if debug_line {
2228+
sbom_wasm_bytes
2229+
.as_deref()
2230+
.map(synth_core::dwarf_line::read_input_dwarf_line)
2231+
} else {
2232+
None
2233+
};
2234+
21872235
let elf_data = if is_riscv {
21882236
info!("Building RISC-V multi-function relocatable object (EM_RISCV)");
21892237
build_multi_func_riscv_elf(&compiled_funcs)?
@@ -2212,6 +2260,7 @@ fn compile_all_exports(
22122260
} else {
22132261
None
22142262
},
2263+
input_dwarf.as_ref(),
22152264
)?
22162265
} else if cortex_m {
22172266
build_multi_func_cortex_m_elf(&compiled_funcs, &all_memories, target_spec)?
@@ -2367,6 +2416,10 @@ fn build_relocatable_elf(
23672416
data_segments: &[(u32, Vec<u8>)],
23682417
linear_memory_bytes: u32,
23692418
native_globals: Option<NativeGlobalsLayout>,
2419+
// VCR-DBG-001 step 4 (#394): the input wasm's parsed `.debug_line` (rows +
2420+
// code_base). `None` (e.g. `--debug-line` off) or empty `rows` (the input carried
2421+
// no DWARF) ⇒ no `.debug_*` sections emitted ⇒ output byte-identical to the default.
2422+
dwarf_line: Option<&synth_core::dwarf_line::InputDwarfLine>,
23702423
) -> Result<Vec<u8>> {
23712424
use std::collections::HashMap;
23722425

@@ -3076,6 +3129,61 @@ fn build_relocatable_elf(
30763129
}
30773130
}
30783131

3132+
// VCR-DBG-001 step 4 (#394): emit a FULL DWARF unit (`.debug_info`,
3133+
// `.debug_abbrev`, `.debug_str`, `.debug_line`, ...) as NON-ALLOC trailing
3134+
// PROGBITS sections. Each is structurally a clone of `.meld_import_table`: no
3135+
// symbol or relocation targets them, and `.rel.text`'s `sh_info` is hardcoded
3136+
// to `.text` (index 4) AFTER the user-section loop — so appending here gives
3137+
// each a fresh section index without disturbing the `with_section` (4/5/6)
3138+
// symbol indices, keeping the feature PURELY ADDITIVE. The unit carries a
3139+
// real `DW_TAG_compile_unit` whose `DW_AT_stmt_list` points at `.debug_line`,
3140+
// so a debugger reaches the line table via the NORMAL `.debug_info` → CU walk.
3141+
// Composed from `func_offsets[i] + machine_offset → op_offsets[op_idx] → src`.
3142+
if let Some(input_dwarf) = dwarf_line
3143+
&& !input_dwarf.rows.is_empty()
3144+
{
3145+
use synth_core::dwarf_line::{SourceLoc, op_offsets_to_source};
3146+
let mut table: Vec<(u64, u32)> = Vec::new();
3147+
for (i, func) in funcs.iter().enumerate() {
3148+
if func.line_map.is_empty() || func.op_offsets.is_empty() {
3149+
continue; // RISC-V (empty line_map) or a func with no op offsets
3150+
}
3151+
// op-index → source for this function's ops (parallel to op_offsets).
3152+
let locs =
3153+
op_offsets_to_source(&func.op_offsets, input_dwarf.code_base, &input_dwarf.rows);
3154+
for &(machine_off, op_idx) in &func.line_map {
3155+
// None entries (prologue / literal pool) carry no source.
3156+
let Some(op_idx) = op_idx else { continue };
3157+
if let Some(Some(SourceLoc { line, .. })) = locs.get(op_idx)
3158+
&& *line != 0
3159+
{
3160+
let arm_addr = (func_offsets[i] + machine_off) as u64;
3161+
table.push((arm_addr, *line));
3162+
}
3163+
}
3164+
}
3165+
// One address-ordered, de-duped sequence covering every function.
3166+
table.sort_by_key(|&(a, _)| a);
3167+
table.dedup_by_key(|&mut (a, _)| a);
3168+
3169+
let dwarf_sections = synth_core::dwarf_line::emit_debug_sections(&table);
3170+
if !dwarf_sections.is_empty() {
3171+
let names: Vec<&str> = dwarf_sections.iter().map(|(n, _)| *n).collect();
3172+
for (name, bytes) in &dwarf_sections {
3173+
let dbg_section = Section::new(name, ElfSectionType::ProgBits)
3174+
.with_align(1)
3175+
.with_data(bytes.clone());
3176+
elf_builder.add_section(dbg_section);
3177+
}
3178+
info!(
3179+
"DWARF: emitted {} sections {:?} ({} address rows, --debug-line)",
3180+
dwarf_sections.len(),
3181+
names,
3182+
table.len()
3183+
);
3184+
}
3185+
}
3186+
30793187
let (external_count, reloc_count) = extern_sym_indices;
30803188
info!(
30813189
"Relocatable ELF: {} functions, {} external symbols, {} relocations",
@@ -4462,6 +4570,8 @@ mod tests {
44624570
kind: synth_core::backend::RelocKind::MovtAbs,
44634571
},
44644572
],
4573+
op_offsets: vec![],
4574+
line_map: vec![],
44654575
};
44664576
let linear_memory_bytes: u32 = 131_072; // 2 wasm pages
44674577
// Native globals: SP-init = 65536 (the shadow-stack top) drives the
@@ -4472,7 +4582,7 @@ mod tests {
44724582
shadow_stack_size: None,
44734583
};
44744584

4475-
let elf = build_relocatable_elf(&[func], &[], &[], linear_memory_bytes, Some(native))
4585+
let elf = build_relocatable_elf(&[func], &[], &[], linear_memory_bytes, Some(native), None)
44764586
.expect("#345: native-pointer zero-linmem object builds");
44774587

44784588
// Parse the ELF and inspect sections by name + type.
@@ -4560,13 +4670,15 @@ mod tests {
45604670
kind: synth_core::backend::RelocKind::Abs32,
45614671
},
45624672
],
4673+
op_offsets: vec![],
4674+
line_map: vec![],
45634675
};
45644676
let native = NativeGlobalsLayout {
45654677
globals: vec![(0, 65_536)],
45664678
sp_init: 65_536,
45674679
shadow_stack_size: None,
45684680
};
4569-
let elf = build_relocatable_elf(&[func], &[], &[], 131_072, Some(native))
4681+
let elf = build_relocatable_elf(&[func], &[], &[], 131_072, Some(native), None)
45704682
.expect("#345: native-pointer literal-pool object builds");
45714683

45724684
let header = object::elf::FileHeader32::<Endianness>::parse(&*elf).expect("valid ELF32");
@@ -4638,6 +4750,8 @@ mod tests {
46384750
symbol: "__synth_wasm_data".to_string(),
46394751
kind: synth_core::backend::RelocKind::Abs32,
46404752
}],
4753+
op_offsets: vec![],
4754+
line_map: vec![],
46414755
};
46424756
// 12-byte init segment at the high offset, above the shadow stack.
46434757
let seg: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0xf4, 0xff, 0xff, 0xff];
@@ -4648,7 +4762,7 @@ mod tests {
46484762
shadow_stack_size: None,
46494763
};
46504764

4651-
let elf = build_relocatable_elf(&[func], &[], &data_segments, 131_072, Some(native))
4765+
let elf = build_relocatable_elf(&[func], &[], &data_segments, 131_072, Some(native), None)
46524766
.expect("#354: mixed-case object builds");
46534767

46544768
let header = object::elf::FileHeader32::<Endianness>::parse(&*elf).expect("valid ELF32");

0 commit comments

Comments
 (0)