Skip to content

Commit 8ca4a9a

Browse files
authored
feat(bloat): expose linker --cref back-references as referenced_by (#459) (#460)
1 parent 7a40675 commit 8ca4a9a

7 files changed

Lines changed: 725 additions & 19 deletions

File tree

.github/workflows/template_build.yml

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,26 @@ jobs:
107107
run: fbuild build ${{ inputs.test-dir }} -e ${{ inputs.env-name }} --quick
108108

109109
- name: Verify quick firmware output
110+
# BuildLayout (PR #455, FastLED/fbuild#432) collapses the
111+
# `<env>` segment when the sketch dir basename == env-name.
112+
# `tests/platform/lpc845/` (env `lpc845`) → collapses to
113+
# `.fbuild/build/quick/firmware.<ext>`; but
114+
# `tests/platform/giga_r1_m7/` (env `giga_r1`) → stays as
115+
# `.fbuild/build/giga_r1/quick/firmware.<ext>`. Find both
116+
# layouts so the verify works either way.
110117
run: |
111-
test -f ${{ inputs.test-dir }}/.fbuild/build/${{ inputs.env-name }}/quick/firmware.${{ inputs.firmware-ext }}
112-
echo "Quick build successful - firmware.${{ inputs.firmware-ext }} generated"
118+
firmware=$(find ${{ inputs.test-dir }}/.fbuild/build -type f -name "firmware.${{ inputs.firmware-ext }}" -path "*/quick/*" 2>/dev/null | head -1)
119+
test -n "$firmware" && test -f "$firmware"
120+
echo "Quick build successful - firmware.${{ inputs.firmware-ext }} at $firmware"
113121
114122
- name: Build ${{ inputs.workflow-name }} (release)
115123
run: fbuild build ${{ inputs.test-dir }} -e ${{ inputs.env-name }} --release
116124

117125
- name: Verify release firmware output
118126
run: |
119-
test -f ${{ inputs.test-dir }}/.fbuild/build/${{ inputs.env-name }}/release/firmware.${{ inputs.firmware-ext }}
120-
echo "Release build successful - firmware.${{ inputs.firmware-ext }} generated"
127+
firmware=$(find ${{ inputs.test-dir }}/.fbuild/build -type f -name "firmware.${{ inputs.firmware-ext }}" -path "*/release/*" 2>/dev/null | head -1)
128+
test -n "$firmware" && test -f "$firmware"
129+
echo "Release build successful - firmware.${{ inputs.firmware-ext }} at $firmware"
121130
122131
- name: Save fbuild toolchains
123132
if: always()

crates/fbuild-build/src/symbol_analyzer.rs

Lines changed: 145 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ use std::collections::BTreeMap;
1414

1515
use fbuild_core::subprocess::run_command_with_stdin;
1616
use fbuild_core::symbol_analysis::{
17-
build_fine_grained_map_with_synth, collect_map_derived_owners, parse_linker_map,
18-
parse_nm_output, FineGrainedSymbolMap, LoadedRegion,
17+
build_fine_grained_map_with_synth, collect_map_derived_owners, parse_cref_table,
18+
parse_linker_map, parse_nm_output, FineGrainedSymbolMap, LoadedRegion, SymbolReference,
1919
};
2020
use fbuild_core::{FbuildError, Result};
2121

@@ -226,19 +226,20 @@ pub fn analyze_elf(cfg: AnalyzeConfig<'_>) -> Result<FineGrainedSymbolMap> {
226226
mangled.clone()
227227
};
228228

229-
let ranges = if let Some(map_path) = cfg.map_path {
229+
let (ranges, cref_map) = if let Some(map_path) = cfg.map_path {
230230
match std::fs::read_to_string(map_path) {
231-
Ok(text) => parse_linker_map(&text),
231+
Ok(text) => (parse_linker_map(&text), parse_cref_table(&text)),
232232
Err(e) => {
233233
tracing::warn!(
234-
"could not read map file {}: {e}; archive attribution will be unavailable",
234+
"could not read map file {}: {e}; archive attribution and \
235+
referenced_by will be unavailable",
235236
map_path.display()
236237
);
237-
Vec::new()
238+
(Vec::new(), BTreeMap::<String, Vec<SymbolReference>>::new())
238239
}
239240
}
240241
} else {
241-
Vec::new()
242+
(Vec::new(), BTreeMap::<String, Vec<SymbolReference>>::new())
242243
};
243244

244245
// Pre-walk the ranges to collect mangled owners for map-derived
@@ -271,6 +272,7 @@ pub fn analyze_elf(cfg: AnalyzeConfig<'_>) -> Result<FineGrainedSymbolMap> {
271272
demangled,
272273
ranges,
273274
&synth_demangled,
275+
&cref_map,
274276
);
275277

276278
// Strip symbols that nm enumerated but that don't actually consume
@@ -457,20 +459,21 @@ pub fn format_markdown_report(map: &FineGrainedSymbolMap, top_n: usize) -> Strin
457459
let _ = writeln!(out);
458460
let _ = writeln!(
459461
out,
460-
"| Bytes | Archive | Object | Section | Source | Symbol |"
462+
"| Bytes | Archive | Object | Section | Source | Referenced by | Symbol |"
461463
);
462-
let _ = writeln!(out, "|---:|---|---|---|---|---|");
464+
let _ = writeln!(out, "|---:|---|---|---|---|---|---|");
463465
for s in syms.into_iter().take(top_n) {
464466
let archive = s.archive.as_deref().unwrap_or("(none)");
465467
let object = s.object.as_deref().unwrap_or("-");
466468
let sect = s.output_section.as_deref().unwrap_or("-");
467469
// Pipe-escape the demangled name so it doesn't break MD
468470
// table parsing (rare but possible with operator overloads).
469471
let name = s.demangled.replace('|', "\\|");
472+
let refs = format_referenced_by(&s.referenced_by, 3);
470473
let _ = writeln!(
471474
out,
472-
"| {} | {} | {} | {} | {} | `{}` |",
473-
s.size, archive, object, sect, s.source, name
475+
"| {} | {} | {} | {} | {} | {} | `{}` |",
476+
s.size, archive, object, sect, s.source, refs, name
474477
);
475478
}
476479
let _ = writeln!(out);
@@ -605,6 +608,40 @@ fn walk_for_elf(dir: &Path, newest: &mut Option<(std::time::SystemTime, PathBuf)
605608
}
606609
}
607610

611+
/// Format up to `top_k` `referenced_by` entries for a Markdown table
612+
/// cell. Each referencer is rendered as `archive(object)` (or just
613+
/// `object` for bare TUs with no archive) and joined with `, `. When
614+
/// the list exceeds `top_k`, append ` (… and N more)`. Returns `-`
615+
/// for an empty list so the column stays scannable.
616+
///
617+
/// `top_k = 3` is the column-friendly default — the issue proposes
618+
/// K=5 as a follow-up-table value, but five `lib.a(obj.o)` strings
619+
/// per row makes the GitHub-rendered table awkward. Three keeps the
620+
/// signal-to-width ratio readable while still surfacing the most
621+
/// common "libc internal wrapper escapes to an ESP-IDF/mbedTLS TU"
622+
/// pattern documented in #459.
623+
fn format_referenced_by(
624+
refs: &[fbuild_core::symbol_analysis::SymbolReference],
625+
top_k: usize,
626+
) -> String {
627+
if refs.is_empty() {
628+
return "-".to_string();
629+
}
630+
let mut parts: Vec<String> = refs
631+
.iter()
632+
.take(top_k)
633+
.map(|r| match &r.archive {
634+
Some(a) => format!("{a}({})", r.object),
635+
None => r.object.clone(),
636+
})
637+
.collect();
638+
if refs.len() > top_k {
639+
parts.push(format!("(… and {} more)", refs.len() - top_k));
640+
}
641+
// Pipe-escape so the joined string doesn't break MD table cells.
642+
parts.join(", ").replace('|', "\\|")
643+
}
644+
608645
fn truncate(s: &str, max: usize) -> String {
609646
if s.len() <= max {
610647
s.to_string()
@@ -732,6 +769,7 @@ mod tests {
732769
object: Some("foo.o".into()),
733770
output_section: Some(".flash.text".into()),
734771
source: "nm".into(),
772+
referenced_by: Vec::new(),
735773
},
736774
FineGrainedSymbol {
737775
mangled: "_Z3barv".into(),
@@ -744,6 +782,7 @@ mod tests {
744782
object: Some("bar.o".into()),
745783
output_section: Some(".dram0.bss".into()),
746784
source: "nm".into(),
785+
referenced_by: Vec::new(),
747786
},
748787
],
749788
sections: Vec::<SectionBytes>::new(),
@@ -754,9 +793,9 @@ mod tests {
754793
assert!(md.contains("**Flash**: 100 B"));
755794
assert!(md.contains("**RAM**: 50 B"));
756795
assert!(md.contains("## Top 1 flash symbols"));
757-
assert!(md.contains("| 100 | libA.a | foo.o | .flash.text | nm | `foo(int)` |"));
796+
assert!(md.contains("| 100 | libA.a | foo.o | .flash.text | nm | - | `foo(int)` |"));
758797
assert!(md.contains("## Top 1 ram symbols"));
759-
assert!(md.contains("| 50 | libB.a | bar.o | .dram0.bss | nm | `bar()` |"));
798+
assert!(md.contains("| 50 | libB.a | bar.o | .dram0.bss | nm | - | `bar()` |"));
760799
assert!(md.contains("## Flash bytes by archive"));
761800
assert!(md.contains("| 100 | libA.a |"));
762801
}
@@ -781,10 +820,103 @@ mod tests {
781820
object: None,
782821
output_section: None,
783822
source: "nm".into(),
823+
referenced_by: Vec::new(),
784824
}],
785825
sections: Vec::<SectionBytes>::new(),
786826
};
787827
let md = format_markdown_report(&map, 5);
788828
assert!(md.contains("operator\\|(int const&, int const&)"));
789829
}
830+
831+
#[test]
fn format_markdown_report_renders_referenced_by_column() {
    // Scenario behind #459: a libc symbol such as `_vfprintf_r` lists
    // its referencers (libc and non-libc alike) directly in the report,
    // so "who pulled this in?" needs no separate query.
    use fbuild_core::symbol_analysis::{
        FineGrainedSymbol, FineGrainedSymbolMap, SectionBytes, SymbolReference,
    };

    // Five referencers as (archive, object) pairs; the report shows
    // three per cell, so two of them must land in the overflow marker.
    let referencers: Vec<SymbolReference> = [
        ("libc.a", "libc_a-vprintf.o"),
        ("libc.a", "libc_a-printf.o"),
        ("libc.a", "libc_a-fprintf.o"),
        ("liblog.a", "log_write.c.obj"),
        ("libmbedcrypto.a", "sha512.c.obj"),
    ]
    .iter()
    .map(|&(archive, object)| SymbolReference {
        archive: Some(archive.into()),
        object: object.into(),
    })
    .collect();

    let vfprintf = FineGrainedSymbol {
        mangled: "_vfprintf_r".into(),
        demangled: "_vfprintf_r".into(),
        address: 0x4000,
        size: 11309,
        sym_type: 'T',
        region: fbuild_core::MemoryRegion::Flash,
        archive: Some("libc.a".into()),
        object: Some("libc_a-vfprintf.o".into()),
        output_section: Some(".flash.text".into()),
        source: "nm".into(),
        referenced_by: referencers,
    };
    let map = FineGrainedSymbolMap {
        elf_path: "fw.elf".into(),
        map_path: None,
        total_flash: 11309,
        total_ram: 0,
        symbols: vec![vfprintf],
        sections: Vec::<SectionBytes>::new(),
    };

    let md = format_markdown_report(&map, 5);

    // The table header carries the new "Referenced by" column.
    assert!(
        md.contains("| Bytes | Archive | Object | Section | Source | Referenced by | Symbol |")
    );
    // The cell lists the first three referencers, then the overflow marker.
    assert!(
        md.contains("libc.a(libc_a-vprintf.o), libc.a(libc_a-printf.o), libc.a(libc_a-fprintf.o), (… and 2 more)"),
        "expected top-3 + overflow in referenced_by cell, got:\n{md}"
    );
}
891+
892+
#[test]
fn format_markdown_report_referenced_by_empty_renders_dash() {
    use fbuild_core::symbol_analysis::{FineGrainedSymbol, FineGrainedSymbolMap, SectionBytes};

    // A bare-TU symbol (no archive) with no cref referencers at all.
    let main_symbol = FineGrainedSymbol {
        mangled: "main".into(),
        demangled: "main".into(),
        address: 0x4000,
        size: 10,
        sym_type: 'T',
        region: fbuild_core::MemoryRegion::Flash,
        archive: None,
        object: Some("main.cpp.o".into()),
        output_section: Some(".flash.text".into()),
        source: "nm".into(),
        referenced_by: Vec::new(),
    };
    let map = FineGrainedSymbolMap {
        elf_path: "fw.elf".into(),
        map_path: None,
        total_flash: 10,
        total_ram: 0,
        symbols: vec![main_symbol],
        sections: Vec::<SectionBytes>::new(),
    };

    let md = format_markdown_report(&map, 5);

    // With no referencers the "Referenced by" cell must be a lone `-`.
    assert!(
        md.contains("| 10 | (none) | main.cpp.o | .flash.text | nm | - | `main` |"),
        "expected dash in referenced_by cell, got:\n{md}"
    );
}
790922
}

crates/fbuild-core/src/symbol_analysis/README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,17 @@ Pure parsers and aggregators behind `fbuild bloat` (legacy `fbuild symbols`):
44

55
- `parse_nm_line` / `parse_nm_output` — `nm --print-size -S` row parsing.
66
- `parse_linker_map` — GNU `ld -Map` output → per-input-section ranges.
7+
- `parse_cref_table` — GNU `ld --cref` `Cross Reference Table` block → mangled-symbol → referencer `(archive, object)` list. See [#459](https://github.com/FastLED/fbuild/issues/459). Empty result when the map lacks a cref block (older `ld`, `-Wl,--no-cref`) — never a hard error.
78
- `classify_region` — nm type letter → `Flash` / `Ram` bucket.
89
- `FineGrainedSymbolMap::retain_loaded_symbols` — drop symbols whose `[addr, addr+size)` doesn't fit any `PT_LOAD` region, so linker-script boundary markers (`__StackTop`, `__flash_arduino_end`) don't pollute the bloat report.
9-
- `build_fine_grained_map_with_synth` — fold nm rows + map ranges + demangled names into the per-symbol report.
10+
- `build_fine_grained_map_with_synth` — fold nm rows + map ranges + demangled names + cref into the per-symbol report.
11+
12+
Each `FineGrainedSymbol` row carries a `referenced_by: Vec<SymbolReference>` field populated from the cref table. Granularity is `(archive, object)`, not per-symbol — that's a property of `ld --cref` itself.
1013

1114
Intentionally has no ELF-parsing dep; ELF I/O lives in `fbuild_build::symbol_analyzer`, which calls into this module.
1215

1316
## Files
1417

1518
- `mod.rs` — types and pure functions.
19+
- `cref.rs` — `Cross Reference Table` parser.
1620
- `tests.rs` — unit tests.

0 commit comments

Comments
 (0)