Skip to content

Commit 84bec6d

Browse files
committed
fp-stability: lead with cancellation origins, report digits lost (not 'severe'), collapse sensitivity
Review feedback on the summary: (1) it buried the interesting cancellation origins below the long, mostly-expected sensitivity list; (2) 'severe' is a binary label when an actual magnitude is far more useful; (3) 'bits lost' is not intuitive. Reorder: the cancellation-origins section now leads (right after the results table), ranked worst-first; the single-precision sensitivity list (dominated by the benign time integrator) is collapsed into a <details>. Severity as a number: a sweep of --cc-threshold-double levels [10,20,30,40,48] buckets each site by the highest it survives (_cancellation_severity), giving per-site bits lost (a lower bound). Bits are translated to decimal digits (a double carries ~16; _digits_left) so each entry reads e.g. '>= 12 digits lost (~4 of 16 left)' with the full statement. On sod_standard the worst origins (flux divergence, divided differences, HLLC wave speeds) lose ~14 of 16 digits; the sweep discriminates (23 sites >=10 bits, 11 >=48). 69 toolchain tests.
1 parent b9e790f commit 84bec6d

2 files changed

Lines changed: 88 additions & 45 deletions

File tree

toolchain/mfc/fp_stability.py

Lines changed: 70 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -750,10 +750,28 @@ def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list:
750750
return locs
751751

752752

753-
# A site reported at this bit threshold has lost at least this many significant
754-
# bits to cancellation — a *severity* floor (Verrou only reports a site when it
755-
# exceeds the threshold, so a high-threshold pass has no false positives).
756-
CANCEL_SEVERE_BITS = 26
753+
# Verrou exposes no per-site bit-count, but --cc-threshold-double is a severity
754+
# filter: a site is reported only if it lost >= the threshold bits. Sweeping these
755+
# levels and taking the highest each site survives gives a per-site "bits lost"
756+
# severity (a lower bound — no false positives). 48 bits is close to the full 53-bit double mantissa.
757+
CANCEL_BIT_LEVELS = [10, 20, 30, 40, 48]  # keep ascending: the sweep treats the first (lowest) level's result as the full site list
758+
759+
760+
def _cancellation_severity(level_sites: list) -> dict:
    """Map every reported site to the largest threshold at which it was seen.

    `level_sites` is an iterable of ``(threshold, sites)`` pairs. The result
    maps each site to the maximum threshold whose site list contains it, read
    as a lower bound on the bits that site lost. A threshold must be strictly
    greater than 0 to be recorded, so sites seen only at non-positive
    thresholds are omitted. Sites appear in first-seen order.
    """
    worst_by_site = {}
    for threshold, reported in level_sites:
        for location in reported:
            best_so_far = worst_by_site.get(location, 0)
            if not threshold > best_so_far:
                # already recorded at an equal or higher threshold
                continue
            worst_by_site[location] = threshold
    return worst_by_site
769+
770+
771+
def _digits_left(bits_lost: float) -> float:
    """Estimate the decimal digits still trustworthy after a cancellation.

    Subtracts `bits_lost` from the 53 bits of a double's mantissa and converts
    the remaining bits to decimal digits (53 bits is about 15.95 digits). The
    result is clamped so it never drops below 0.0.
    """
    remaining_bits = 53 - bits_lost
    digits = remaining_bits / math.log2(10)  # bits -> decimal digits
    return digits if digits > 0.0 else 0.0
757775

758776

759777
def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10) -> list:
@@ -1356,7 +1374,7 @@ def _run_case(
13561374
"dd_line_confirmed": None,
13571375
"dd_line_confirm_dev": None,
13581376
"cancellation_locs": [],
1359-
"cancellation_severe": set(),
1377+
"cancellation_bits": {},
13601378
"mca_dev": None,
13611379
"mca_sigbits": None,
13621380
"float_max_locs": [],
@@ -1493,13 +1511,15 @@ def _run_case(
14931511
if run_cancellation:
14941512
cons.print(" [dim]cancellation detection...[/dim]")
14951513
try:
1496-
locs = _run_cancellation_check(case, verrou_bin, sim_bin, work_dir)
1514+
# sweep bit thresholds to get per-site severity (bits lost)
1515+
level_sites = [(level, _run_cancellation_check(case, verrou_bin, sim_bin, work_dir, threshold=level)) for level in CANCEL_BIT_LEVELS]
1516+
locs = level_sites[0][1] # lowest threshold = full list
1517+
bits = _cancellation_severity(level_sites)
14971518
result["cancellation_locs"] = locs
1519+
result["cancellation_bits"] = bits
14981520
if locs:
1499-
# severity pass: which sites lose >= CANCEL_SEVERE_BITS bits
1500-
severe = set(_run_cancellation_check(case, verrou_bin, sim_bin, work_dir, threshold=CANCEL_SEVERE_BITS))
1501-
result["cancellation_severe"] = severe
1502-
cons.print(f" cancellation: {len(locs)} site(s), {len(severe)} severe (>= {CANCEL_SEVERE_BITS} bits lost)")
1521+
worst = max(bits.values()) if bits else 0
1522+
cons.print(f" cancellation: {len(locs)} site(s), worst loses ≥ {worst / math.log2(10):.0f} of ~16 digits")
15031523
else:
15041524
cons.print(" cancellation: none detected")
15051525
# cross-reference: label dd_line hotspots that sit on a cancellation site
@@ -1631,6 +1651,40 @@ def _emit_github_summary(results: list, n_samples: int):
16311651
md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} | {sb} |")
16321652
md.append("")
16331653

1654+
# Cancellation ORIGINS — where ill-conditioning actually arises, led with the
1655+
# most severe (most bits lost). The numerically interesting signal; the
1656+
# sensitivity list further down is dominated by the (benign) time integrator.
1657+
cases_with_cancel = [r for r in results if r.get("cancellation_locs")]
1658+
if cases_with_cancel:
1659+
md.append("### Catastrophic cancellation origins (ranked by digits lost)\n")
1660+
md.append(
1661+
"> Subtraction of nearly-equal values loses leading significant digits. A double carries "
1662+
"~**16 significant digits** (53 bits); each entry shows how many that subtraction throws away "
1663+
"(worst case, a lower bound). Losing ~8 digits halves your accuracy; losing ~13+ leaves only "
1664+
"single-precision trust. Site *count* is not severity — one site losing many digits outweighs "
1665+
"many mild ones.\n"
1666+
)
1667+
for r in cases_with_cancel:
1668+
site_bits = r.get("cancellation_bits") or {}
1669+
# collapse continuation fragments to one entry per logical statement,
1670+
# keeping the worst bits-lost seen on that statement
1671+
stmts = {} # (basename, stmt_start) -> {where, bits, text}
1672+
for fname, lineno in r["cancellation_locs"]:
1673+
stmt_start, _end, stmt_text = _statement_at(fname, lineno)
1674+
key = (os.path.basename(fname), stmt_start)
1675+
e = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "bits": 0, "text": stmt_text})
1676+
e["bits"] = max(e["bits"], site_bits.get((fname, lineno), 0))
1677+
ordered = sorted(stmts.values(), key=lambda e: (-e["bits"], e["where"]))
1678+
if ordered:
1679+
w = ordered[0]
1680+
md.append(f"**`{r['name']}`** — {len(stmts)} statement(s); worst loses ≥ {w['bits'] / math.log2(10):.0f} of ~16 digits\n")
1681+
for e in ordered[:15]:
1682+
lost = e["bits"] / math.log2(10)
1683+
md.append(f"- **≥ {lost:.0f} digits lost** (~{_digits_left(e['bits']):.0f} of 16 left) — `{e['where']}`" + (f" — `{e['text']}`" if e["text"] else ""))
1684+
if len(ordered) > 15:
1685+
md.append(f"- _…and {len(ordered) - 15} more statement(s); see fp-stability-logs/_")
1686+
md.append("")
1687+
16341688
# VPREC sweep — one column per bit level, ❌ where bits retained < floor
16351689
if any(r["vprec"] for r in results):
16361690
_labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"}
@@ -1660,11 +1714,12 @@ def _emit_github_summary(results: list, n_samples: int):
16601714
# get re-rounded there. Not a culprit-finder for ill-conditioning.
16611715
cases_with_locs = [r for r in results if r["dd_line_locs"]]
16621716
if cases_with_locs:
1663-
md.append("### Single-precision sensitivity (dd\\_line)\n")
1717+
md.append("<details>")
1718+
md.append("<summary>Single-precision sensitivity (dd_line) — usually the time integrator; expand for details</summary>\n")
16641719
md.append(
16651720
"> Where reduced precision most moves the output — **typically the time integrator / "
1666-
"final accumulation, which is expected and benign**. This is *not* the same as where "
1667-
"cancellation originates; see **Catastrophic cancellation sites** below for that.\n"
1721+
"final accumulation, which is expected and benign**. This is *not* where cancellation "
1722+
"originates (that's the section above); it shows where precision matters most.\n"
16681723
)
16691724
_confirm_label = {True: "✅ confirmed", False: "⚠️ unconfirmed (suspect-only perturbation did not reproduce)", None: "— not checked"}
16701725
for r in cases_with_locs:
@@ -1695,6 +1750,7 @@ def _emit_github_summary(results: list, n_samples: int):
16951750
if len(r["dd_line_locs"]) > 10:
16961751
md.append(f"- _…and {len(r['dd_line_locs']) - 10} more hotspot(s); see fp-stability-logs/_")
16971752
md.append("")
1753+
md.append("</details>\n")
16981754

16991755
# dd_sym function names (collapsed, since less actionable than dd_line)
17001756
cases_with_syms = [r for r in results if r["dd_sym_syms"]]
@@ -1707,37 +1763,6 @@ def _emit_github_summary(results: list, n_samples: int):
17071763
md.append(f"- `{sym}`")
17081764
md.append("\n</details>\n")
17091765

1710-
# Cancellation hotspots — the ORIGIN view (where ill-conditioning concentrates).
1711-
cases_with_cancel = [r for r in results if r.get("cancellation_locs")]
1712-
if cases_with_cancel:
1713-
md.append("### Catastrophic cancellation sites\n")
1714-
md.append(
1715-
"> Where cancellation actually originates (subtraction of nearly-equal values). "
1716-
f"**Severity = significant bits lost; severe = ≥ {CANCEL_SEVERE_BITS} bits.** Site *count* is "
1717-
"not severity — one severe site outweighs many mild ones, so the severe sites are listed "
1718-
"first. (Severe detection has no false positives but may under-count.)\n"
1719-
)
1720-
for r in cases_with_cancel:
1721-
severe = r.get("cancellation_severe") or set()
1722-
# collapse continuation fragments to one entry per logical statement,
1723-
# severe statements first (the ones that matter)
1724-
stmts = {} # (basename, stmt_start) -> {where, severe, text}
1725-
for fname, lineno in sorted(r["cancellation_locs"]):
1726-
stmt_start, _end, stmt_text = _statement_at(fname, lineno)
1727-
key = (os.path.basename(fname), stmt_start)
1728-
entry = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "severe": False, "text": stmt_text})
1729-
if (fname, lineno) in severe:
1730-
entry["severe"] = True
1731-
ordered = sorted(stmts.values(), key=lambda e: (not e["severe"], e["where"]))
1732-
n_severe_stmt = sum(1 for e in ordered if e["severe"])
1733-
md.append(f"**`{r['name']}`** — {len(stmts)} statement(s), " f"**{n_severe_stmt} severe (≥ {CANCEL_SEVERE_BITS} bits lost)**\n")
1734-
for e in ordered[:15]:
1735-
sev = " **severe**" if e["severe"] else ""
1736-
md.append(f"- `{e['where']}`{sev}" + (f" — `{e['text']}`" if e["text"] else ""))
1737-
if len(ordered) > 15:
1738-
md.append(f"- _…and {len(ordered) - 15} more statement(s); see fp-stability-logs/_")
1739-
md.append("")
1740-
17411766
# Float-max overflow sites
17421767
cases_with_fmax = [r for r in results if r.get("float_max_locs")]
17431768
if cases_with_fmax:
@@ -1844,7 +1869,7 @@ def fp_stability():
18441869
"dd_line_confirmed": None,
18451870
"dd_line_confirm_dev": None,
18461871
"cancellation_locs": [],
1847-
"cancellation_severe": set(),
1872+
"cancellation_bits": {},
18481873
"mca_dev": None,
18491874
"mca_sigbits": None,
18501875
"float_max_locs": [],

toolchain/mfc/test_fp_stability.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
MIN_SIG_BITS,
1111
_build_source_filter,
1212
_cancellation_by_file,
13+
_cancellation_severity,
1314
_confirm_decision,
1415
_macro_context_in_lines,
1516
_mark_cancellation,
@@ -224,6 +225,23 @@ def test_cancellation_by_file_empty():
224225
assert _cancellation_by_file([]) == []
225226

226227

228+
# --- per-site cancellation severity (bits lost), from a threshold sweep ---
229+
230+
231+
def test_cancellation_severity_takes_highest_surviving_threshold():
    """A site's severity is the highest threshold whose report still lists it."""
    deep_site = ("a.fpp", 1)  # reported at every level of the sweep
    shallow_site = ("b.fpp", 2)  # reported only at the lowest level
    sweep = [
        (10, [deep_site, shallow_site]),
        (20, [deep_site]),
        (30, [deep_site]),
    ]
    expected = {deep_site: 30, shallow_site: 10}
    assert _cancellation_severity(sweep) == expected
239+
240+
241+
def test_cancellation_severity_empty():
    """An empty sweep produces an empty severity map."""
    empty_sweep = []
    assert _cancellation_severity(empty_sweep) == {}
243+
244+
227245
# --- scale-free pass/fail: significant bits retained ---
228246

229247

0 commit comments

Comments
 (0)