Skip to content

Commit 1825dd9

Browse files
committed
fp-stability: rank cancellation by severity (bits lost), not count; resolve continuations
Two fixes prompted by review: (1) site COUNT is not severity — one catastrophic cancellation outweighs many mild ones; (2) attribution can land on a continuation fragment, so the labelled line was unclear. Severity: Verrou exposes no per-site bit-count, but --cc-threshold-double is itself a severity filter (a site is only reported if it lost >= threshold bits). A second pass at 26 bits identifies SEVERE sites with no false positives (may under-count). Severe sites are listed first and labelled; the count-by-file view is demoted with an explicit 'count != severity' caveat. On sod_standard this surfaces the real origins — flux divergence (m_rhs), divided differences and smoothness indicators (m_weno), HLLC wave speeds (m_riemann) — and correctly omits the time integrator. Continuations: _statement_bounds_in_lines follows free-form '&' continuations (leading or trailing) to the logical-statement start; cancellation sites are de-duplicated and displayed as the full statement at its canonical start line, so a hit on a fragment resolves to the whole expression. Pure helpers TDD'd (60 toolchain tests).
1 parent bc7e516 commit 1825dd9

2 files changed

Lines changed: 127 additions & 22 deletions

File tree

toolchain/mfc/fp_stability.py

Lines changed: 88 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,53 @@ def _macro_context(fname: str, lineno: int) -> str:
183183
return _macro_context_in_lines(lines, lineno)
184184

185185

186+
def _ends_with_continuation(line: str) -> bool:
187+
"""True if a free-form Fortran line ends with a continuation '&' (the last
188+
non-blank token before any trailing comment)."""
189+
code = line.split("!", 1)[0].rstrip() # drop trailing comment (string-'!' is rare; fine here)
190+
return code.endswith("&")
191+
192+
193+
def _statement_bounds_in_lines(lines: list, lineno: int) -> tuple:
194+
"""Return the (start, end) 1-based physical line range of the Fortran logical
195+
statement containing lineno, following '&' continuations in both directions.
196+
197+
A hit reported on a continuation fragment thus resolves to the whole
198+
statement, so the labelled location is the full expression rather than a
199+
mid-statement piece.
200+
"""
201+
n = len(lines)
202+
start = lineno
203+
while start > 1 and _ends_with_continuation(lines[start - 2]):
204+
start -= 1
205+
end = lineno
206+
while end < n and _ends_with_continuation(lines[end - 1]):
207+
end += 1
208+
return start, end
209+
210+
211+
def _statement_at(fname: str, lineno: int) -> tuple:
212+
"""File-backed (start, end, text) for the logical statement at fname:lineno;
213+
text is the joined statement. Returns (lineno, lineno, '') if unreadable."""
214+
if os.path.isabs(fname) and os.path.isfile(fname):
215+
candidates = [fname]
216+
else:
217+
candidates = glob.glob(os.path.join(MFC_ROOT_DIR, "src", "**", os.path.basename(fname)), recursive=True)
218+
if not candidates:
219+
return lineno, lineno, ""
220+
try:
221+
with open(candidates[0]) as fh:
222+
lines = fh.readlines()
223+
except OSError:
224+
return lineno, lineno, ""
225+
if not 0 < lineno <= len(lines):
226+
return lineno, lineno, ""
227+
start, end = _statement_bounds_in_lines(lines, lineno)
228+
# join physical lines, dropping the continuation '&' that may lead or trail each
229+
text = " ".join(line.strip().strip("&").strip() for line in lines[start - 1 : end])
230+
return start, end, text
231+
232+
186233
def _is_arithmetic_loc(fname: str, start: int, end: int) -> bool:
187234
"""Return True if any line in [start, end] contains non-trivial arithmetic.
188235
@@ -678,14 +725,22 @@ def _parse_vg_error_locs(log_path: str, error_keyword: str) -> list:
678725
return locs
679726

680727

681-
def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str) -> list:
682-
"""Run with --check-cancellation=yes; return [(fname, line)] of MFC cancellation sites."""
683-
run_dir = os.path.join(work_dir, "cancellation")
728+
# A site reported at this bit threshold has lost at least this many significant
729+
# bits to cancellation — a *severity* floor (Verrou only reports a site when it
730+
# exceeds the threshold, so a high-threshold pass has no false positives).
731+
CANCEL_SEVERE_BITS = 26
732+
733+
734+
def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, threshold: int = 10) -> list:
735+
"""Run --check-cancellation at the given bit threshold; return [(fname, line)]
736+
of MFC cancellation sites (subtractions losing >= `threshold` significant bits)."""
737+
tag = f"cancellation_{threshold}"
738+
run_dir = os.path.join(work_dir, tag)
684739
os.makedirs(run_dir, exist_ok=True)
685740
gen_path = os.path.join(run_dir, "cancel_gen.txt")
686741
flags = [
687742
"--check-cancellation=yes",
688-
"--cc-threshold-double=10",
743+
f"--cc-threshold-double={threshold}",
689744
f"--cc-gen-file={gen_path}",
690745
]
691746
try:
@@ -695,7 +750,7 @@ def _run_cancellation_check(case: dict, verrou_bin: str, sim_bin: str, work_dir:
695750
raw = _parse_cancel_gen(gen_path)
696751
filtered = [(f, ln) for f, ln in raw if _is_arithmetic_loc(f, ln, ln)]
697752
skipped = len(raw) - len(filtered)
698-
if skipped:
753+
if skipped and threshold == 10:
699754
cons.print(f" [dim]cancellation: filtered {skipped} control-flow boundary site(s)[/dim]")
700755
return filtered
701756

@@ -1277,6 +1332,7 @@ def _run_case(
12771332
"dd_line_confirmed": None,
12781333
"dd_line_confirm_dev": None,
12791334
"cancellation_locs": [],
1335+
"cancellation_severe": set(),
12801336
"mca_dev": None,
12811337
"mca_sigbits": None,
12821338
"float_max_locs": [],
@@ -1411,7 +1467,10 @@ def _run_case(
14111467
locs = _run_cancellation_check(case, verrou_bin, sim_bin, work_dir)
14121468
result["cancellation_locs"] = locs
14131469
if locs:
1414-
cons.print(f" cancellation: {len(locs)} unique source location(s)")
1470+
# severity pass: which sites lose >= CANCEL_SEVERE_BITS bits
1471+
severe = set(_run_cancellation_check(case, verrou_bin, sim_bin, work_dir, threshold=CANCEL_SEVERE_BITS))
1472+
result["cancellation_severe"] = severe
1473+
cons.print(f" cancellation: {len(locs)} site(s), {len(severe)} severe (>= {CANCEL_SEVERE_BITS} bits lost)")
14151474
else:
14161475
cons.print(" cancellation: none detected")
14171476
# cross-reference: label dd_line hotspots that sit on a cancellation site
@@ -1620,24 +1679,30 @@ def _emit_github_summary(results: list, n_samples: int):
16201679
if cases_with_cancel:
16211680
md.append("### Catastrophic cancellation sites\n")
16221681
md.append(
1623-
"> Where cancellation actually originates (subtraction of nearly-equal values). This is "
1624-
"the numerically interesting signal — and it usually differs from the sensitivity leader "
1625-
"above.\n"
1682+
"> Where cancellation actually originates (subtraction of nearly-equal values). "
1683+
f"**Severity = significant bits lost; severe = ≥ {CANCEL_SEVERE_BITS} bits.** Site *count* is "
1684+
"not severity — one severe site outweighs many mild ones, so the severe sites are listed "
1685+
"first. (Severe detection has no false positives but may under-count.)\n"
16261686
)
16271687
for r in cases_with_cancel:
1628-
by_file = _cancellation_by_file(r["cancellation_locs"])
1629-
density = ", ".join(f"`{f}` ({n})" for f, n in by_file[:6])
1630-
md.append(f"**`{r['name']}`** — {len(r['cancellation_locs'])} site(s); concentrates in: {density}\n")
1631-
for fname, lineno in r["cancellation_locs"][:15]:
1632-
md.append(f"- `{fname}:{lineno}`")
1633-
snippet = _get_source_context(fname, lineno)
1634-
if snippet:
1635-
md.append(" ```fortran")
1636-
for line in snippet.splitlines():
1637-
md.append(f" {line}")
1638-
md.append(" ```")
1639-
if len(r["cancellation_locs"]) > 15:
1640-
md.append(f"- _…and {len(r['cancellation_locs']) - 15} more site(s); see fp-stability-logs/_")
1688+
severe = r.get("cancellation_severe") or set()
1689+
# collapse continuation fragments to one entry per logical statement,
1690+
# severe statements first (the ones that matter)
1691+
stmts = {} # (basename, stmt_start) -> {where, severe, text}
1692+
for fname, lineno in sorted(r["cancellation_locs"]):
1693+
stmt_start, _end, stmt_text = _statement_at(fname, lineno)
1694+
key = (os.path.basename(fname), stmt_start)
1695+
entry = stmts.setdefault(key, {"where": f"{fname}:{stmt_start}", "severe": False, "text": stmt_text})
1696+
if (fname, lineno) in severe:
1697+
entry["severe"] = True
1698+
ordered = sorted(stmts.values(), key=lambda e: (not e["severe"], e["where"]))
1699+
n_severe_stmt = sum(1 for e in ordered if e["severe"])
1700+
md.append(f"**`{r['name']}`** — {len(stmts)} statement(s), " f"**{n_severe_stmt} severe (≥ {CANCEL_SEVERE_BITS} bits lost)**\n")
1701+
for e in ordered[:15]:
1702+
sev = " **severe**" if e["severe"] else ""
1703+
md.append(f"- `{e['where']}`{sev}" + (f" — `{e['text']}`" if e["text"] else ""))
1704+
if len(ordered) > 15:
1705+
md.append(f"- _…and {len(ordered) - 15} more statement(s); see fp-stability-logs/_")
16411706
md.append("")
16421707

16431708
# Float-max overflow sites
@@ -1746,6 +1811,7 @@ def fp_stability():
17461811
"dd_line_confirmed": None,
17471812
"dd_line_confirm_dev": None,
17481813
"cancellation_locs": [],
1814+
"cancellation_severe": set(),
17491815
"mca_dev": None,
17501816
"mca_sigbits": None,
17511817
"float_max_locs": [],

toolchain/mfc/test_fp_stability.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
_macro_context_in_lines,
1414
_mark_cancellation,
1515
_rank_locs,
16+
_statement_bounds_in_lines,
1617
)
1718

1819
# --- #2: fypp macro-expansion context detection ---
@@ -218,3 +219,41 @@ def test_cancellation_by_file_breaks_ties_by_name():
218219

219220
def test_cancellation_by_file_empty():
220221
assert _cancellation_by_file([]) == []
222+
223+
224+
# --- Fortran line-continuation handling (correct-line labeling) ---
225+
226+
227+
def test_statement_bounds_single_line():
228+
lines = [" a = b - c\n"]
229+
assert _statement_bounds_in_lines(lines, 1) == (1, 1)
230+
231+
232+
def test_statement_bounds_spans_continuation_from_first_line():
233+
lines = [" poly = (s_cb(i+3) - s_cb(i+1)) * &\n", " (s_cb(i+2) - s_cb(i))\n"]
234+
assert _statement_bounds_in_lines(lines, 1) == (1, 2)
235+
236+
237+
def test_statement_bounds_from_middle_continuation_line():
238+
# a hit on the continuation fragment must resolve to the statement start
239+
lines = [" x = a + &\n", " b + &\n", " c\n"]
240+
assert _statement_bounds_in_lines(lines, 2) == (1, 3)
241+
assert _statement_bounds_in_lines(lines, 3) == (1, 3)
242+
243+
244+
def test_statement_bounds_ignores_ampersand_in_trailing_comment_logic():
245+
# a real continuation '&' before a trailing comment still continues
246+
lines = [" x = a & ! note\n", " + b\n"]
247+
assert _statement_bounds_in_lines(lines, 1) == (1, 2)
248+
249+
250+
def test_statement_bounds_non_continuation_neighbors():
251+
lines = [" x = 1\n", " y = 2\n", " z = 3\n"]
252+
assert _statement_bounds_in_lines(lines, 2) == (2, 2)
253+
254+
255+
def test_statement_bounds_with_leading_ampersand_continuation():
256+
# the MFC WENO style: line ends with '&' and the next line *starts* with '&'
257+
lines = [" beta = x**2 &\n", " & + eps\n"]
258+
assert _statement_bounds_in_lines(lines, 1) == (1, 2)
259+
assert _statement_bounds_in_lines(lines, 2) == (1, 2)

0 commit comments

Comments
 (0)