Skip to content

Commit b9e790f

Browse files
committed
fp-stability: scale-free pass/fail via significant bits, replacing 6 hand-tuned thresholds
Each case had a hand-tuned absolute L-inf threshold spanning 1e-13..2e-7 (six orders), driven by field magnitude and conditioning. Maintaining per-case thresholds is fragile. Normalizing the deviation by the field's peak magnitude removes the scale, so a single global criterion suffices. Pass/fail is now sig_bits = -log2(max_dev / max|ref|) >= MIN_SIG_BITS (24 = single precision retained under random rounding). The per-case 'threshold' field is removed from CASES; pass/fail, the VPREC FAIL marker, console, summary table, and inline annotations all report bits-retained vs the floor. The dd_sym/dd_line oracle keeps its own float-proxy-derived threshold (unchanged). Validated: max_dev spans 1e-14..7e-8 across the 6 cases but sig_bits is a tight 30.3..48.7 band, all >= 24 with margin; classification matches the prior thresholds (6/6 pass). Pure _sig_bits/_stability_pass are TDD'd (67 toolchain tests). A per-case auto-measured baseline + regression delta would add sensitivity for moderate drops; deferred as a heavier change.
1 parent 1825dd9 commit b9e790f

2 files changed

Lines changed: 98 additions & 25 deletions

File tree

toolchain/mfc/fp_stability.py

Lines changed: 58 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
Features
55
--------
66
A. Stability suite (always)
7-
N random-rounding samples per case, threshold-based PASS/FAIL.
7+
N random-rounding samples per case; PASS/FAIL on significant bits retained
8+
(scale-free: -log2(max_dev/scale) vs one global floor, no per-case threshold).
89
910
B. Float proxy (--no-float-proxy to skip)
1011
One run with --rounding-mode=float — deterministic proxy for
@@ -92,6 +93,35 @@
9293
# 52 = full double, 23 = single, 16 = half-ish, 10 = ultra-low.
9394
VPREC_MANTISSA_BITS = [52, 23, 16, 10]
9495

96+
# Stability pass/fail (stage A) is scale-free: a case must retain at least this
97+
# many significant bits under random rounding (sig_bits = -log2(max_dev/scale)).
98+
# 24 ~= single precision. One global floor replaces per-case absolute thresholds
99+
# (which spanned 6 orders of magnitude purely from field scale + conditioning);
100+
# normalising by the field scale collapses that, so a single number suffices.
101+
MIN_SIG_BITS = 24
102+
103+
# Fallback absolute threshold for the dd_sym/dd_line oracle when no float-proxy-
104+
# derived threshold is supplied (callers always pass one, so this is only a guard).
105+
_DD_FALLBACK_THRESHOLD = 1e-12
106+
107+
108+
def _sig_bits(max_dev: float, ref_scale: float) -> float:
109+
"""Significant bits retained = -log2(max_dev / ref_scale).
110+
111+
Scale-free: dividing the deviation by the field's peak magnitude removes the
112+
absolute scale, leaving only the conditioning. Zero deviation (or zero
113+
scale) returns 53.0 = full double precision retained.
114+
"""
115+
if not (max_dev > 0) or not (ref_scale > 0):
116+
return 53.0
117+
return -math.log2(max_dev / ref_scale)
118+
119+
120+
def _stability_pass(max_dev: float, ref_scale: float, floor: float) -> bool:
121+
"""A case passes when it retains at least `floor` significant bits."""
122+
return _sig_bits(max_dev, ref_scale) >= floor
123+
124+
95125
# Matches "path/file.f90:123" or "path/file.fpp:123-456" in dd_line rddmin_summary.
96126
_LOC_RE = re.compile(r"(\S+\.(?:f90|fpp|c|cpp|h|F90))\s*:(\d+)(?:-(\d+))?", re.IGNORECASE)
97127

@@ -341,16 +371,16 @@ def _merge(*dicts):
341371
# name - unique identifier used in log paths and console output
342372
# description - human-readable summary
343373
# compare - D/ output files compared between reference and perturbed runs
344-
# threshold - max L∞ deviation allowed before the case is declared FAIL
345374
# ill_cond - known source of cancellation (empty string = none expected)
375+
# Pass/fail is scale-free (>= MIN_SIG_BITS significant bits retained), so cases
376+
# need no per-case deviation threshold regardless of field magnitude.
346377
# pre - parameters for pre_process (generates initial conditions)
347378
# sim - parameters for simulation
348379
CASES = [
349380
{
350381
"name": "sod_standard",
351382
"description": "1-D standard Sod, p_L/p_R=10, ideal gas (well-conditioned baseline)",
352383
"compare": ["cons.1.00.000050.dat", "cons.3.00.000050.dat"],
353-
"threshold": 1e-13,
354384
"ill_cond": "",
355385
"pre": _merge(
356386
_BASE_PRE,
@@ -373,7 +403,6 @@ def _merge(*dicts):
373403
"name": "sod_strong",
374404
"description": "1-D Sod, p_L/p_R=100,000, ideal gas",
375405
"compare": ["cons.1.00.000050.dat", "cons.3.00.000050.dat"],
376-
"threshold": 1e-10,
377406
"ill_cond": "HLLC xi factor: (s_L - vel_L)/(s_L - s_S) cancels near sonic contact",
378407
"pre": _merge(
379408
_BASE_PRE,
@@ -396,8 +425,7 @@ def _merge(*dicts):
396425
"name": "water_stiffened",
397426
"description": "1-D water shock, stiffened EOS (pi_inf=4046)",
398427
"compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"],
399-
"threshold": 1e-8,
400-
"ill_cond": "Pressure recovery: p=(E-pi_inf)/gamma loses ~4 digits (pi_inf/p_right~40,000) [threshold loosened until reduced-energy (Etilde) scheme is merged]",
428+
"ill_cond": "Pressure recovery: p=(E-pi_inf)/gamma loses ~4 digits (pi_inf/p_right~40,000)",
401429
"pre": _merge(
402430
_BASE_PRE,
403431
_WATER_EOS,
@@ -419,7 +447,6 @@ def _merge(*dicts):
419447
"name": "air_water_interface",
420448
"description": "1-D air/water isobaric contact (two-fluid, pi_inf=4046)",
421449
"compare": ["cons.1.00.000050.dat", "cons.4.00.000050.dat", "cons.5.00.000050.dat"],
422-
"threshold": 1e-10,
423450
"ill_cond": "Mixed-cell pressure recovery: E-alpha_w*gamma_w*pi_inf cancels when alpha_w<<1",
424451
"pre": _merge(
425452
_BASE_PRE,
@@ -460,7 +487,6 @@ def _merge(*dicts):
460487
"name": "bubble_rp",
461488
"description": "1-D bubbly water, pressure step 2:1 driving Rayleigh-Plesset oscillations (nb=1, Keller-Miksis)",
462489
"compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"],
463-
"threshold": 1e-8,
464490
"ill_cond": "RP ODE: (p_bub - p_ext) cancels near bubble equilibrium",
465491
"pre": _merge(
466492
_BASE_PRE,
@@ -528,8 +554,7 @@ def _merge(*dicts):
528554
"name": "low_mach",
529555
"description": "1-D water shock with low_Mach=1 HLLC correction active",
530556
"compare": ["cons.1.00.000050.dat", "prim.3.00.000050.dat"],
531-
"threshold": 2e-7,
532-
"ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M≈0 (threshold loosened to 2e-7 to absorb MCA sampling variance)",
557+
"ill_cond": "low_Mach correction: velocity perturbation ~u/c cancels severely at M≈0",
533558
"pre": _merge(
534559
_BASE_PRE,
535560
_WATER_EOS,
@@ -1121,7 +1146,7 @@ def _run_dd_sym(case: dict, verrou_bin: str, sim_bin: str, work_dir: str, log_di
11211146
dd_run_sh = os.path.join(dd_dir, "dd_run.sh")
11221147
dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py")
11231148
_write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir)
1124-
_write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else case["threshold"])
1149+
_write_dd_cmp_py(dd_cmp_py, case["compare"], threshold if threshold is not None else _DD_FALLBACK_THRESHOLD)
11251150
_run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_sym.log", "dd.sym", "verrou_dd_sym")
11261151
cons.print(f" [dim]dd_sym logs: {dd_dir}[/dim]")
11271152
return _parse_rddmin_syms(os.path.join(dd_dir, "dd.sym", "rddmin_summary"))
@@ -1145,7 +1170,7 @@ def _run_dd_line(
11451170
os.makedirs(dd_dir, exist_ok=True)
11461171
dd_run_sh = os.path.join(dd_dir, "dd_run.sh")
11471172
dd_cmp_py = os.path.join(dd_dir, "dd_cmp.py")
1148-
effective_threshold = threshold if threshold is not None else case["threshold"]
1173+
effective_threshold = threshold if threshold is not None else _DD_FALLBACK_THRESHOLD
11491174
_write_dd_run_sh(dd_run_sh, verrou_bin, sim_bin, work_dir)
11501175
_write_dd_cmp_py(dd_cmp_py, case["compare"], effective_threshold)
11511176
_run_dd_tool(dd_bin, dd_dir, dd_run_sh, dd_cmp_py, _dd_env(verrou_bin), "dd_line.log", "dd.line", "verrou_dd_line")
@@ -1310,21 +1335,20 @@ def _run_case(
13101335
prec_sim_bin: str = None,
13111336
) -> dict:
13121337
name = case["name"]
1313-
threshold = case["threshold"]
13141338
compare = case["compare"]
13151339

13161340
cons.print(f"[bold]{name}[/bold]: {case['description']}")
13171341
cons.indent()
13181342
if case["ill_cond"]:
13191343
cons.print(f" ill-conditioning: {case['ill_cond']}")
1320-
cons.print(f" threshold: {threshold:.0e}")
1344+
cons.print(f" pass floor: >= {MIN_SIG_BITS} significant bits retained")
13211345

13221346
work_dir = tempfile.mkdtemp(prefix=f"mfc-fps-{name}-")
13231347
result = {
13241348
"name": name,
13251349
"passed": False,
13261350
"max_dev": float("inf"),
1327-
"threshold": threshold,
1351+
"sig_bits": None,
13281352
"float_proxy": None,
13291353
"vprec": [],
13301354
"dd_sym_syms": [],
@@ -1348,6 +1372,9 @@ def _run_case(
13481372
_run_simulation_verrou(verrou_bin, sim_bin, work_dir, ref_dir, rounding_mode="nearest")
13491373

13501374
# --- A: random-rounding stability samples ---
1375+
# Pass/fail is scale-free: bits retained = -log2(max_dev / field-scale),
1376+
# vs one global floor (no per-case hand-tuned absolute threshold).
1377+
ref_scale = _max_abs_np(ref_dir, compare)
13511378
max_dev = 0.0
13521379
cons.print(f" [dim]random-rounding runs (N={n_samples})...[/dim]")
13531380
for i in range(n_samples):
@@ -1356,11 +1383,13 @@ def _run_case(
13561383
_run_simulation_verrou(verrou_bin, sim_bin, work_dir, run_dir, rounding_mode="random")
13571384
max_dev = max(max_dev, _max_diff_np(ref_dir, run_dir, compare))
13581385

1359-
passed = max_dev <= threshold
1386+
sig_bits = _sig_bits(max_dev, ref_scale)
1387+
passed = sig_bits >= MIN_SIG_BITS
13601388
result["passed"] = passed
13611389
result["max_dev"] = max_dev
1390+
result["sig_bits"] = sig_bits
13621391
tag = "[bold green]PASS[/bold green]" if passed else "[bold red]FAIL[/bold red]"
1363-
cons.print(f" {tag} max_dev={max_dev:.3e} threshold={threshold:.0e}")
1392+
cons.print(f" {tag} {sig_bits:.1f} bits retained (floor {MIN_SIG_BITS}) max_dev={max_dev:.3e}")
13641393

13651394
# --- B: float proxy ---
13661395
if run_float:
@@ -1383,7 +1412,7 @@ def _run_case(
13831412
marker = ""
13841413
if dev == float("inf"):
13851414
marker = " [red]crashed[/red]"
1386-
elif dev > threshold:
1415+
elif _sig_bits(dev, ref_scale) < MIN_SIG_BITS:
13871416
marker = " [red]FAIL[/red]"
13881417
cons.print(f" {bits:2d} bits{label_str}: dev={dev:.3e}{marker}")
13891418

@@ -1531,7 +1560,9 @@ def _emit_github_annotations(results: list):
15311560
return
15321561
for r in results:
15331562
status = "FAIL" if not r["passed"] else "sensitivity"
1534-
dev_str = f"max_dev={r['max_dev']:.2e} (threshold {r['threshold']:.0e})"
1563+
_sb = r.get("sig_bits")
1564+
_sb_str = f"{_sb:.0f} bits retained (floor {MIN_SIG_BITS})" if _sb is not None else "n/a"
1565+
dev_str = f"{_sb_str}, max_dev={r['max_dev']:.2e}"
15351566
unconfirmed = r.get("dd_line_confirmed") is False
15361567

15371568
for loc in r.get("dd_line_locs", [])[:3]:
@@ -1588,17 +1619,19 @@ def _emit_github_summary(results: list, n_samples: int):
15881619
"they do not reach.\n"
15891620
)
15901621

1591-
# Main results table
1592-
md.append("| Case | Status | max\\_dev | threshold | Float proxy | MCA sig bits |")
1593-
md.append("|------|:------:|--------:|--------:|--------:|:------:|")
1622+
# Main results table — pass/fail is scale-free: bits retained vs a single floor
1623+
md.append(f"_Pass = at least **{MIN_SIG_BITS} significant bits** retained under random rounding (scale-free; no per-case threshold)._\n")
1624+
md.append("| Case | Status | bits retained | max\\_dev | Float proxy | MCA sig bits |")
1625+
md.append("|------|:------:|:------:|--------:|--------:|:------:|")
15941626
for r in results:
15951627
status = "✅" if r["passed"] else "❌"
1628+
bits = f"{r['sig_bits']:.1f}" if r.get("sig_bits") is not None else "—"
15961629
fp = f"{r['float_proxy']:.2e}" if r["float_proxy"] is not None else "—"
15971630
sb = str(r["mca_sigbits"]) if r.get("mca_sigbits") is not None else "—"
1598-
md.append(f"| `{r['name']}` | {status} | {r['max_dev']:.2e} | {r['threshold']:.0e} | {fp} | {sb} |")
1631+
md.append(f"| `{r['name']}` | {status} | {bits} / {MIN_SIG_BITS} | {r['max_dev']:.2e} | {fp} | {sb} |")
15991632
md.append("")
16001633

1601-
# VPREC sweep — one column per bit level, ❌ where dev > threshold
1634+
# VPREC sweep — one column per bit level, ❌ where bits retained < floor
16021635
if any(r["vprec"] for r in results):
16031636
_labels = {52: "52b", 23: "23b", 16: "16b", 10: "10b"}
16041637
header = " | ".join(_labels[b] for b in VPREC_MANTISSA_BITS)
@@ -1803,7 +1836,7 @@ def fp_stability():
18031836
"name": case["name"],
18041837
"passed": False,
18051838
"max_dev": float("inf"),
1806-
"threshold": case["threshold"],
1839+
"sig_bits": None,
18071840
"float_proxy": None,
18081841
"vprec": [],
18091842
"dd_sym_syms": [],

toolchain/mfc/test_fp_stability.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,15 @@
77
"""
88

99
from mfc.fp_stability import (
10+
MIN_SIG_BITS,
1011
_build_source_filter,
1112
_cancellation_by_file,
1213
_confirm_decision,
1314
_macro_context_in_lines,
1415
_mark_cancellation,
1516
_rank_locs,
17+
_sig_bits,
18+
_stability_pass,
1619
_statement_bounds_in_lines,
1720
)
1821

@@ -221,6 +224,43 @@ def test_cancellation_by_file_empty():
221224
assert _cancellation_by_file([]) == []
222225

223226

227+
# --- scale-free pass/fail: significant bits retained ---
228+
229+
230+
def test_sig_bits_relative_deviation():
231+
# max_dev/ref_scale = 1e-14 -> ~46.5 retained bits
232+
assert 46 < _sig_bits(1e-14, 1.0) < 47
233+
234+
235+
def test_sig_bits_is_scale_free():
236+
# same relative deviation -> same bits regardless of absolute magnitude
237+
assert abs(_sig_bits(1e-9, 1.0) - _sig_bits(1e-4, 1e5)) < 1e-9
238+
239+
240+
def test_sig_bits_zero_deviation_is_full_precision():
241+
assert _sig_bits(0.0, 1.0) == 53.0
242+
243+
244+
def test_sig_bits_zero_scale_is_safe():
245+
assert _sig_bits(1e-12, 0.0) == 53.0
246+
247+
248+
def test_sig_bits_deviation_at_scale_is_unstable():
249+
# deviation as large as the field -> <= 0 retained bits
250+
assert _sig_bits(1.0, 1.0) <= 0.0
251+
252+
253+
def test_stability_pass_uses_global_floor():
254+
# well-conditioned: ~46 bits >= floor
255+
assert _stability_pass(1e-14, 1.0, MIN_SIG_BITS) is True
256+
# catastrophic: deviation at field scale -> fails
257+
assert _stability_pass(0.5, 1.0, MIN_SIG_BITS) is False
258+
259+
260+
def test_min_sig_bits_is_single_precision_floor():
261+
assert MIN_SIG_BITS == 24
262+
263+
224264
# --- Fortran line-continuation handling (correct-line labeling) ---
225265

226266

0 commit comments

Comments
 (0)