Skip to content

Commit caf7e38

Browse files
authored
Merge pull request #723 from PlanExeOrg/feat/napkin-math-explain-saturated-gate-exclusion
fix(napkin-math): saturated-gate note + DOOM severity wording + MARGINAL band bucketing
2 parents a401990 + 694e72f commit caf7e38

1 file changed

Lines changed: 81 additions & 13 deletions

File tree

experiments/napkin_math/summarize_assessment.py

Lines changed: 81 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -728,12 +728,22 @@ def render_decision_implications(mc: dict | None, params: dict | None) -> list[s
728728
f"({(1 - prob) * 100:.1f}%). External commitments built on this "
729729
f"gate are exposed."
730730
)
731-
else: # MARGINAL
732-
consequence = (
733-
f"The `{cond}` requirement passes {prob * 100:.1f}% of runs — "
734-
f"close enough to coin-flip that downstream commitments should "
735-
f"not assume it holds."
736-
)
731+
else: # MARGINAL (50–80% pass rate)
732+
# The MARGINAL band spans a large range. A 51% pass is genuinely
733+
# coin-flip; a 79% pass is one slip from ROBUST. Bucket the
734+
# wording so the language tracks the position within the band.
735+
if prob >= 0.70:
736+
consequence = (
737+
f"The `{cond}` requirement passes {prob * 100:.1f}% of runs — "
738+
f"just below the ROBUST band. The gate passes in most runs, "
739+
f"but downstream commitments should not treat it as secure."
740+
)
741+
else:
742+
consequence = (
743+
f"The `{cond}` requirement passes {prob * 100:.1f}% of runs — "
744+
f"close to coin-flip. Downstream commitments should not "
745+
f"assume it holds."
746+
)
737747
drivers = quartile.get(r["id"]) or []
738748
top = max(drivers, key=lambda d: abs(d.get("delta_pp", 0)), default=None)
739749
if is_saturated_failure(r["probability"], top):
@@ -835,17 +845,51 @@ def render_failure_drivers(mc: dict | None, params: dict | None) -> list[str]:
835845

836846
# ─── missing inputs ranked by impact ───────────────────────────────────────
837847

838-
def render_missing_inputs_ranked(mc: dict | None) -> list[str]:
848+
def saturated_doom_gates(mc: dict | None, params: dict | None) -> list[str]:
    """Return the ids of DOOM gates that `is_saturated_failure` flags.

    A gate is reported when its verdict is "DOOM" and `is_saturated_failure`
    accepts its pass probability together with its strongest quartile driver
    (the `quartile_analysis` entry with the largest absolute `delta_pp`, or
    None when the gate has no drivers).

    Rationale: such gates do not show up in `missing_value_priority`, since
    no input has them as its worst-affected gate. Listing them explicitly
    above the ranking answers the question a downstream reader would
    otherwise ask: "why is the ranking targeting other gates than the worst
    declared one?".

    Args:
        mc: Monte Carlo result mapping; a falsy value yields an empty list.
        params: Passed through unchanged to `threshold_entries`.

    Returns:
        Gate ids in the order `threshold_entries` yields them.
    """
    if not mc:
        return []

    drivers_by_gate = mc.get("quartile_analysis") or {}

    def _is_saturated(entry: dict) -> bool:
        # Pick the driver with the largest absolute swing; a missing
        # `delta_pp` counts as 0, and a gate without drivers yields None.
        candidates = drivers_by_gate.get(entry["id"]) or []
        strongest = max(
            candidates,
            key=lambda drv: abs(drv.get("delta_pp", 0)),
            default=None,
        )
        return is_saturated_failure(entry["probability"], strongest)

    return [
        entry["id"]
        for entry in threshold_entries(mc, params)
        if entry["verdict"] == "DOOM" and _is_saturated(entry)
    ]
868+
869+
870+
def render_missing_inputs_ranked(mc: dict | None, params: dict | None = None) -> list[str]:
839871
if not mc or not mc.get("missing_value_priority"):
840872
return []
841873
rows = [
842874
"## Missing inputs ranked by impact",
843875
"",
844876
"The plan does not state these values; the model assumed bounds. Rank by `|Δ-pp on the worst-affected gate| × (1 − that gate's pass rate) × bound-width-ratio` — the higher, the more decision-value in pinning this input down.",
845877
"",
846-
"| Rank | Input | Worst-affected gate | Score | Bound width / base | Basis |",
847-
"|---:|---|---|---:|---:|---|",
848878
]
879+
saturated = saturated_doom_gates(mc, params)
880+
if saturated:
881+
gate_list = ", ".join(f"`{g}`" for g in saturated)
882+
if len(saturated) == 1:
883+
rows.append(
884+
f"Note: the saturated DOOM gate {gate_list} is absent from the ranking because no single missing-input restriction can lift its pass rate under current bounds. The inputs below target the next most decision-relevant non-saturated gates; the saturated gate needs a bounds or threshold-definition audit, not a single input fix."
885+
)
886+
else:
887+
rows.append(
888+
f"Note: the saturated DOOM gates {gate_list} are absent from the ranking because no single missing-input restriction can lift their pass rates under current bounds. The inputs below target the next most decision-relevant non-saturated gates; saturated gates need a bounds or threshold-definition audit, not a single input fix."
889+
)
890+
rows.append("")
891+
rows.append("| Rank | Input | Worst-affected gate | Score | Bound width / base | Basis |")
892+
rows.append("|---:|---|---|---:|---:|---|")
849893
for i, e in enumerate(mc["missing_value_priority"], 1):
850894
basis = BASIS_FROM_SOURCE.get(e["source"], e["source"])
851895
rows.append(
@@ -941,13 +985,37 @@ def render_suggested_next_actions(mc: dict | None, params: dict | None) -> list[
941985
"",
942986
]
943987
if failing:
988+
# Distinguish DOOM (<20%) from FRAGILE (20–50%) and name the worst
989+
# gate by id + pass rate. "N gates fail" by itself understates a
990+
# 0.0% DOOM the way "1 gate fails" understates total structural
991+
# failure.
992+
doom_sorted = sorted(
993+
(r for r in thresholds if r["verdict"] == "DOOM"),
994+
key=lambda r: r["probability"] if r["probability"] is not None else 1.0,
995+
)
996+
fragile_sorted = sorted(
997+
(r for r in thresholds if r["verdict"] == "FRAGILE"),
998+
key=lambda r: r["probability"] if r["probability"] is not None else 1.0,
999+
)
1000+
worst = doom_sorted[0] if doom_sorted else fragile_sorted[0]
1001+
worst_pct = (worst["probability"] or 0) * 100
1002+
band_parts = []
1003+
if doom_sorted:
1004+
band_parts.append(
1005+
f"{len(doom_sorted)} declared gate{'s' if len(doom_sorted) != 1 else ''} in the DOOM band"
1006+
)
1007+
if fragile_sorted:
1008+
band_parts.append(
1009+
f"{len(fragile_sorted)} in the FRAGILE band"
1010+
)
1011+
band_summary = "; ".join(band_parts)
9441012
rows.append(
945-
"1. To answer whether the plan is viable, lead with the gate verdicts above — not the source plan's narrative. "
946-
f"{len(failing)} gate(s) currently fail at the 50% pass-rate bar."
1013+
f"1. To answer whether the plan is viable, lead with the gate verdicts above — not the source plan's narrative. "
1014+
f"{band_summary}. Worst: `{worst['id']}` at {worst_pct:.1f}% pass rate under current bounds."
9471015
)
9481016
else:
9491017
rows.append(
950-
"1. To answer whether the plan is viable, lead with the gate verdicts above. No gate currently fails the 50% pass-rate bar — but read the bounds and trust boundaries before treating that as a green light."
1018+
"1. To answer whether the plan is viable, lead with the gate verdicts above. No declared gate is in the DOOM or FRAGILE band — but read the bounds and trust boundaries before treating that as a green light."
9511019
)
9521020
rows.append(
9531021
"2. To prioritise data-gathering, inspect `missing_value_priority` in `montecarlo.json`. The top-scored entries are the cheapest improvements to the simulation's predictive value."
@@ -1011,7 +1079,7 @@ def build_assessment(params: dict | None, bounds: dict | None,
10111079
render_gate_verdicts(mc, params),
10121080
render_decision_implications(mc, params),
10131081
render_failure_drivers(mc, params),
1014-
render_missing_inputs_ranked(mc),
1082+
render_missing_inputs_ranked(mc, params),
10151083
render_confidence_and_trust(mc, validation),
10161084
render_scenario_sanity_check(scenarios),
10171085
render_suggested_next_actions(mc, params),

0 commit comments

Comments
 (0)