Skip to content

Commit 372ddc3

Browse files
authored
Merge pull request #747 from PlanExeOrg/napkin-math/generate-bounds-phase4-runtime
napkin-math(bounds): Phase 4 runtime + schema readiness
2 parents 70048df + f958aa9 commit 372ddc3

4 files changed

Lines changed: 316 additions & 14 deletions

File tree

experiments/napkin_math/.claude/skills/generate-bounds/system-prompt.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,8 @@ sampling_discipline must be one of:
187187
- "integer" — countable units (people, households, days, kits, sites, …); downstream samplers round draws to the nearest integer and re-clamp to [low, high]
188188
- "fraction" — bounded in [0, 1]; downstream samplers clamp draws to that interval
189189
- "continuous" — real-valued; downstream samplers do not round or clamp beyond the [low, high] range
190+
- "lognormal" — fat-tailed real-valued draw whose [low, high] are interpreted as the P5 / P95 percentiles, not as hard cutoffs. **Schema-reserved: the Monte Carlo runner accepts this value at validation time but raises NotImplementedError at sample time until Phase 8 lands the matching sampler.** Do not emit it yet unless a follow-up rule (megaproject CAPEX default) explicitly directs you to.
191+
- "pert" — three-point modified-beta draw centered on base, with low and high as the endpoints of its support. **Schema-reserved with the same Phase-8 caveat as lognormal.** Do not emit yet.
190192

191193
Choose sampling_discipline by the variable's nature, not by lexical tokens in its id or unit. The downstream Monte Carlo runner does not pattern-match on unit strings; it reads sampling_discipline directly. There must be no fallback path that re-guesses the discipline.
192194

@@ -196,11 +198,13 @@ default_pass_probability:
196198
- For sampling_discipline "bernoulli_gate": must be a number in [0, 1]; this is the assumed pass probability when the caller does not override it
197199
- For every other sampling_discipline: must be null
198200

201+
A single optional top-level key `correlations` is reserved alongside the per-variable entries for declaring cross-variable correlation groups. **Schema-reserved with the same Phase-8 caveat as lognormal/pert: the runner preserves the key but does not yet apply correlated sampling.** Do not emit a `correlations` block yet; the detailed selection rules will land alongside the copula sampler.
202+
199203
Rules for the output:
200204

201205
- Output only variables selected by the rules above.
202206
- Do not invent ids.
203-
- Every top-level key must correspond to a declared id in key_values or missing_values_to_estimate.
207+
- Every top-level key must correspond to a declared id in key_values or missing_values_to_estimate (except for the reserved `correlations` key described above, which is not yet emitted).
204208
- Order keys by importance: critical-priority first, then high, then medium, then remaining missing_values_to_estimate not already placed.
205209
- rationale must be at most 50 words. The cap exists to discourage prose, not to suppress required disclosures: the named-anchor paraphrase required by ACTUAL-VS-COMMITMENT and the base-vs-threshold clause required by SANITY CHECK are exempt from the cap if they push the rationale past 50 words.
206210
- Split rationale on whitespace for word count; hyphenated and slash-joined tokens count as one word.

experiments/napkin_math/run_monte_carlo.py

Lines changed: 84 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,26 @@
3535
"correlation_groups": [],
3636
}
3737

38-
VALID_DISCIPLINES = {"fixed", "bernoulli_gate", "integer", "fraction", "continuous"}
38+
# Disciplines whose schema is reserved (Phase 4) but whose sampler is not
# yet implemented. ``sample_one`` raises ``NotImplementedError`` loudly so a
# user trying to run Monte Carlo against bounds that use them gets an
# explicit failure rather than a silent fall-back to triangular.
UNIMPLEMENTED_DISCIPLINES = frozenset({"lognormal", "pert"})

# Every ``sampling_discipline`` value accepted at validation time. The
# reserved disciplines are spliced in from ``UNIMPLEMENTED_DISCIPLINES``
# rather than repeated as literals, so "accepted by the schema" and
# "rejected by the sampler" cannot drift apart when Phase 8 promotes one of
# them. Validation passes for the reserved values so that generate-bounds
# can begin emitting them for plan_type-driven megaproject CAPEX without a
# downstream schema error.
VALID_DISCIPLINES = {
    "fixed", "bernoulli_gate", "integer", "fraction", "continuous",
    *UNIMPLEMENTED_DISCIPLINES,
}

# Top-level keys in ``bounds.json`` that are not per-variable bound
# entries. ``correlations`` carries the optional R1.3 declared-correlation
# groups; ``strip_threshold_bounds`` copies such keys through unchanged.
# NOTE(review): because the key is kept in the cleaned bounds, confirm that
# downstream per-variable validation and sampling skip reserved keys.
RESERVED_TOP_LEVEL_KEYS = frozenset({"correlations"})
3958

4059
THRESHOLD_SUFFIXES = (
4160
"_threshold", "_target", "_ceiling", "_floor",
@@ -135,10 +154,30 @@ def _collect_formula_threshold_ids(parameters: dict) -> set[str]:
135154
return threshold_ids
136155

137156

157+
def _collect_calculation_output_names(parameters: dict) -> set[str]:
158+
"""``output_name`` values declared by any calculation in
159+
``recommended_first_calculations`` or ``derived_questions``. These
160+
names refer to *computed* quantities — a bound on the output would
161+
sample the aggregate independently of its formula's named inputs,
162+
so a single Monte Carlo trial can pair sub-component p95s with a
163+
total p05 (Gemini R1.1, disconnected aggregates).
164+
"""
165+
out: set[str] = set()
166+
for src in ("recommended_first_calculations", "derived_questions"):
167+
for entry in parameters.get(src, []) or []:
168+
if not isinstance(entry, dict):
169+
continue
170+
name = entry.get("output_name")
171+
if isinstance(name, str) and name:
172+
out.add(name)
173+
return out
174+
175+
138176
def strip_threshold_bounds(
139177
bounds: dict, parameters: dict,
140178
) -> tuple[dict, list[dict]]:
141-
"""Remove bounds entries for threshold/target variables.
179+
"""Remove bounds entries for threshold/target variables and for
180+
variables that are calculation outputs.
142181
143182
Bounds entries on threshold variables silently change what `pass_rate`
144183
measures from "does actual >= stated_threshold" to
@@ -147,28 +186,41 @@ def strip_threshold_bounds(
147186
`collect_input_specs` does this automatically when the threshold variable
148187
is absent from bounds. This function enforces that absence.
149188
189+
Bounds entries on calculation outputs (variables that appear as a
190+
declared ``output_name`` in ``recommended_first_calculations`` or
191+
``derived_questions``) would let a single Monte Carlo trial pair
192+
sub-component p95s with a total p05; the output is computed from its
193+
inputs, not sampled.
194+
150195
Deterministic backstop for the rule documented in
151196
`.claude/skills/generate-bounds/system-prompt.txt`. The LLM is asked to
152-
skip threshold variables but does not reliably do so when parameter-JSON
153-
metadata (medium uncertainty + critical/high priority) signals strongly
154-
to include them.
197+
skip threshold and aggregate variables but does not reliably do so when
198+
parameter-JSON metadata (medium uncertainty + critical/high priority)
199+
signals strongly to include them.
155200
156201
Identification:
157202
1. id suffix match against `THRESHOLD_SUFFIXES`
158203
2. id appears as the non-`actual_` operand in a binary-subtraction
159204
margin formula declared in `recommended_first_calculations` or
160205
`derived_questions`
206+
3. id matches the ``output_name`` of any declared calculation
161207
162-
Variables prefixed `actual_` are never stripped.
208+
Variables prefixed `actual_` are never stripped. Reserved top-level
209+
keys (currently ``correlations``) pass through unchanged so the
210+
optional correlations block survives this pre-processor.
163211
164212
Returns ``(cleaned_bounds, stripped)``. ``stripped`` is an ordered list
165-
of ``{"id": str, "reason": "suffix" | "formula-side"}`` records. The
166-
input ``bounds`` is not mutated.
213+
of ``{"id": str, "reason": "suffix" | "formula-side" | "calculation-output"}``
214+
records. The input ``bounds`` is not mutated.
167215
"""
168216
formula_threshold_ids = _collect_formula_threshold_ids(parameters)
217+
calculation_output_names = _collect_calculation_output_names(parameters)
169218
cleaned: dict = {}
170219
stripped: list[dict] = []
171220
for bound_id, bound in bounds.items():
221+
if bound_id in RESERVED_TOP_LEVEL_KEYS:
222+
cleaned[bound_id] = bound
223+
continue
172224
if bound_id.startswith("actual_"):
173225
cleaned[bound_id] = bound
174226
continue
@@ -178,6 +230,9 @@ def strip_threshold_bounds(
178230
if bound_id in formula_threshold_ids:
179231
stripped.append({"id": bound_id, "reason": "formula-side"})
180232
continue
233+
if bound_id in calculation_output_names:
234+
stripped.append({"id": bound_id, "reason": "calculation-output"})
235+
continue
181236
cleaned[bound_id] = bound
182237
return cleaned, stripped
183238

@@ -226,6 +281,15 @@ def sample_one(rng: np.random.Generator, bound: dict, distribution_default: str,
226281
p = gate_probabilities.get(var_id, bound["default_pass_probability"])
227282
return high if rng.random() < p else low
228283

284+
if discipline in UNIMPLEMENTED_DISCIPLINES:
285+
raise NotImplementedError(
286+
f"bound '{var_id}' has sampling_discipline {discipline!r}; "
287+
f"the matching sampler is not yet implemented in run_monte_carlo. "
288+
f"This value is accepted by the schema (Phase 4) but the sampler "
289+
f"lands in Phase 8 — switch to triangular/continuous in bounds.json "
290+
f"or wait for Phase 8."
291+
)
292+
229293
if low == high:
230294
val = low
231295
elif distribution_default == "uniform":
@@ -427,11 +491,18 @@ def run(params_path: Path, bounds_path: Path, calc_path: Path,
427491

428492
bounds, stripped_threshold_bounds = strip_threshold_bounds(bounds, params)
429493
for entry in stripped_threshold_bounds:
430-
warnings_text.append(
431-
f"stripped threshold variable '{entry['id']}' from bounds "
432-
f"(reason: {entry['reason']}); simulation will use the stated value "
433-
f"from parameters.json"
434-
)
494+
if entry["reason"] == "calculation-output":
495+
warnings_text.append(
496+
f"stripped calculation output '{entry['id']}' from bounds; "
497+
f"simulation will compute it from calculations.py instead of "
498+
f"sampling it independently"
499+
)
500+
else:
501+
warnings_text.append(
502+
f"stripped threshold variable '{entry['id']}' from bounds "
503+
f"(reason: {entry['reason']}); simulation will use the stated value "
504+
f"from parameters.json"
505+
)
435506

436507
input_specs = collect_input_specs(params, bounds)
437508
plan, plan_warnings = build_calculation_plan(params, module)

experiments/napkin_math/tests/test_run_monte_carlo.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,136 @@ def test_continuous_with_negatives_allowed(self):
186186
self.assertTrue(any(v < 0 for v in out))
187187
self.assertTrue(all(-10 <= v <= 10 for v in out))
188188

189+
def test_lognormal_passes_schema_validation(self):
    """``validate_bound`` must accept ``lognormal`` without raising.

    Phase 4 readiness: generate-bounds can begin emitting the value for
    the megaproject CAPEX default that lands in the prompt-side
    follow-up.
    """
    lognormal_bound = make_bound(
        unit="EUR",
        low=1e6,
        base=5e6,
        high=2e7,
        sampling_discipline="lognormal",
    )
    # No assertion needed: the test passes when validation does not raise.
    rmc.validate_bound("capex", lognormal_bound)
196+
197+
def test_pert_passes_schema_validation(self):
    """``validate_bound`` must accept ``pert`` without raising."""
    pert_bound = make_bound(
        unit="EUR",
        low=1e6,
        base=5e6,
        high=2e7,
        sampling_discipline="pert",
    )
    # No assertion needed: the test passes when validation does not raise.
    rmc.validate_bound("opex", pert_bound)
201+
202+
def test_lognormal_sampler_raises_not_implemented(self):
    """Sampling a ``lognormal`` bound must fail loudly until Phase 8.

    A silent fall-back to triangular would let the user see
    "100% Robust" on a megaproject whose CAPEX bounds are actually
    fat-tailed — exactly the megaproject illusion Phase 4 is laying
    groundwork to fix.
    """
    lognormal_bound = make_bound(
        unit="EUR",
        low=1e6,
        base=5e6,
        high=2e7,
        sampling_discipline="lognormal",
    )
    with self.assertRaises(NotImplementedError) as ctx:
        rmc.sample_one(self.rng, lognormal_bound, "triangular", {}, "capex")

    # The message must name the discipline and the phase that unblocks it.
    message = str(ctx.exception)
    for expected_fragment in ("lognormal", "Phase 8"):
        self.assertIn(expected_fragment, message)
213+
214+
def test_pert_sampler_raises_not_implemented(self):
    """Sampling a ``pert`` bound must raise ``NotImplementedError``."""
    pert_bound = make_bound(
        unit="EUR",
        low=1e6,
        base=5e6,
        high=2e7,
        sampling_discipline="pert",
    )
    with self.assertRaises(NotImplementedError) as ctx:
        rmc.sample_one(self.rng, pert_bound, "triangular", {}, "opex")

    # The message must name the offending discipline.
    message = str(ctx.exception)
    self.assertIn("pert", message)
220+
221+
222+
class TestStrippedBoundsWarnings(unittest.TestCase):
    """Warning text must depend on why ``strip_threshold_bounds`` removed an id.

    Review feedback on PR #747: the original warning described every
    stripped item as a "threshold variable" whose value would be read from
    parameters.json. That is false for calculation outputs, which are
    computed from calculations.py rather than read as a stated value.
    """

    def test_calculation_output_warning_mentions_calculations_py(self):
        """A bound on a declared ``output_name`` gets the calculation wording."""

        def eur_bound(low, base, high):
            return make_bound(unit="EUR", low=low, base=base, high=high,
                              sampling_discipline="continuous")

        calc_source = (
            "def total(a: float, b: float) -> float:\n"
            " return a + b\n"
        )
        missing_values = [
            {"id": var_id, "label": var_id, "unit": "EUR",
             "why_needed": "x", "suggested_estimation_method": "x"}
            for var_id in ("a", "b")
        ]
        recommended = [{
            "id": "c_total", "label": "total",
            "formula_hint": "total = a + b",
            "output_name": "total", "output_unit": "EUR",
            "depends_on": ["a", "b"], "why_first": "x",
        }]
        bounds = {
            "a": eur_bound(1, 2, 3),
            "b": eur_bound(4, 5, 6),
            # ``total`` is a calculation output, so this bound must be stripped.
            "total": eur_bound(10, 20, 30),
        }

        with tempfile.TemporaryDirectory() as td:
            result = run_with_fixture(
                Path(td),
                missing_values=missing_values,
                recommended=recommended,
                bounds=bounds,
                calc_source=calc_source,
                _settings={"n_runs": 100, "seed": 1},
            )

            self.assertEqual(
                result["bounds_post_processor"]["stripped_threshold_ids"],
                [{"id": "total", "reason": "calculation-output"}],
            )

            all_messages = [item["message"] for item in result["warnings"]]
            matching = [
                text for text in all_messages if "calculation output" in text
            ]
            self.assertEqual(len(matching), 1, msg=all_messages)

            (warning,) = matching
            self.assertIn("'total'", warning)
            self.assertIn("calculations.py", warning)
            # The threshold-variable phrasing must NOT appear for a
            # calculation-output strip — that wording is reserved for
            # suffix/formula-side strips where the value really does come
            # from parameters.json.
            for forbidden in ("threshold variable", "stated value"):
                self.assertNotIn(forbidden, warning)

    def test_threshold_strip_warning_unchanged(self):
        """Sibling guard: suffix/formula-side strips keep the original
        threshold-variable wording."""

        def fraction_bound(low, base, high):
            return make_bound(low=low, base=base, high=high,
                              sampling_discipline="fraction")

        missing_values = [{
            "id": "x", "label": "x", "unit": "fraction",
            "why_needed": "x", "suggested_estimation_method": "x",
        }]
        key_values = [{
            "id": "x_threshold", "label": "threshold",
            "category": "operational", "value_type": "explicit",
            "unit": "fraction", "value": 0.5,
            "comment": "x", "formula_hint": None,
            "output_name": None, "output_unit": None,
            "depends_on": [], "modelling_priority": "high",
            "uncertainty": "low", "source_text": "x",
        }]
        recommended = [{
            "id": "c", "label": "c",
            "formula_hint": "out = x",
            "output_name": "out", "output_unit": "fraction",
            "depends_on": ["x"], "why_first": "x",
        }]
        bounds = {
            "x": fraction_bound(0.1, 0.2, 0.3),
            "x_threshold": fraction_bound(0.4, 0.5, 0.6),
        }

        with tempfile.TemporaryDirectory() as td:
            result = run_with_fixture(
                Path(td),
                missing_values=missing_values,
                key_values=key_values,
                recommended=recommended,
                bounds=bounds,
                calc_source="def out(x: float) -> float:\n return x\n",
                _settings={"n_runs": 100, "seed": 1},
            )

            all_messages = [item["message"] for item in result["warnings"]]
            matching = [
                text for text in all_messages if "threshold variable" in text
            ]
            self.assertEqual(len(matching), 1, msg=all_messages)

            (warning,) = matching
            self.assertIn("'x_threshold'", warning)
            self.assertIn("stated value", warning)
318+
189319

190320
class TestSchemaValidation(unittest.TestCase):
191321
def test_missing_sampling_discipline(self):

0 commit comments

Comments
 (0)