sriumcp
diff --git a/‎orchestrator/iteration_mode.py‎
Lines changed: 103 additions & 5 deletions b/‎orchestrator/iteration_mode.py‎
Lines changed: 103 additions & 5 deletions
diff --git a/‎orchestrator/llm_dispatch.py‎
Lines changed: 109 additions & 0 deletions b/‎orchestrator/llm_dispatch.py‎
Lines changed: 109 additions & 0 deletions
@@ -81,10 +81,15 @@ def iteration_mode_for(campaign: dict, iteration: int) -> Mode:
 If you find any campaign-spec or brief inconsistencies (paths the
 validator rejects, broken argv quoting, wall-time claims that don't
 match reality, single-tenant probes when the target requires multi-
-tenant, etc.), write them to ``runs/iter-N/brief_amendments.md`` —
-one entry per finding, with file path + suggested change. The next
-``real`` iteration will read this; future runs of the same campaign
-will benefit indefinitely.
+tenant, etc.), write them to
+``runs/iter-N/inputs/brief_amendments.jsonl`` as one structured JSON
+object per line. Required fields: ``id`` (pattern ``BA-N``),
+``brief_section``, ``problem``, ``fix``, ``priority`` (one of
+``BLOCKING``, ``HIGH``, ``MEDIUM``, ``LOW``, ``INFO``). Optional
+``evidence``, ``impact``. Schema:
+``orchestrator/schemas/brief_amendments.schema.json``. The promote
+gate, the REPORT extractor, and the future ``apply-amendments`` CLI
+all read this structured form.
 
 **Do NOT:**
 - Author full multi-arm bundles. Keep arms minimal.
@@ -105,7 +110,7 @@ def iteration_mode_for(campaign: dict, iteration: int) -> Mode:
 
 
 def mode_guidance_for(mode: Mode) -> str:
-    """Return the prompt block that guides the agent for ``mode``.
+    """Return the DESIGN-phase prompt block that guides the agent for ``mode``.
 
     Raises ``ValueError`` on an unknown mode value. Silently defaulting
     to REAL_GUIDANCE was the prior behavior; that's the more dangerous
@@ -119,3 +124,96 @@ def mode_guidance_for(mode: Mode) -> str:
     raise ValueError(
         f"unknown iteration mode {mode!r}; expected one of {VALID_MODES}"
     )
+
+
+# ─── Execute-phase mode guidance (#221) ──────────────────────────────────
+#
+# The DESIGN agent's mode guidance shapes only how that agent shrinks the
+# scope of its probes and bundle authoring. EXECUTE_ANALYZE needs its OWN
+# mode-aware guidance so it doesn't fan out the bundle at full scope when
+# the iteration is a rehearsal. Without this, post-#212 paper-burst reruns
+# observed the DESIGN agent honoring rehearsal scope while EXECUTE_ANALYZE
+# dutifully ran the full 50-arm experiment anyway — defeating the cost
+# asymmetry that was the entire economic argument for #212.
+
+# Prompt block for the EXECUTE_ANALYZE agent when the iteration mode is
+# "rehearsal"; ``execute_mode_guidance_for`` returns it verbatim. The text
+# is read by the agent at runtime, so wording changes are behavior changes.
+EXECUTE_REHEARSAL_GUIDANCE = """\
+This iteration is in **REHEARSAL** mode. The DESIGN agent's bundle
+declares the full experimental design (so iter-2 / future runs can
+run it untouched). YOUR JOB this iter:
+
+1. **Honor the rehearsal scope.** If the bundle's
+   ``experiment_spec.rehearsal_subset`` is populated, execute ONLY
+   that subset (typically: 1 seed × the contrast-pair arms).
+   Do NOT fan out the full ``experiment_spec`` — that's iter-2's job.
+   If ``rehearsal_subset`` is missing, default to: first canonical
+   seed + ``h-main`` and ``h-control-negative`` arms only.
+
+2. **Validate the analysis pipeline.** Schema-pass at least one
+   result through the analysis_summary.json computation. If the
+   analysis script fails or returns null where data is present,
+   fix the script (or surface the issue) before iter-2 runs.
+
+3. **Append per-policy timing observations.** During the
+   feasibility / contrast-pair runs, measure wall-clock per policy.
+   Record into ``experiment_spec.timing_observations``:
+   ``expected_wall_time_seconds_per_policy: { ea-wfq: 25, wfq: 23, ... }``
+   and a derived ``recommended_turn_silence_threshold_seconds``
+   (~3× the slowest observed policy + buffer). iter-2's watchdog
+   reads these to calibrate.
+
+4. **Emit ``brief_amendments.jsonl``** at
+   ``runs/iter-N/inputs/brief_amendments.jsonl`` if you find any
+   campaign-spec friction (workload params, timing claims, missing
+   flags, etc.). One JSON object per line; required fields: ``id``
+   (pattern ``BA-N``), ``brief_section``, ``problem``, ``fix``,
+   ``priority`` (BLOCKING / HIGH / MEDIUM / LOW / INFO). Optional
+   ``evidence``, ``impact``.
+
+5. **Append to ``bundle_amendments.jsonl``** when you override
+   any parameter from ``experiment_spec.verified_parameters``.
+
+6. **Write findings.json with ``mode: rehearsal``** in the outcome,
+   noting that scientific claims are deferred to iter-2. The
+   ``experiment_valid: true`` flag means "the apparatus works" —
+   not "the hypothesis is confirmed/refuted."
+
+**Do NOT:**
+- Fan out the full bundle's seeds × policies grid.
+- Mark h-main as CONFIRMED / REFUTED based on rehearsal data.
+- Skip writing ``brief_amendments.jsonl`` if you discovered
+  campaign-spec friction.
+"""
+
+# Prompt block for the EXECUTE_ANALYZE agent when the iteration mode is
+# "real"; ``execute_mode_guidance_for`` returns it verbatim. The text is
+# read by the agent at runtime, so wording changes are behavior changes.
+EXECUTE_REAL_GUIDANCE = """\
+This iteration is in **REAL** mode. Run the full experiment_spec at
+the bundle's prescribed scope: all arms, full seed list.
+
+If a prior ``rehearsal`` iter emitted ``brief_amendments.jsonl``, read
+it BEFORE launching the experiment. Any ``priority: BLOCKING``
+amendments encode constraints iter-2 must respect (e.g., a workload
+parameter the rehearsal verified is required for the experiment to
+engage the mechanism). Apply each BLOCKING amendment to your run
+configuration and proceed; if you cannot apply one, write a
+``failure_note.md`` describing why and STOP — the campaign should
+revise the brief before continuing.
+
+Write ``findings.json`` with ``mode: real`` and a CONFIRMED / REFUTED
+/ NULL status per arm. Append ``bundle_amendments.jsonl`` for any
+parameter overrides observed during execution (silent drift breaks
+reproducibility).
+"""
+
+
+def execute_mode_guidance_for(mode: Mode) -> str:
+    """Pick the EXECUTE_ANALYZE-phase prompt block matching ``mode`` (#221).
+
+    ``mode_guidance_for`` is the DESIGN-agent counterpart. As there, an
+    unrecognized mode raises ``ValueError`` rather than defaulting silently.
+    """
+    if mode not in ("rehearsal", "real"):
+        raise ValueError(
+            f"unknown iteration mode {mode!r}; expected one of {VALID_MODES}"
+        )
+    if mode == "rehearsal":
+        return EXECUTE_REHEARSAL_GUIDANCE
+    return EXECUTE_REAL_GUIDANCE
@@ -133,6 +133,94 @@ def _format_results_summary(work_dir: Path) -> str:
     return "\n".join(lines)
 
 
+def _format_brief_amendments_summary(work_dir: Path) -> str:
+    """#223: surface structured ``brief_amendments.jsonl`` entries to
+    the REPORT extractor.
+
+    Walks ``<work_dir>/runs/iter-*/inputs/brief_amendments.jsonl`` and
+    renders, per iteration, one header line followed by at most 20
+    entries ordered BLOCKING, HIGH, MEDIUM, LOW, INFO, with unknown
+    priorities last. Each entry shows ``priority``, ``id``,
+    ``brief_section`` and the ``problem`` text truncated to 160
+    characters. Returns a parenthesized placeholder string when there
+    is no ``runs/`` directory or no amendment file yields anything.
+
+    The schema at ``orchestrator/schemas/brief_amendments.schema.json``
+    is not re-validated here; rows are only JSON-decoded. Because the
+    file is therefore untrusted, this renderer never raises on its
+    content: lines that fail to decode, or that decode to something
+    other than a JSON object, are counted as malformed in the header
+    so the operator sees corruption, and a file that cannot be read
+    or is not valid UTF-8 is reported by exception type.
+    """
+    runs_dir = work_dir / "runs"
+    if not runs_dir.is_dir():
+        return "(no iteration directories — no brief amendments to report.)"
+    # Group by priority for at-a-glance triage. BLOCKING first, then
+    # HIGH / MEDIUM / LOW / INFO. Unknown priorities sort last.
+    priority_order = {
+        "BLOCKING": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3, "INFO": 4,
+    }
+    cap = 20  # max entries rendered per iteration
+    iter_dirs = sorted(
+        (d for d in runs_dir.iterdir()
+         if d.is_dir() and d.name.startswith("iter-")),
+        key=lambda d: d.name,
+    )
+    sections: list[str] = []
+    for iter_dir in iter_dirs:
+        log = iter_dir / "inputs" / "brief_amendments.jsonl"
+        if not log.exists():
+            continue
+        try:
+            # JSON text is UTF-8; do not depend on the locale encoding.
+            text = log.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError) as exc:
+            sections.append(
+                f"- {iter_dir.name}: brief_amendments.jsonl unreadable "
+                f"({type(exc).__name__})"
+            )
+            continue
+        rows: list[dict] = []
+        skipped_malformed = 0
+        for line in text.splitlines():
+            if not line.strip():
+                continue
+            try:
+                row = json.loads(line)
+            except json.JSONDecodeError:
+                skipped_malformed += 1
+                continue
+            if isinstance(row, dict):
+                rows.append(row)
+            else:
+                # Valid JSON but not an object (e.g. ``[]`` or ``42``):
+                # the ``.get`` calls below would raise on it.
+                skipped_malformed += 1
+        if not rows and skipped_malformed == 0:
+            continue
+        rows_sorted = sorted(
+            rows,
+            key=lambda r: priority_order.get(
+                str(r.get("priority", "")).upper(), 99
+            ),
+        )
+        header = f"- {iter_dir.name}: {len(rows)} amendment(s)"
+        if skipped_malformed:
+            header += f" + {skipped_malformed} malformed line(s) skipped"
+        sections.append(header)
+        for r in rows_sorted[:cap]:
+            aid = r.get("id", "?")
+            prio = r.get("priority", "?")
+            section = r.get("brief_section", "?")
+            # ``problem`` may be null or a non-string in an unvalidated
+            # row; coerce so truncation cannot raise.
+            raw_problem = r.get("problem")
+            problem = "" if raw_problem is None else str(raw_problem)
+            sections.append(
+                f"  - [{prio}] {aid} (target: {section}) — "
+                + (problem[:160] + "..." if len(problem) > 160 else problem)
+            )
+        if len(rows_sorted) > cap:
+            sections.append(f"  - ... and {len(rows_sorted) - cap} more")
+    if not sections:
+        return (
+            "(no brief_amendments.jsonl entries — the campaign brief was "
+            "consistent with the agent's runs; no amendments queued.)"
+        )
+    return "\n".join(sections)
+
+
 def _format_bundle_amendments_summary(work_dir: Path) -> str:
     """#211: surface bundle_amendments.jsonl entries to the REPORT extractor.
 
@@ -594,6 +682,19 @@ def _build_context(
                     "No design handoff available — explore the system directly."
                 )
 
+            # #221: per-iteration mode signal in EXECUTE_ANALYZE too. The
+            # post-#212 paper-burst rerun observed the DESIGN agent
+            # honoring rehearsal scope-shrink while EXECUTE_ANALYZE
+            # dutifully fanned out the full bundle anyway — because the
+            # mode signal only flowed to DESIGN. Rendering it in execute
+            # too closes that gap.
+            from orchestrator.iteration_mode import (
+                iteration_mode_for, execute_mode_guidance_for,
+            )
+            mode = iteration_mode_for(self.campaign, iteration)
+            ctx["iteration_mode"] = mode
+            ctx["mode_guidance"] = execute_mode_guidance_for(mode)
+
         if perspective is not None:
             ctx["perspective_name"] = perspective
 
@@ -656,6 +757,14 @@ def _build_context(
             ctx["bundle_amendments_summary"] = (
                 _format_bundle_amendments_summary(self.work_dir)
             )
+            # #223: structured brief_amendments — propagate to REPORT
+            # so the extractor can cite which amendments shaped the
+            # iteration's findings AND surface BLOCKING amendments
+            # that haven't been applied to the upstream brief yet
+            # (cross-run learning loop).
+            ctx["brief_amendments_summary"] = (
+                _format_brief_amendments_summary(self.work_dir)
+            )
 
         return ctx