sriumcp
diff --git a/‎orchestrator/cli.py‎
Lines changed: 9 additions & 6 deletions b/‎orchestrator/cli.py‎
Lines changed: 9 additions & 6 deletions
diff --git a/‎orchestrator/complexity_tier.py‎
Lines changed: 27 additions & 2 deletions b/‎orchestrator/complexity_tier.py‎
Lines changed: 27 additions & 2 deletions
diff --git a/‎orchestrator/iteration_mode.py‎
Lines changed: 121 additions & 0 deletions b/‎orchestrator/iteration_mode.py‎
Lines changed: 121 additions & 0 deletions
@@ -196,10 +196,11 @@ def _cmd_stop(args):
     """Ask a running campaign to wind down cleanly between phases.
 
     Writes a ``STOP`` sentinel at the campaign work_dir root. The
-    next time the orchestrator passes a checkpoint (between
-    iterations today; between phases is on the roadmap), it raises
-    ``CampaignStopped``, persists a ``stopped_by_user`` ledger row,
-    and exits without orphaning worktrees or pending dispatcher calls.
+    next time the orchestrator passes a checkpoint — at the start of
+    each iteration AND at every phase transition within an iteration
+    (#198) — it raises ``CampaignStopped``, persists a
+    ``stopped_by_user`` ledger row, and exits without orphaning
+    worktrees or pending dispatcher calls.
 
     For mid-iteration interruption, ``Ctrl+C`` still works — the
     engine's atomic checkpoint means the next ``nous resume`` picks
@@ -229,8 +230,10 @@ def _cmd_stop(args):
     if reason:
         print(f"Reason: {reason}")
     print(
-        "The campaign will halt at the next iteration boundary. To "
-        "cancel the stop request, delete the sentinel file."
+        "The campaign will halt at the next phase boundary (a phase "
+        "transition within the current iteration, or the start of "
+        "the next iteration — whichever comes first). To cancel the "
+        "stop request, delete the sentinel file."
     )
 
 
 
@@ -37,7 +37,12 @@
 
 
 def _read_bundle_tier(path: Path) -> int | None:
-    """Read complexity_tier from a bundle.yaml. None if missing or malformed."""
+    """Read complexity_tier from a bundle.yaml. None if missing or malformed.
+
+    Looks under ``metadata`` first, then falls back to the legacy top-level
+    location (#206). When both are populated, the metadata value wins so
+    ``metadata`` is the canonical place to put it going forward.
+    """
     if not path.exists():
         return None
     try:
@@ -46,12 +51,32 @@ def _read_bundle_tier(path: Path) -> int | None:
         return None
     if not isinstance(data, dict):
         return None
+    metadata = data.get("metadata")
+    if isinstance(metadata, dict):
+        tier = metadata.get("complexity_tier")
+        if isinstance(tier, int) and 1 <= tier <= 4:
+            return tier
     tier = data.get("complexity_tier")
     if isinstance(tier, int) and 1 <= tier <= 4:
         return tier
     return None
 
 
+def _read_bundle_justification(bundle: object) -> str | None:
+    """Return a non-blank tier_justification: metadata, else root (#206)."""
+    if not isinstance(bundle, dict):
+        return None  # parsed bundle is not a mapping; nothing to look up
+    metadata = bundle.get("metadata")
+    if isinstance(metadata, dict):
+        j = metadata.get("tier_justification")
+        if isinstance(j, str) and j.strip():  # blank strings count as missing
+            return j
+    j = bundle.get("tier_justification")  # legacy top-level location
+    if isinstance(j, str) and j.strip():
+        return j
+    return None  # no usable justification in either location
+
+
 def prior_iteration_tiers(work_dir: Path, *, up_to: int) -> dict[int, int]:
     """Return {iteration: tier} for completed prior iterations.
 
@@ -139,7 +164,7 @@ def format_tier_summary(
     # Show justification if present.
     try:
         bundle = yaml.safe_load(Path(bundle_path).read_text())
-        justification = bundle.get("tier_justification") if isinstance(bundle, dict) else None
+        justification = _read_bundle_justification(bundle)
     except (OSError, yaml.YAMLError):
         justification = None
     if justification:
 
@@ -0,0 +1,121 @@
+"""Per-iteration mode resolution.
+
+A campaign's ``iterations: [...]`` list, when present, lets operators tag
+each iteration as ``rehearsal`` or ``real``. The DESIGN methodology reads
+the resolved mode from its prompt context and scope-shrinks accordingly:
+rehearsal iterations focus on the *apparatus check* (does the workload
+spec parse, do BLIS args bind, does analysis validate?) and the
+*feasibility check* (does the parameter regime engage the mechanism?),
+emitting ``brief_amendments.md`` for any campaign-spec friction. Real
+iterations run the full bundle at full scope.
+
+This module is **pure Python — no LLM, no I/O**. The orchestrator and
+LLMDispatcher import the resolver to populate prompt context;
+test_iteration_mode covers the cases.
+"""
+from __future__ import annotations
+
+from typing import Literal
+
+
+# Type alias used by callers that want the type-checker to enforce the
+# enum at the API surface (instead of duck-typed strings flowing through).
+Mode = Literal["rehearsal", "real"]
+
+# Default when the campaign omits ``iterations``, or when an iteration
+# index is out of range. ``real`` is the conservative default — a
+# rehearsal-mode iteration scope-shrinks; defaulting to it could mean
+# "skip the full experiment by accident."
+DEFAULT_MODE: Mode = "real"
+
+VALID_MODES: tuple[Mode, ...] = ("rehearsal", "real")
+
+
+def iteration_mode_for(campaign: dict, iteration: int) -> Mode:
+    """Return the mode for iteration N, defaulting to ``real``.
+
+    Out-of-range index, missing block, or malformed entry: ``real``.
+    """
+    if iteration < 1:  # iteration numbers are 1-based (see idx below)
+        return DEFAULT_MODE
+    iters = campaign.get("iterations")
+    if not isinstance(iters, list) or not iters:
+        return DEFAULT_MODE
+    idx = iteration - 1
+    if idx >= len(iters):
+        return DEFAULT_MODE
+    entry = iters[idx]
+    if not isinstance(entry, dict):
+        return DEFAULT_MODE
+    mode = entry.get("mode")
+    if mode in VALID_MODES:
+        return mode  # type: ignore[return-value]  # narrowed by membership
+    return DEFAULT_MODE  # NOTE(review): unknown/typo'd mode silently becomes the default
+
+
+REHEARSAL_GUIDANCE = """\
+This iteration is a **REHEARSAL** (#212). Optimize for fast feedback over
+scientific completeness. Two distinct goals — score them separately:
+
+1. **Apparatus check.** Does the experimental machinery work end-to-end?
+   - Does the workload spec parse?
+   - Do BLIS / target-system args bind correctly?
+   - Does the analysis script schema-validate at least one result?
+   - Are the canonical seeds usable, or do they trip a known bug?
+
+2. **Feasibility check.** Is the parameter regime worth running?
+   - Does the workload actually engage the mechanism under test? (e.g.
+     does the burst create KV pressure, vs all adversary requests being
+     dropped_unservable?)
+   - Does the policy contrast actually differentiate on this workload,
+     or do both arms produce identical metrics?
+
+**Scope discipline for rehearsals:**
+- Use ONE seed (the first canonical seed for this campaign).
+- Use the contrast-pair arms only (h-main vs the most direct control).
+   Do NOT fan out across all arms in a multi-arm bundle.
+- Keep wall-time small. If a rehearsal is going to take more than ~5–10
+   minutes, you're doing too much.
+
+**What to emit alongside findings:**
+If you find any campaign-spec or brief inconsistencies (paths the
+validator rejects, broken argv quoting, wall-time claims that don't
+match reality, single-tenant probes when the target requires multi-
+tenant, etc.), write them to ``runs/iter-N/brief_amendments.md`` —
+one entry per finding, with file path + suggested change. The next
+``real`` iteration will read this; future runs of the same campaign
+will benefit indefinitely.
+
+**Do NOT:**
+- Author full multi-arm bundles. Keep arms minimal.
+- Run all canonical seeds. One seed is enough to verify apparatus
+   + feasibility.
+- Conclude on the research question. Rehearsals don't confirm or
+   refute hypotheses; they validate the apparatus.
+"""
+
+REAL_GUIDANCE = """\
+This iteration is a **REAL** run (#212). Run the bundle at full scope:
+all arms, full seed list, full workload. Do not scope-shrink.
+
+If a prior ``rehearsal`` iteration emitted ``brief_amendments.md``,
+read it before authoring the bundle — apply the amendments and don't
+re-discover the same friction.
+"""
+
+
+def mode_guidance_for(mode: Mode) -> str:
+    """Return the prompt block that guides the agent for ``mode``.
+
+    Raises ``ValueError`` on an unknown mode value. Silently defaulting
+    to REAL_GUIDANCE was the prior behavior; that's the costlier
+    fallback here (a full-scope run instead of a rehearsal), so we fail
+    loudly instead of running a full experiment when a typo says otherwise.
+    """
+    if mode == "rehearsal":
+        return REHEARSAL_GUIDANCE
+    if mode == "real":
+        return REAL_GUIDANCE
+    raise ValueError(
+        f"unknown iteration mode {mode!r}; expected one of {VALID_MODES}"
+    )