sriumcp
diff --git a/‎orchestrator/campaign.py‎
Lines changed: 71 additions & 2 deletions b/‎orchestrator/campaign.py‎
Lines changed: 71 additions & 2 deletions
diff --git a/‎orchestrator/cli.py‎
Lines changed: 38 additions & 2 deletions b/‎orchestrator/cli.py‎
Lines changed: 38 additions & 2 deletions
diff --git a/‎orchestrator/cli_dispatch.py‎
Lines changed: 8 additions & 1 deletion b/‎orchestrator/cli_dispatch.py‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎orchestrator/engine.py‎
Lines changed: 9 additions & 2 deletions b/‎orchestrator/engine.py‎
Lines changed: 9 additions & 2 deletions
@@ -38,6 +38,7 @@
 from orchestrator.engine import Engine
 from orchestrator.gates import HumanGate
 from orchestrator.inline_dispatch import InlineDispatcher
+from orchestrator.util import atomic_write
 from orchestrator.ledger import append_failed_row, append_ledger_row
 from orchestrator.llm_dispatch import LLMDispatcher
 from orchestrator.metrics import summarize_metrics
@@ -175,6 +176,60 @@ def _generate_report(
         print(f"  Report generation skipped: {exc}")
 
 
+def _persist_max_iterations(work_dir: Path, max_iterations: int) -> None:
+    """Write effective max_iterations into state.json (#197).
+
+    Best-effort: this function never raises for I/O, decode, parse, or
+    value-conversion problems. Three early-return paths are silent
+    benign no-ops:
+      * state.json doesn't exist (run hasn't called setup_work_dir yet)
+      * state.json content isn't a dict (corrupt; load_state will catch it)
+      * value is unchanged (idempotent skip)
+
+    Everything else that goes wrong is logged at WARNING level with the
+    fallback chain noted, since the feature is operational sugar
+    (carries the original cap across resume) and not load-bearing for
+    correctness.
+
+    Args:
+        work_dir: Campaign working directory that holds state.json.
+        max_iterations: Effective iteration cap to record.
+    """
+    state_path = Path(work_dir) / "state.json"
+    if not state_path.exists():
+        return
+    try:
+        state = json.loads(state_path.read_text())
+        if not isinstance(state, dict):
+            return
+        if state.get("max_iterations") == max_iterations:
+            return
+        state["max_iterations"] = int(max_iterations)
+        atomic_write(state_path, json.dumps(state, indent=2) + "\n")
+    except (OSError, ValueError, TypeError) as exc:
+        # ValueError covers json.JSONDecodeError, UnicodeDecodeError (file
+        # holds bytes the text decoder rejects) and int("abc"); TypeError
+        # covers int(None). All of them must stay non-fatal to honour the
+        # best-effort contract.
+        # %s instead of %d: a value that failed int() above would also
+        # fail %d formatting inside the logging call. Ints render the same.
+        logger.warning(
+            "Could not persist max_iterations=%s to state.json (%s): "
+            "resume will fall back to CLI flag / campaign.yaml / default.",
+            max_iterations, exc,
+        )
+
+
+def read_persisted_max_iterations(work_dir: Path) -> int | None:
+    """Read max_iterations from state.json if present (#197).
+
+    Returns None when state.json doesn't exist, can't be read, decoded
+    or parsed, isn't a JSON object, or doesn't carry a usable
+    max_iterations field (a real integer >= 1; JSON booleans are
+    rejected). Callers should fall back to their normal resolution
+    chain (CLI flag → campaign.yaml → default).
+
+    Args:
+        work_dir: Campaign working directory that holds state.json.
+
+    Returns:
+        The persisted cap, or None when no valid value is available.
+    """
+    state_path = Path(work_dir) / "state.json"
+    try:
+        # EAFP: a missing file raises FileNotFoundError (an OSError), so a
+        # separate exists() probe is unnecessary.
+        state = json.loads(state_path.read_text())
+    except (OSError, ValueError):
+        # ValueError covers both json.JSONDecodeError and the
+        # UnicodeDecodeError raised when the file holds undecodable bytes.
+        return None
+    if not isinstance(state, dict):
+        return None
+    val = state.get("max_iterations")
+    # bool is a subclass of int; without the explicit check a JSON `true`
+    # would be returned as the iteration cap.
+    if isinstance(val, bool) or not isinstance(val, int) or val < 1:
+        return None
+    return val
+
+
 def _resume_completed_campaign(work_dir: Path, max_iterations: int) -> int:
     """Decide where to resume a campaign and, if DONE, advance it.
 
@@ -194,8 +249,16 @@ def _resume_completed_campaign(work_dir: Path, max_iterations: int) -> int:
     if engine.phase not in ("INIT", "DONE"):
         start = engine.iteration
         if start < 1:
-            logger.warning(
-                "state.json has iteration=%d (< 1); starting fresh.", start,
+            # #202: pre-#194, the engine kept state.iteration=0 throughout
+            # iter-1 (incrementing only on DONE→DESIGN). The earlier WARNING
+            # "starting fresh" wording read like data loss; in practice
+            # existing iter-1 artifacts are preserved and the resume
+            # continues at state.phase. Phrase it informationally.
+            logger.info(
+                "state.json has iteration=%d (no completed iterations yet); "
+                "treating as iter-1. Existing artifacts under runs/iter-1/ "
+                "are preserved; resuming at phase=%s.",
+                start, engine.phase,
             )
             return 1
         if start > max_iterations:
@@ -309,6 +372,12 @@ def run_campaign(
         )
         preflight_dispatcher.preflight_check()
 
+    # #197: persist effective max_iterations into state.json so a later
+    # `nous resume` (without --max-iterations) honors the original cap
+    # instead of silently defaulting to 10. The state file is the single
+    # source of truth across run/resume invocations.
+    _persist_max_iterations(work_dir, max_iterations)
+
     start_iter = _resume_completed_campaign(work_dir, max_iterations)
 
     max_redesigns = 3
 
@@ -114,6 +114,11 @@ def _cmd_run(args):
             file=sys.stderr,
         )
         sys.exit(1)
+    # #193: --sandbox CLI flag overrides campaign.sandbox if both present;
+    # leaving it unset preserves whatever the campaign.yaml declares (or
+    # the SDKDispatcher default of "bypass").
+    if getattr(args, "sandbox", None) is not None:
+        campaign["sandbox"] = args.sandbox
     run_campaign(
         campaign,
         work_dir,
@@ -132,7 +137,7 @@ def _cmd_run(args):
 def _cmd_resume(args):
     import logging
 
-    from orchestrator.campaign import run_campaign
+    from orchestrator.campaign import run_campaign, read_persisted_max_iterations
 
     logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
 
@@ -150,7 +155,31 @@ def _cmd_resume(args):
         print("resume requires campaign.yaml", file=sys.stderr)
         sys.exit(1)
 
-    max_iterations = args.max_iterations if args.max_iterations is not None else campaign.get("max_iterations", 10)
+    # #197: max_iterations resolution chain on resume:
+    #   1. CLI --max-iterations (explicit override wins).
+    #   2. state.json (preserves the cap from the original `nous run`).
+    #   3. campaign.yaml.max_iterations, or the hardcoded default 10 if
+    #      campaign.yaml doesn't pin it. (Both flow through the same
+    #      `campaign.get("max_iterations", 10)` call — legacy state files
+    #      pre-dating #197 land here.)
+    if args.max_iterations is not None:
+        max_iterations = args.max_iterations
+        print(f"Resuming with max_iterations={max_iterations} (CLI override).")
+    else:
+        persisted = read_persisted_max_iterations(work_dir)
+        if persisted is not None:
+            max_iterations = persisted
+            print(
+                f"Resuming with max_iterations={max_iterations} "
+                f"(persisted from original `nous run`)."
+            )
+        else:
+            max_iterations = campaign.get("max_iterations", 10)
+            print(
+                f"Resuming with max_iterations={max_iterations} "
+                f"(from campaign.yaml / default — state.json had no "
+                f"persisted value)."
+            )
     run_campaign(
         campaign,
         work_dir,
@@ -609,6 +638,13 @@ def main():
              "an enclosing agent framework. The legacy 'api' backend "
              "was removed in #183.",
     )
+    p_run.add_argument(
+        "--sandbox", choices=["bypass", "default"], default=None,
+        help="SDK filesystem sandbox mode (#193). Default 'bypass' (set "
+             "via campaign.sandbox). Pass 'default' to use the SDK's "
+             "default permission gating — only sensible when the "
+             "campaign's writes all land under the launched cwd.",
+    )
     p_run.add_argument(
         "--bundle", type=Path, default=None,
         help="Path to a pre-authored bundle.yaml. Skips DESIGN's agent "
 
@@ -186,7 +186,14 @@ def dispatch(
             else:
                 atomic_write(output_path, json.dumps(data, indent=2) + "\n")
 
-        logger.info("CLIDispatcher: role=%s phase=%s -> %s", role, phase, output_path)
+        logger.info(
+            # #196: report the runtime class name, not the parent's. SDKDispatcher
+            # inherits from CLIDispatcher and runs through this path, but the
+            # log line should say "SDKDispatcher" so operators don't think the
+            # legacy claude -p subprocess (removed in #183) is in use.
+            "%s: role=%s phase=%s -> %s",
+            type(self).__name__, role, phase, output_path,
+        )
 
     def _retry_cli_parse(self, original_prompt: str, error: Exception, fmt: str) -> dict:
         feedback = (
 
@@ -121,9 +121,16 @@ def transition(self, to_state: str) -> None:
                 f"Invalid transition: {current} -> {to_state}. "
                 f"Valid: {TRANSITIONS[current]}"
             )
-        # Build candidate state before writing to disk
+        # Build candidate state before writing to disk.
+        # #194: increment iteration whenever we leave INIT (iter-1 begins,
+        # whether via PRE_WORK or directly to DESIGN) and whenever DONE
+        # transitions to DESIGN (iter-N+1 begins). Pre-#194, the counter
+        # only ticked on DONE→DESIGN, so state.iteration stayed at 0
+        # throughout iter-1 even though artifacts lived at runs/iter-1/.
         new_state = dict(self._state)
-        if current == "DONE" and to_state == "DESIGN":
+        if current == "INIT":
+            new_state["iteration"] += 1
+        elif current == "DONE" and to_state == "DESIGN":
             new_state["iteration"] += 1
         new_state["phase"] = to_state
         new_state["timestamp"] = datetime.now(timezone.utc).isoformat()