AI-native-Systems-Research
diff --git a/‎orchestrator/iteration.py‎
Lines changed: 114 additions & 1 deletion b/‎orchestrator/iteration.py‎
Lines changed: 114 additions & 1 deletion
diff --git a/‎orchestrator/prompt_loader.py‎
Lines changed: 13 additions & 1 deletion b/‎orchestrator/prompt_loader.py‎
Lines changed: 13 additions & 1 deletion
diff --git a/‎orchestrator/schemas/campaign.schema.yaml‎
Lines changed: 17 additions & 0 deletions b/‎orchestrator/schemas/campaign.schema.yaml‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎orchestrator/schemas/findings.schema.json‎
Lines changed: 6 additions & 0 deletions b/‎orchestrator/schemas/findings.schema.json‎
Lines changed: 6 additions & 0 deletions
@@ -48,6 +48,97 @@ class IterationOutcome(str, Enum):
 
 TEMPLATES_DIR = Path(__file__).resolve().parent / "templates"
 SCHEMAS_DIR = Path(__file__).resolve().parent / "schemas"
+
+
+def _declared_code_change_paths(bundle_path: Path) -> set[str]:
+    """Read ``bundle.yaml`` and return every ``arms[].code_changes[].file``
+    declared on any arm (#230).
+
+    Paths are returned verbatim — they are NOT normalized. If the bundle
+    declares an absolute path or a ``./``-prefixed path, it won't match
+    the relative paths that ``detect_undeclared_writes`` reports; that
+    mismatch is the bundle author's responsibility (the bundle schema
+    already constrains ``file`` shape).
+
+    This helper runs on the worktree-cleanup path, so malformed input
+    must never surface as an exception. An empty set is returned when:
+
+    * the bundle is missing or declares no ``code_changes``;
+    * the file cannot be read or is not valid YAML (logged at ERROR —
+      the bundle is a system boundary; corruption is operator-actionable);
+    * the top-level YAML document is not a mapping (logged at ERROR).
+
+    ``arms`` / ``code_changes`` values that are not lists, and entries
+    that are not mappings carrying a string ``file``, are skipped.
+
+    Args:
+        bundle_path: Location of the iteration's ``bundle.yaml``.
+
+    Returns:
+        The set of declared ``file`` strings (possibly empty).
+    """
+    if not bundle_path.exists():
+        return set()
+    try:
+        bundle = yaml.safe_load(bundle_path.read_text()) or {}
+    except yaml.YAMLError as exc:
+        logger.error(
+            "_declared_code_change_paths: bundle.yaml parse failed at %s "
+            "(%s); treating as if no code_changes were declared. Every "
+            "executor write will be flagged as undeclared until this is fixed.",
+            bundle_path, exc,
+        )
+        return set()
+    except (OSError, UnicodeDecodeError) as exc:
+        # exists() can race with deletion, and the path may be a directory,
+        # unreadable, or not decodable as text; none of that may abort
+        # cleanup.
+        logger.error(
+            "_declared_code_change_paths: bundle.yaml could not be read at "
+            "%s (%s); treating as if no code_changes were declared. Every "
+            "executor write will be flagged as undeclared until this is fixed.",
+            bundle_path, exc,
+        )
+        return set()
+    if not isinstance(bundle, dict):
+        # e.g. a top-level YAML list or scalar: ``.get`` would raise
+        # AttributeError and block the cleanup this helper serves.
+        logger.error(
+            "_declared_code_change_paths: bundle.yaml at %s has a %s at the "
+            "top level instead of a mapping; treating as if no code_changes "
+            "were declared.",
+            bundle_path, type(bundle).__name__,
+        )
+        return set()
+    arms = bundle.get("arms")
+    if not isinstance(arms, list):
+        # Missing, null, or wrongly-typed ``arms``: nothing is declared.
+        return set()
+    declared: set[str] = set()
+    for arm in arms:
+        if not isinstance(arm, dict):
+            continue
+        changes = arm.get("code_changes")
+        if not isinstance(changes, list):
+            # A scalar here (e.g. an int) would raise TypeError on iteration.
+            continue
+        for change in changes:
+            if isinstance(change, dict) and isinstance(change.get("file"), str):
+                declared.add(change["file"])
+    return declared
+
+
+def _record_undeclared_writes_in_findings(
+    findings_path: Path,
+    undeclared: list[str],
+) -> None:
+    """Merge ``worktree_uncommitted_writes`` into ``findings.json`` (#230).
+
+    The key is set to the sorted, de-duplicated ``undeclared`` list and
+    the file is rewritten via ``atomic_write``.
+
+    No-op if ``undeclared`` is empty or findings.json is missing — the
+    cleanup may be running in the execute-incomplete branch where
+    findings was never produced (the caller surfaces the data via
+    retry_log there instead).
+
+    This is a tripwire on the cleanup path, so it never raises on bad
+    on-disk state. Each of the following is logged at ERROR (with the
+    undeclared paths, so they are not lost) and the function returns:
+
+    * the existing findings is not valid JSON — modifying a corrupt
+      JSON file would only make recovery harder;
+    * the file cannot be read, or its top-level value is not an object;
+    * writing the merged document fails with ``OSError``.
+
+    Args:
+        findings_path: Location of the iteration's ``findings.json``.
+        undeclared: Paths written in the worktree but not declared in
+            the bundle's ``code_changes``.
+    """
+    if not undeclared or not findings_path.exists():
+        return
+    try:
+        findings = json.loads(findings_path.read_text())
+    except json.JSONDecodeError as exc:
+        logger.error(
+            "_record_undeclared_writes_in_findings: findings.json at %s "
+            "is not valid JSON (%s); the undeclared-writes list will not "
+            "be persisted. Undeclared paths: %s",
+            findings_path, exc, undeclared,
+        )
+        return
+    except (OSError, UnicodeDecodeError) as exc:
+        # exists() can race with deletion; the file may also be unreadable.
+        logger.error(
+            "_record_undeclared_writes_in_findings: findings.json at %s "
+            "could not be read (%s); the undeclared-writes list will not "
+            "be persisted. Undeclared paths: %s",
+            findings_path, exc, undeclared,
+        )
+        return
+    if not isinstance(findings, dict):
+        # Valid JSON whose root is a list/scalar: item assignment below
+        # would raise TypeError and block worktree cleanup.
+        logger.error(
+            "_record_undeclared_writes_in_findings: findings.json at %s "
+            "has a %s at the top level instead of an object; the "
+            "undeclared-writes list will not be persisted. "
+            "Undeclared paths: %s",
+            findings_path, type(findings).__name__, undeclared,
+        )
+        return
+    findings["worktree_uncommitted_writes"] = sorted(set(undeclared))
+    try:
+        atomic_write(findings_path, json.dumps(findings, indent=2) + "\n")
+    except OSError as exc:
+        # Persisting the tripwire is best-effort; a failed write must not
+        # prevent the caller from removing the worktree.
+        logger.error(
+            "_record_undeclared_writes_in_findings: could not write "
+            "findings.json at %s (%s); the undeclared-writes list was not "
+            "persisted. Undeclared paths: %s",
+            findings_path, exc, undeclared,
+        )
+
+
+def _detect_undeclared_writes_for_iter(
+    iter_dir: Path,
+    experiment_dir: Path,
+) -> list[str]:
+    """Detect undeclared writes in ``experiment_dir`` and log a WARNING
+    if any are found (#230).
+
+    The declared set comes from ``iter_dir / "bundle.yaml"``; detection
+    itself is delegated to ``orchestrator.worktree.detect_undeclared_writes``.
+    The list is returned so the caller can decide where to persist it
+    (findings.json on success, retry_log on incomplete).
+
+    Pure tripwire — never blocks cleanup. Any exception raised while
+    detecting is logged with its traceback and an empty list is
+    returned, so the caller still reaches the worktree removal.
+
+    Args:
+        iter_dir: Iteration directory holding ``bundle.yaml``.
+        experiment_dir: Experiment worktree to inspect.
+
+    Returns:
+        The undeclared paths, or ``[]`` when none were found or
+        detection failed.
+    """
+    # Imported at call time, as in the original block.
+    from orchestrator.worktree import detect_undeclared_writes
+    try:
+        declared = _declared_code_change_paths(iter_dir / "bundle.yaml")
+        undeclared = detect_undeclared_writes(experiment_dir, declared)
+    except Exception:  # noqa: BLE001 — best-effort boundary before cleanup
+        logger.exception(
+            "Undeclared-write detection failed for worktree %s; "
+            "continuing with cleanup without it.",
+            experiment_dir,
+        )
+        return []
+    if undeclared:
+        logger.warning(
+            "Executor wrote %d files in the experiment worktree "
+            "without declaring them in bundle.arms[].code_changes; "
+            "they will be lost on cleanup: %s",
+            len(undeclared), undeclared[:20],
+        )
+    return undeclared
+
+
 DEFAULTS_PATH = Path(__file__).resolve().parent / "defaults.yaml"
 _ARM_TYPE_RE = re.compile(r"^[a-zA-Z0-9_-]+$")
 
@@ -960,8 +1051,9 @@ def _max_turns_for(phase_key: str) -> int:
                 create_experiment_worktree,
                 remove_experiment_worktree,
             )
+            extras = campaign.get("target_system", {}).get("worktree_extras") or []
             experiment_dir, experiment_id = create_experiment_worktree(
-                Path(repo_path), iteration,
+                Path(repo_path), iteration, extras=extras,
             )
             (iter_dir / ".experiment_id").write_text(experiment_id)
             print(f"  Experiment worktree: {experiment_dir}")
@@ -1007,13 +1099,22 @@ def _max_turns_for(phase_key: str) -> int:
         # "X not found" from validate_execution.
         missing = _missing_execute_artifacts(iter_dir)
         if missing:
+            # #230: even on incomplete, the executor may have written
+            # partial code into the worktree — exactly the case where
+            # undeclared writes matter most. Capture before cleanup.
+            incomplete_undeclared: list[str] = []
+            if repo_path and experiment_id and experiment_dir is not None:
+                incomplete_undeclared = _detect_undeclared_writes_for_iter(
+                    iter_dir, experiment_dir,
+                )
             from orchestrator.metrics import log_retry_event
             log_retry_event(work_dir / "llm_metrics.jsonl", {
                 "iteration": iteration,
                 "phase": "execute-analyze",
                 "failure_type": "execute_incomplete",
                 "missing_artifacts": missing,
                 "max_turns": _max_turns_for("execute_analyze"),
+                "undeclared_writes": incomplete_undeclared,
             })
             # Clean up the experiment worktree so a re-run isn't blocked.
             if repo_path and experiment_id:
@@ -1031,6 +1132,18 @@ def _max_turns_for(phase_key: str) -> int:
                 "Executor artifacts failed post-check validation: %s",
                 result["errors"],
             )
+        # #230: surface undeclared writes that would be lost when the
+        # worktree is removed below. Persist into findings.json so the
+        # design agent on iter-N+1 can see what to declare in
+        # ``code_changes``. Tripwire only — never blocks cleanup.
+        if repo_path and experiment_id and experiment_dir is not None:
+            undeclared = _detect_undeclared_writes_for_iter(
+                iter_dir, experiment_dir,
+            )
+            if undeclared:
+                _record_undeclared_writes_in_findings(
+                    iter_dir / "findings.json", undeclared,
+                )
         # Clean up worktree only on success
         if repo_path and experiment_id:
             remove_experiment_worktree(Path(repo_path), experiment_id)
 
@@ -60,9 +60,21 @@ def load(self, template_name: str, context: dict[str, str]) -> str:
 
         remaining = _PLACEHOLDER_RE.findall(text)
         if remaining:
+            missing = sorted(set(remaining))
+            # #232: forensic logging on the resume-time placeholder bug.
+            # The error message names the missing placeholders; we add
+            # what keys WERE present in the context so the next
+            # occurrence produces evidence pointing at the actual cause
+            # (phase string mismatch, stale loader, resume-path field
+            # uninitialized, ...).
+            logger.error(
+                "prompt render failed: template=%s resolved_path=%s "
+                "missing_placeholders=%s context_keys=%s",
+                template_name, path, missing, sorted(context.keys()),
+            )
             raise ValueError(
                 f"Unreplaced placeholders in {template_name}.md: "
-                f"{', '.join(sorted(set(remaining)))}"
+                f"{', '.join(missing)}"
             )
 
         logger.debug("Loaded prompt %s (%d chars)", template_name, len(text))
 
@@ -84,6 +84,23 @@ properties:
         type: ["string", "null"]
         minLength: 1
         description: "Path to target system git repo. Used by CLIDispatcher for code-access agents. If set, experiments run in isolated worktrees."
+      worktree_extras:
+        type: array
+        items:
+          type: string
+          minLength: 1
+        uniqueItems: true
+        description: >
+          Paths (relative to repo_path) to symlink into each experiment
+          worktree on creation (#229). Use this for gitignored assets
+          the executor needs from main — virtualenvs, pre-fetched data
+          dirs, prior-iteration outputs, build artifacts. Without this
+          mechanism, executors discover that the worktree is missing
+          their tooling and `cd` back to the parent repo, silently
+          breaking isolation. Each entry must be a relative path that
+          resolves under repo_path; absolute paths and ``..`` traversal
+          are rejected at worktree creation. Source must exist in main
+          at the time of worktree creation.
 
   models:
     type: object
 
@@ -24,6 +24,12 @@
       "minimum": 0,
       "maximum": 100,
       "description": "Percentage of total effect from single dominant component, if detected."
+    },
+    "worktree_uncommitted_writes": {
+      "type": "array",
+      "items": { "type": "string", "minLength": 1 },
+      "uniqueItems": true,
+      "description": "#230 — paths the executor wrote inside the experiment worktree without declaring them in the bundle's `code_changes`. Surfaced just before worktree cleanup; logged at WARNING. Empty array (or absent) means the executor declared everything it wrote, or wrote nothing untracked. The orchestrator does not block cleanup on undeclared writes — this is a tripwire, not a gate."
     }
   },
   "$defs": {