diff --git a/README.md b/README.md index 706c21e..07f0ecc 100644 --- a/README.md +++ b/README.md @@ -123,6 +123,8 @@ When `repo_path` is set, the campaign directory is created inside the target rep The planner explores the codebase to discover metrics, knobs, and execution methods. You can optionally provide `observable_metrics` and `controllable_knobs` as hints — see [examples/campaign.yaml](examples/campaign.yaml) for all options. +If your target is a *running* system rather than a codebase (a cluster, a deployed service, a scratch directory that isn't a git repo), set `target_system.live_target: true`. The executor then runs directly in `repo_path` with no per-iteration `git worktree`, and the planner is told up front that arms must be probes — see [docs/quickstart.md#live-target-campaigns-live_target-true](docs/quickstart.md#live-target-campaigns-live_target-true) for details. + ### 5. Run a campaign ```bash diff --git a/docs/quickstart.md b/docs/quickstart.md index 69e0fd2..aa9e91e 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -125,6 +125,43 @@ After a campaign, your working directory contains: - **`runs/iter-N/inputs/`** — Agent-created input files (configs, workloads) - **`runs/iter-N/results/`** — Experiment output files +## Live-target campaigns (`live_target: true`) + +By default Nous treats `repo_path` as a git repo and creates a fresh `git worktree` per iteration so that any source-code patches are isolated. For some campaigns there is no codebase to evolve — the thing you want to study is a *running* system: a Kubernetes cluster, a deployed service, a dataset on disk, a non-git scratch directory. Setting `live_target: true` tells Nous to skip worktree creation and run the executor directly inside `repo_path`. + +Use it when: + +- The target is a live system you are probing, not a codebase you are mutating (e.g. a GPU cluster, a production-like service, a workload generator). 
+- `repo_path` points at a directory that is not a git repo, or is a git repo whose working tree must not be branched. +- The bundle should only contain probe-style arms (read-only queries, command-line invocations, observation runs) — never `code_changes`, and never changes to the target's persistent state. + +Example: + +```yaml +research_question: > + Why does p99 latency spike when the cluster autoscaler kicks in? + +target_system: + name: "Staging GPU cluster" + description: > + Live Kubernetes cluster running our inference workload. + The agent probes the cluster via kubectl and Prometheus; it does + not modify source code. + repo_path: /scratch/cluster-probe # any working directory; need not be a git repo + live_target: true + +prompts: + methodology_layer: "prompts/methodology" + domain_adapter_layer: null +``` + +How `live_target` differs from regular observe-mode arms: + +- **Observe mode** is a *bundle-level* property — no arm in the bundle has `code_changes`, so the executor skips patching and just runs commands. A campaign can still alternate observe-mode bundles and code-changing bundles from one iteration to the next, and a worktree is still created. +- **`live_target: true`** is a *campaign-level* property — it controls the *executor environment* (no worktree, run in `repo_path` directly) and tells the planner up front that the target is a shared running system, so every arm must be a probe. Bundles with `code_changes` arms are incoherent in this mode. + +Pick `live_target: true` when there is nothing meaningful to branch from; pick observe-mode arms when you have a real codebase but a particular iteration only needs to measure, not patch. + ## Choosing a model Defaults (from `defaults.yaml`): diff --git a/orchestrator/iteration.py b/orchestrator/iteration.py index 29e9712..203cccf 100644 --- a/orchestrator/iteration.py +++ b/orchestrator/iteration.py @@ -255,6 +255,13 @@ def run_iteration( Returns: An IterationOutcome value: COMPLETED, CONTINUE, ABORTED, or REDESIGN. """ + # Validate the campaign once, up front. 
The staticmethod on LLMDispatcher + # is also called from its constructor, but inline-agent mode never builds + # an LLMDispatcher — without this call, a non-bool `live_target` value + # would slip past validation and silently coerce via bool() below. + from orchestrator.llm_dispatch import validate_campaign + validate_campaign(campaign) + engine = Engine(work_dir) repo_path = campaign.get("target_system", {}).get("repo_path") @@ -370,7 +377,10 @@ def _max_turns_for(phase_key: str) -> int: cli_dispatcher.model = _model_for("execute_analyze") cli_dispatcher.max_turns = _max_turns_for("execute_analyze") exec_dispatcher = cli_dispatcher or llm_dispatcher - if repo_path: + live_target = bool( + campaign.get("target_system", {}).get("live_target", False) + ) + if repo_path and not live_target: from orchestrator.worktree import ( create_experiment_worktree, remove_experiment_worktree, @@ -380,6 +390,12 @@ def _max_turns_for(phase_key: str) -> int: ) (iter_dir / ".experiment_id").write_text(experiment_id) print(f" Experiment worktree: {experiment_dir}") + elif repo_path: + # Live-target mode: executor runs directly in repo_path. The + # target system is running (cluster, service, dataset) and there + # is nothing to isolate — bundles must contain no code_changes arms. + experiment_dir = Path(repo_path) + print(f" Live target: executor runs in {experiment_dir}") if cli_dispatcher: import contextlib ctx = cli_dispatcher.override_cwd(experiment_dir) if experiment_dir else contextlib.nullcontext() diff --git a/orchestrator/llm_dispatch.py b/orchestrator/llm_dispatch.py index d4f4ece..506a2eb 100644 --- a/orchestrator/llm_dispatch.py +++ b/orchestrator/llm_dispatch.py @@ -35,6 +35,85 @@ # Schema cache: schema_name -> parsed schema dict _schema_cache: dict[str, dict] = {} +# Prompt fragments that swap based on target_system.live_target. Worktree +# mode is the default — code-evolution campaigns get an isolated git worktree +# per iteration. 
Live-target mode is for running systems (clusters, services, +# datasets) that the executor probes without per-iteration code mutation. +# (The flag is `live_target` rather than `observational` to avoid colliding +# with the existing "observe mode" in execute_analyze.md, which means +# "the bundle has no code_changes arms.") +_WORKTREE_EXECUTION_ENV = ( + "You are running inside an isolated git worktree of the target system. " + "You own this worktree — reset it yourself with `git checkout -- .` " + "between conditions." +) +_LIVE_TARGET_EXECUTION_ENV = ( + "You are running directly against a live target system, in its working " + "directory. There is no per-iteration git isolation, and your bundle " + "must contain no `code_changes` arms. Do not mutate the target system's " + "persistent state — your job is to probe, measure, and report. Treat " + "any files you create as scratch artifacts that belong under " + "`{{iter_dir}}/inputs/` or `{{iter_dir}}/results/`, not in the target " + "directory." +) +_WORKTREE_DESIGN_CONSTRAINT = ( + "**Worktree isolation assumed.** The executor runs in a clean git " + "worktree. Each condition starts from clean state (`git checkout -- .` " + "runs between conditions). Design your experimental conditions assuming " + "this — don't include manual cleanup steps." +) +_LIVE_TARGET_DESIGN_CONSTRAINT = ( + "**Live target system.** The executor runs directly against a running " + "system — no git worktree, no code-change arms. All arms must be pure " + "observations of system state (probes, metrics, log scrapes). Do not " + "include `code_changes` in any arm; do not assume mutation is possible " + "without explicit consent gates." +) + +# Per-condition reset step in execute_analyze.md Phase 2. Worktree mode resets +# tracked files between conditions; live-target mode has no checkout to +# revert and instead reminds the agent not to mutate the live target. 
+_WORKTREE_CONDITION_RESET = "Reset worktree: `git checkout -- .`" +_LIVE_TARGET_CONDITION_RESET = ( + "Do not mutate the target system between conditions. Any files you " + "wrote to the target directory during the previous condition must be " + "removed before the next one runs (this is your responsibility — " + "there is no automatic checkout)." +) + + +def validate_campaign(campaign: dict) -> None: + """Validate campaign config. Module-level so it can be called before any + dispatcher is constructed (e.g., from `run_iteration` in inline-agent mode, + where no LLMDispatcher is built and the staticmethod path is never taken). + """ + ts = campaign.get("target_system") + if not isinstance(ts, dict): + raise ValueError( + "Campaign config missing 'target_system' section. " + "See examples/campaign.yaml for the expected format." + ) + required = ["name", "description"] + missing = [k for k in required if k not in ts] + if missing: + raise ValueError( + f"Campaign 'target_system' missing required keys: {missing}. " + f"See examples/campaign.yaml for the expected format." + ) + for field in ("observable_metrics", "controllable_knobs"): + val = ts.get(field) + if val is not None: + if not isinstance(val, list) or not all(isinstance(x, str) for x in val): + raise ValueError( + f"Campaign 'target_system.{field}' must be a list of strings. " + f"Got: {val!r}" + ) + if "live_target" in ts and not isinstance(ts["live_target"], bool): + raise ValueError( + f"Campaign 'target_system.live_target' must be a bool. 
" + f"Got: {ts['live_target']!r}" + ) + class LLMDispatcher: """Dispatch agent roles to an LLM and produce schema-conformant artifacts.""" @@ -50,7 +129,7 @@ def __init__( completion_fn: Callable | None = None, ) -> None: self.work_dir = Path(work_dir) - self._validate_campaign(campaign) + validate_campaign(campaign) self.campaign = campaign self.model = model self.loader = PromptLoader( @@ -84,29 +163,7 @@ def __init__( dal, ) - @staticmethod - def _validate_campaign(campaign: dict) -> None: - ts = campaign.get("target_system") - if not isinstance(ts, dict): - raise ValueError( - "Campaign config missing 'target_system' section. " - "See examples/campaign.yaml for the expected format." - ) - required = ["name", "description"] - missing = [k for k in required if k not in ts] - if missing: - raise ValueError( - f"Campaign 'target_system' missing required keys: {missing}. " - f"See examples/campaign.yaml for the expected format." - ) - for field in ("observable_metrics", "controllable_knobs"): - val = ts.get(field) - if val is not None: - if not isinstance(val, list) or not all(isinstance(x, str) for x in val): - raise ValueError( - f"Campaign 'target_system.{field}' must be a list of strings. 
" - f"Got: {val!r}" - ) + _validate_campaign = staticmethod(validate_campaign) # ------------------------------------------------------------------ # Public interface (satisfies Dispatcher protocol) @@ -212,6 +269,7 @@ def _build_context( perspective: str | None, ) -> dict[str, str]: ts = self.campaign["target_system"] + live_target = bool(ts.get("live_target", False)) ctx: dict[str, str] = { "target_system": ts["name"], "system_description": ts["description"], @@ -219,6 +277,9 @@ def _build_context( "controllable_knobs": ", ".join(ts["controllable_knobs"]) if ts.get("controllable_knobs") else "Not specified — planner should discover from code", "active_principles": self._format_principles(), "iteration": str(iteration), + "execution_environment": _LIVE_TARGET_EXECUTION_ENV if live_target else _WORKTREE_EXECUTION_ENV, + "worktree_constraint": _LIVE_TARGET_DESIGN_CONSTRAINT if live_target else _WORKTREE_DESIGN_CONSTRAINT, + "condition_reset": _LIVE_TARGET_CONDITION_RESET if live_target else _WORKTREE_CONDITION_RESET, } if phase == "design": diff --git a/orchestrator/schemas/campaign.schema.yaml b/orchestrator/schemas/campaign.schema.yaml index 4ca0be1..af3d9df 100644 --- a/orchestrator/schemas/campaign.schema.yaml +++ b/orchestrator/schemas/campaign.schema.yaml @@ -53,6 +53,9 @@ properties: type: ["string", "null"] minLength: 1 description: "Path to target system git repo. Used by CLIDispatcher for code-access agents. If set, experiments run in isolated worktrees." + live_target: + type: boolean + description: "If true, the executor runs directly in repo_path with no per-iteration git worktree. Use for campaigns that probe a running system (cluster, service, dataset) where there is no code to evolve. Bundles must contain no code_changes arms." 
models: type: object diff --git a/prompts/methodology/design.md b/prompts/methodology/design.md index 8a99173..4fb7ae0 100644 --- a/prompts/methodology/design.md +++ b/prompts/methodology/design.md @@ -158,7 +158,7 @@ Now design a hypothesis bundle based on what you actually observed and verified: - Predictions must be directional, falsifiable, and reference specific observable metrics. Do not invent arbitrary numeric thresholds unless campaign.yaml specifies them. - Base all experiment parameters on verified system behavior — if you didn't probe it, don't assume it. - **No `sed`/`awk` for code changes.** When describing code modifications in problem framing or bundle arms, describe the *intent* (what to change and why). The executor agent will implement changes properly via file edits, verify they compile, and create reusable `git diff` patches. Never suggest inline shell regex as an implementation strategy. -- **Worktree isolation assumed.** The executor runs in a clean git worktree. Each condition starts from clean state (`git checkout -- .` runs between conditions). Design your experimental conditions assuming this — don't include manual cleanup steps. +- {{worktree_constraint}} ## Output — Write Files Directly diff --git a/prompts/methodology/execute_analyze.md b/prompts/methodology/execute_analyze.md index 008310a..1ddd34a 100644 --- a/prompts/methodology/execute_analyze.md +++ b/prompts/methodology/execute_analyze.md @@ -1,6 +1,6 @@ You are a scientific executor for the Nous hypothesis-driven experimentation framework. -You have **shell access**. You are running inside an isolated git worktree of the target system. You own this worktree — reset it yourself with `git checkout -- .` between conditions. +You have **shell access**. {{execution_environment}} Your job has FIVE phases — all in one session with full context: 1. 
**Prepare** — build, create patches, validate ALL commands @@ -105,7 +105,7 @@ arms: ``` **Important:** -- All output paths MUST use absolute paths under `{{iter_dir}}/results/`. Do NOT use relative paths — the experiment runs in a worktree that gets cleaned up. +- All output paths MUST use absolute paths under `{{iter_dir}}/results/`. Do NOT use relative paths — only files under `{{iter_dir}}/` are guaranteed to persist past this session. - Create per-arm result subdirectories before writing output: `mkdir -p {{iter_dir}}/results/` (the top-level `results/` already exists, but per-arm subdirectories like `results/h-main/` do not). - If you create ANY input files for the experiment (config files, workload specs, policy definitions, parameter files), write them to `{{iter_dir}}/inputs/` and list them in the condition's `inputs` array. Do NOT write input files to `/tmp/` or other temporary locations — they will be lost and the experiment will not be reproducible. @@ -114,13 +114,13 @@ arms: Run the experiment plan you wrote in Step 4 — execute every command exactly as written. The plan is the source of truth. For each condition: -1. Reset worktree: `git checkout -- .` +1. {{condition_reset}} 2. Run the `cmd` from the plan 3. Verify the `output` file was created at the expected path After each baseline+treatment pair with the same seed, compare key metrics. If they are byte-identical, STOP and investigate — the patch may not be affecting the code path. -**All results must land in `{{iter_dir}}/results/`.** The worktree is temporary — anything written there will be lost. +**All results must land in `{{iter_dir}}/results/`.** Only files under `{{iter_dir}}/` are guaranteed to persist — anything written elsewhere may be lost. 
## Phase 3: Analyze and Write Findings diff --git a/tests/test_live_target.py b/tests/test_live_target.py new file mode 100644 index 0000000..87db9ac --- /dev/null +++ b/tests/test_live_target.py @@ -0,0 +1,309 @@ +"""Tests for live-target mode — campaigns where the executor probes a running +target system instead of evolving code in a git worktree. +""" +import contextlib +import json +import shutil +import warnings +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from orchestrator.dispatch import StubDispatcher +from orchestrator.engine import Engine +from orchestrator.iteration import IterationOutcome, run_iteration +from orchestrator.llm_dispatch import ( + LLMDispatcher, + _LIVE_TARGET_DESIGN_CONSTRAINT, + _LIVE_TARGET_EXECUTION_ENV, + _WORKTREE_DESIGN_CONSTRAINT, + _WORKTREE_EXECUTION_ENV, +) + + +class _CLIStub(StubDispatcher): + """StubDispatcher with the CLIDispatcher surface area iteration.py + needs (override_cwd context manager, model/max_turns attrs). 
+ """ + + def __init__(self, work_dir, **_kw): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + super().__init__(work_dir) + self.model = "stub" + self.max_turns = 1 + + @contextlib.contextmanager + def override_cwd(self, _cwd): + yield + + +TEMPLATES_DIR = ( + Path(__file__).resolve().parent.parent / "orchestrator" / "templates" +) + + +def _campaign(live_target: bool, repo_path: Path | None = None) -> dict: + target = { + "name": "TestSystem", + "description": "A live target with no code to evolve.", + "observable_metrics": ["latency_ms"], + "controllable_knobs": ["config"], + } + if live_target: + target["live_target"] = True + if repo_path is not None: + target["repo_path"] = str(repo_path) + return { + "research_question": "Does the live target behave?", + "target_system": target, + "prompts": { + "methodology_layer": "prompts/methodology", + "domain_adapter_layer": None, + }, + } + + +# --------------------------------------------------------------------------- +# _validate_campaign +# --------------------------------------------------------------------------- + + +class TestCampaignValidation: + def test_live_target_true_accepted(self, tmp_path): + campaign = _campaign(live_target=True) + # Must not raise. 
+ LLMDispatcher._validate_campaign(campaign) + + def test_live_target_false_accepted(self, tmp_path): + campaign = _campaign(live_target=False) + LLMDispatcher._validate_campaign(campaign) + + def test_live_target_omitted_accepted(self, tmp_path): + campaign = _campaign(live_target=False) + assert "live_target" not in campaign["target_system"] + LLMDispatcher._validate_campaign(campaign) + + def test_live_target_non_bool_rejected(self): + campaign = _campaign(live_target=False) + campaign["target_system"]["live_target"] = "yes" + with pytest.raises(ValueError, match="live_target"): + LLMDispatcher._validate_campaign(campaign) + + +# --------------------------------------------------------------------------- +# _build_context — prompt fragment selection +# --------------------------------------------------------------------------- + + +class TestPromptFragmentSelection: + """The execution_environment and worktree_constraint placeholders swap + based on target_system.live_target. The prompt loader will substitute + them into the design and execute_analyze templates. + """ + + def _dispatcher(self, tmp_path, live_target: bool) -> LLMDispatcher: + # Seed the work_dir with the run_id only — no API key needed because + # _build_context never calls the LLM. 
+ work_dir = tmp_path / "work" + work_dir.mkdir() + (work_dir / "runs" / "iter-1").mkdir(parents=True) + return LLMDispatcher( + work_dir=work_dir, + campaign=_campaign(live_target=live_target), + completion_fn=lambda **kw: None, + ) + + def test_default_is_worktree(self, tmp_path): + d = self._dispatcher(tmp_path, live_target=False) + ctx = d._build_context("planner", "design", iteration=1, perspective=None) + assert ctx["execution_environment"] == _WORKTREE_EXECUTION_ENV + assert ctx["worktree_constraint"] == _WORKTREE_DESIGN_CONSTRAINT + + def test_live_target_swaps_text(self, tmp_path): + d = self._dispatcher(tmp_path, live_target=True) + ctx = d._build_context("planner", "design", iteration=1, perspective=None) + # _LIVE_TARGET_EXECUTION_ENV embeds {{iter_dir}}, so context-level + # equality holds (substitution happens later, in the loader). + assert ctx["execution_environment"] == _LIVE_TARGET_EXECUTION_ENV + assert ctx["worktree_constraint"] == _LIVE_TARGET_DESIGN_CONSTRAINT + + def test_design_template_renders_with_live_target_constraint(self, tmp_path): + """End-to-end: the real design.md picks up the live-target constraint + and drops the worktree variant. + """ + d = self._dispatcher(tmp_path, live_target=True) + ctx = d._build_context("planner", "design", iteration=1, perspective=None) + rendered = d.loader.load("design", ctx) + assert _WORKTREE_DESIGN_CONSTRAINT not in rendered + assert _LIVE_TARGET_DESIGN_CONSTRAINT in rendered + + def test_execute_analyze_template_renders_with_live_target_env(self, tmp_path): + """End-to-end: execute_analyze.md picks up the live-target execution + environment. The {{iter_dir}} embedded in the live-target text must + be substituted by the loader's sequential pass. 
+ """ + d = self._dispatcher(tmp_path, live_target=True) + # _build_context for execute-analyze needs a bundle.yaml and handoff.md + bundle_path = d.work_dir / "runs" / "iter-1" / "bundle.yaml" + bundle_path.write_text("metadata:\n iteration: 1\n") + (d.work_dir / "handoff.md").write_text("(stub handoff)") + (d.work_dir / "runs" / "iter-1" / "problem.md").write_text("(stub problem)") + ctx = d._build_context( + "executor", "execute-analyze", iteration=1, perspective=None, + ) + rendered = d.loader.load("execute_analyze", ctx) + assert _WORKTREE_EXECUTION_ENV not in rendered + # The live-target fragment is rendered AFTER {{iter_dir}} substitution, + # so we assert against the post-substitution version. + iter_dir = str((d.work_dir / "runs" / "iter-1").resolve()) + assert _LIVE_TARGET_EXECUTION_ENV.replace("{{iter_dir}}", iter_dir) in rendered + assert "{{iter_dir}}" not in rendered # no leftover placeholders + + +# --------------------------------------------------------------------------- +# Iteration loop: live-target mode skips worktree creation +# --------------------------------------------------------------------------- + + +def _setup_iteration( + tmp_path: Path, + monkeypatch, + *, + repo_path: Path, + live_target: bool, +): + """Prepare a work_dir + campaign for an iteration test. Stubs the LLM and + CLI dispatchers and the human gate so run_iteration completes without an + API key. Use `live_target=True` to test the live-target path, + `live_target=False` to test the worktree path. 
+ """ + work_dir = tmp_path / "work" + work_dir.mkdir() + for t in ("state.json", "ledger.json", "principles.json"): + shutil.copy(TEMPLATES_DIR / t, work_dir / t) + state = json.loads((work_dir / "state.json").read_text()) + state["run_id"] = "test" + (work_dir / "state.json").write_text(json.dumps(state, indent=2)) + + campaign = _campaign(live_target=live_target, repo_path=repo_path) + + import orchestrator.iteration as ri + + def stub_factory(work_dir, campaign, model=None): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return StubDispatcher(work_dir) + + monkeypatch.setattr(ri, "LLMDispatcher", stub_factory) + # When repo_path is set, iteration.py would normally instantiate a + # CLIDispatcher. Replace it with a stub that exposes the same surface + # iteration.py touches (override_cwd, model, max_turns). + monkeypatch.setattr( + "orchestrator.cli_dispatch.CLIDispatcher", + lambda **kw: _CLIStub(kw["work_dir"]), + ) + monkeypatch.setattr( + ri, "HumanGate", + lambda: MagicMock(prompt=MagicMock(return_value=("approve", None))), + ) + return work_dir, campaign + + +def _setup_live_target_iteration(tmp_path: Path, monkeypatch, *, repo_path: Path): + """Convenience wrapper for the live-target path.""" + return _setup_iteration( + tmp_path, monkeypatch, repo_path=repo_path, live_target=True, + ) + + +class TestLiveTargetIterationFlow: + def test_runs_without_git_repo(self, tmp_path, monkeypatch): + """A non-git repo_path + live_target=true must not raise + FileNotFoundError('Not a git repository') and must complete the + iteration. This is the regression for the magic.yaml campaign. + """ + repo = tmp_path / "live-target" + repo.mkdir() # NOT a git repo — no .git/ here. 
+ + work_dir, campaign = _setup_live_target_iteration( + tmp_path, monkeypatch, repo_path=repo, + ) + result = run_iteration(campaign, work_dir, iteration=1) + assert result == IterationOutcome.COMPLETED + assert Engine(work_dir).phase == "DONE" + + def test_no_experiment_worktree_created(self, tmp_path, monkeypatch): + repo = tmp_path / "live-target" + repo.mkdir() + work_dir, campaign = _setup_live_target_iteration( + tmp_path, monkeypatch, repo_path=repo, + ) + + # Replace create_experiment_worktree with a sentinel that fails the + # test if it is ever called. The iteration import is local, so patch + # at the source module. + called = {"n": 0} + + def must_not_call(*a, **kw): + called["n"] += 1 + raise AssertionError( + "create_experiment_worktree must not be called in " + "live-target mode" + ) + + monkeypatch.setattr( + "orchestrator.worktree.create_experiment_worktree", must_not_call, + ) + + run_iteration(campaign, work_dir, iteration=1) + + assert called["n"] == 0 + # No .experiment_id file should be written in live-target mode. + assert not (work_dir / "runs" / "iter-1" / ".experiment_id").exists() + # No .nous-experiments/ directory should appear in the target. + assert not (repo / ".nous-experiments").exists() + + +class TestWorktreeIterationFlow: + """Regression: with live_target=False (or omitted), repo_path must + still trigger create_experiment_worktree. Without this test, inverting + the gate at iteration.py would only break live-target tests. 
+ """ + + def test_worktree_created_when_not_live_target(self, tmp_path, monkeypatch): + repo = tmp_path / "code-target" + repo.mkdir() + work_dir, campaign = _setup_iteration( + tmp_path, monkeypatch, repo_path=repo, live_target=False, + ) + + create_calls: list[tuple] = [] + remove_calls: list[tuple] = [] + + def fake_create(repo_path, iteration): + create_calls.append((Path(repo_path), iteration)) + experiment_dir = tmp_path / "fake-worktree" + experiment_dir.mkdir(exist_ok=True) + return experiment_dir, "fake-experiment-id" + + def fake_remove(repo_path, experiment_id): + remove_calls.append((Path(repo_path), experiment_id)) + + monkeypatch.setattr( + "orchestrator.worktree.create_experiment_worktree", fake_create, + ) + monkeypatch.setattr( + "orchestrator.worktree.remove_experiment_worktree", fake_remove, + ) + + result = run_iteration(campaign, work_dir, iteration=1) + + assert result == IterationOutcome.COMPLETED + assert create_calls == [(repo, 1)] + assert remove_calls == [(repo, "fake-experiment-id")] + # .experiment_id file should be written in worktree mode. + assert ( + work_dir / "runs" / "iter-1" / ".experiment_id" + ).read_text() == "fake-experiment-id"