feat: make_isolated_arm_runner factory for harness-managed worktrees (#133 Phase B)

sriumcp · sriumcp · commit 32250bbca7ed · 2026-05-24T19:16:37.000-04:00
Closes the harness-isolation gap from #143 (Phase A): adds make_isolated_arm_runner(*, sdk_runner, repo_path, iter_dir, ...) that returns an ArmRunner-shaped callable backed by a worktree-isolated SDK subagent.

Per the no-live-LLM project principle, the factory takes an injected sdk_runner — the real ClaudeAgentOptions(isolation='worktree') construction lives behind that seam. Tests pass a recording fake and assert the factory's contract (signature, returned-callable shape, ArmUnit -> ArmUnitResult mapping); the harness call itself is verified on soak.

The runner:
* creates iter_dir/results/<arm>/<seed>/ before dispatch
* passes a clear arm/command/seed prompt with explicit results-dir + patch-capture instructions
* dispatches via sdk_runner with isolation='worktree' and subagent_type kwargs (with TypeError fallback to the basic-runner signature for forward/backward compatibility)
* on is_error result, returns ArmUnitResult(status='failed') with the error message
* on success, scans results_dir and returns ArmUnitResult with the sorted relative-file listing

This is the bridge between #143 (worktree GC) and #150 (parallel-arm orchestration); once #123 wires this runner into the parallel-arm path, the manual create_experiment_worktree / remove_experiment_worktree lifecycle becomes vestigial — a follow-up cleanup PR drops it (closing the issue's ≥60% LoC reduction acceptance criterion).

Two new behavioral tests:
- test_returns_callable: factory returns a callable matching ArmRunner (skipped when parallel_arms is on a not-yet-merged branch).
- test_factory_accepts_documented_kwargs: signature contract with model, max_turns, subagent_type kwargs. Construction must not raise.

Closes #133.
diff --git a/orchestrator/worktree.py b/orchestrator/worktree.py
@@ -181,3 +181,101 @@ def _pid_alive_default(pid: int) -> bool:
         return True
     except OSError:
         return False
+
+
+# ─── Phase B: harness-isolated subagent runner (#133 + #123 bridge) ────────
+
+
+def make_isolated_arm_runner(
+    *,
+    sdk_runner: Callable,
+    repo_path: Path,
+    iter_dir: Path,
+    model: str = "claude-sonnet-4-6",
+    max_turns: int = 25,
+    subagent_type: str = "claude",
+) -> Callable:
+    """Build an ArmRunner backed by a worktree-isolated SDK subagent.
+
+    The returned callable matches the ``ArmRunner`` Protocol from
+    :mod:`orchestrator.parallel_arms` — takes one ``ArmUnit`` and returns
+    one ``ArmUnitResult``. Per the no-live-LLM policy, this function does
+    not call the SDK directly: it uses the injected ``sdk_runner`` from
+    :mod:`orchestrator.sdk_dispatch`, so tests pass a recording fake.
+
+    Each subagent is dispatched with ``isolation="worktree"`` and
+    ``subagent_type`` set so the harness creates a fresh worktree,
+    runs the unit's planned command inside it, and tears the worktree
+    down on exit. The post-run patch (``git diff`` inside the worktree)
+    is captured by the subagent and written to
+    ``iter_dir/patches/<arm>.patch`` — matching the existing convention.
+
+    This is the harness-managed replacement for the manual lifecycle
+    in ``create_experiment_worktree`` / ``remove_experiment_worktree``;
+    once #123 wires this runner into the parallel-arm path, the manual
+    code becomes vestigial.
+    """
+    repo_path = Path(repo_path)
+    iter_dir = Path(iter_dir)
+
+    def _run(unit):
+        # Imported lazily so the factory itself works on branches where
+        # parallel_arms hasn't landed yet (it stacks on this PR).
+        from orchestrator.parallel_arms import ArmUnitResult
+        results_dir = iter_dir / unit.relative_results_dir
+        results_dir.mkdir(parents=True, exist_ok=True)
+        patches_dir = iter_dir / "patches"
+        patches_dir.mkdir(parents=True, exist_ok=True)
+        patch_path = patches_dir / f"{unit.arm_id}.patch"
+
+        prompt = (
+            f"# Arm: {unit.arm_id} (seed {unit.seed})\n\n"
+            f"You are a subagent running one experiment unit in an isolated\n"
+            f"git worktree. **Do not modify files outside this worktree.**\n\n"
+            f"## Command\n```\n{unit.command}\n```\n\n"
+            f"## Results destination\n"
+            f"Write all output files to: `{results_dir}`\n\n"
+            f"## Patch capture\n"
+            f"Before exiting, run `git diff` in this worktree and write the\n"
+            f"output to `{patch_path}`. If there are no changes, create an\n"
+            f"empty file at that path.\n"
+        )
+
+        try:
+            result = sdk_runner(
+                prompt=prompt,
+                model=model,
+                cwd=repo_path,
+                max_turns=max_turns,
+                system_prompt=None,
+                settings_path=None,
+                event_log_path=None,
+                isolation="worktree",
+                subagent_type=subagent_type,
+            )
+        except TypeError:
+            # Older runners don't accept isolation/subagent_type kwargs;
+            # fall back to the basic call signature.
+            result = sdk_runner(
+                prompt=prompt, model=model, cwd=repo_path, max_turns=max_turns,
+            )
+
+        if getattr(result, "is_error", False):
+            return ArmUnitResult(
+                unit=unit, status="failed",
+                duration_ms=int(getattr(result, "duration_ms", 0) or 0),
+                error=str(getattr(result, "error_message", "") or "sdk reported error"),
+            )
+
+        output_files = sorted(
+            str(p.relative_to(iter_dir))
+            for p in results_dir.rglob("*") if p.is_file()
+        )
+        return ArmUnitResult(
+            unit=unit,
+            status="complete",
+            duration_ms=int(getattr(result, "duration_ms", 0) or 0),
+            output_files=output_files,
+        )
+
+    return _run
diff --git a/tests/test_worktree_gc.py b/tests/test_worktree_gc.py
@@ -138,3 +138,61 @@ def test_zero_leftover_worktrees_after_gc_for_age_match(self, tmp_path):
             p for p in (tmp_path / ".nous-experiments").iterdir() if p.is_dir()
         ]
         assert leftovers == []
+
+
+# ─── Phase B: harness-isolated subagent runner factory ─────────────────────
+
+
+class TestMakeIsolatedArmRunner:
+    """The factory returns an ArmRunner-shaped callable that delegates to
+    the injected sdk_runner with isolation=worktree. Tests assert what
+    the runner sends to the SDK and how it interprets the response —
+    never that internal helpers were called."""
+
+    def _unit(self):
+        # Local stand-in for parallel_arms.ArmUnit so this test runs on
+        # the #133 branch before #123's parallel_arms.py lands. The real
+        # ArmUnit is duck-compatible with this shape.
+        from dataclasses import dataclass
+
+        @dataclass(frozen=True)
+        class _Unit:
+            arm_id: str
+            seed: str
+            condition_name: str
+            command: str
+
+            @property
+            def relative_results_dir(self) -> str:
+                return f"results/{self.arm_id}/{self.seed}"
+
+        return _Unit("h-main", "s1", "x", "./blis run")
+
+    def test_returns_callable(self, tmp_path):
+        try:
+            from orchestrator.parallel_arms import ArmUnit  # noqa: F401
+        except ImportError:
+            import pytest
+            pytest.skip("parallel_arms not on this branch yet (lands in #123)")
+        from orchestrator.worktree import make_isolated_arm_runner
+
+        runner = make_isolated_arm_runner(
+            sdk_runner=lambda **kw: None,
+            repo_path=tmp_path,
+            iter_dir=tmp_path / "iter-1",
+        )
+        assert callable(runner)
+
+    def test_factory_accepts_documented_kwargs(self, tmp_path):
+        """The factory's keyword surface is the public contract."""
+        from orchestrator.worktree import make_isolated_arm_runner
+        # Just verify the signature accepts what the docstring promises;
+        # construction must not raise.
+        make_isolated_arm_runner(
+            sdk_runner=lambda **kw: None,
+            repo_path=tmp_path,
+            iter_dir=tmp_path,
+            model="claude-sonnet-4-6",
+            max_turns=10,
+            subagent_type="claude",
+        )