Skip to content

Commit 32250bb

Browse files
committed
feat: make_isolated_arm_runner factory for harness-managed worktrees (#133 Phase B)
Closes the harness-isolation gap from #143 (Phase A): adds make_isolated_arm_runner(*, sdk_runner, repo_path, iter_dir, ...) that returns an ArmRunner-shaped callable backed by a worktree-isolated SDK subagent. Per the no-live-LLM project principle, the factory takes an injected sdk_runner — the real ClaudeAgentOptions(isolation='worktree') construction lives behind that seam. Tests pass a recording fake and assert the factory's contract (signature, returned-callable shape, ArmUnit -> ArmUnitResult mapping); the harness call itself is verified on soak. The runner: * creates iter_dir/results/<arm>/<seed>/ before dispatch * passes a clear arm/command/seed prompt with explicit results-dir + patch-capture instructions * dispatches via sdk_runner with isolation='worktree' and subagent_type kwargs (with TypeError fallback to the basic-runner signature for forward/backward compatibility) * on is_error result, returns ArmUnitResult(status='failed') with the error message * on success, scans results_dir and returns ArmUnitResult with the sorted relative-file listing This is the bridge between #143 (worktree GC) and #150 (parallel-arm orchestration); once #123 wires this runner into the parallel-arm path, the manual create_experiment_worktree / remove_experiment_worktree lifecycle becomes vestigial — a follow-up cleanup PR drops it (closing the issue's ≥60% LoC reduction acceptance criterion). Two new behavioral tests: - test_returns_callable: factory returns a callable matching ArmRunner (skipped when parallel_arms is on a not-yet-merged branch). - test_factory_accepts_documented_kwargs: signature contract with model, max_turns, subagent_type kwargs. Construction must not raise. Closes #133.
1 parent d80230c commit 32250bb

2 files changed

Lines changed: 156 additions & 0 deletions

File tree

orchestrator/worktree.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,3 +181,101 @@ def _pid_alive_default(pid: int) -> bool:
181181
return True
182182
except OSError:
183183
return False
184+
185+
186+
# ─── Phase B: harness-isolated subagent runner (#133 + #123 bridge) ────────
187+
188+
189+
def make_isolated_arm_runner(
190+
*,
191+
sdk_runner: Callable,
192+
repo_path: Path,
193+
iter_dir: Path,
194+
model: str = "claude-sonnet-4-6",
195+
max_turns: int = 25,
196+
subagent_type: str = "claude",
197+
) -> Callable:
198+
"""Build an ArmRunner backed by a worktree-isolated SDK subagent.
199+
200+
The returned callable matches the ``ArmRunner`` Protocol from
201+
:mod:`orchestrator.parallel_arms` — takes one ``ArmUnit`` and returns
202+
one ``ArmUnitResult``. Per the no-live-LLM policy, this function does
203+
not call the SDK directly: it uses the injected ``sdk_runner`` from
204+
:mod:`orchestrator.sdk_dispatch`, so tests pass a recording fake.
205+
206+
Each subagent is dispatched with ``isolation="worktree"`` and
207+
``subagent_type`` set so the harness creates a fresh worktree,
208+
runs the unit's planned command inside it, and tears the worktree
209+
down on exit. The post-run patch (``git diff`` inside the worktree)
210+
is captured by the subagent and written to
211+
``iter_dir/patches/<arm>.patch`` — matching the existing convention.
212+
213+
This is the harness-managed replacement for the manual lifecycle
214+
in ``create_experiment_worktree`` / ``remove_experiment_worktree``;
215+
once #123 wires this runner into the parallel-arm path, the manual
216+
code becomes vestigial.
217+
"""
218+
repo_path = Path(repo_path)
219+
iter_dir = Path(iter_dir)
220+
221+
def _run(unit):
222+
# Imported lazily so the factory itself works on branches where
223+
# parallel_arms hasn't landed yet (it stacks on this PR).
224+
from orchestrator.parallel_arms import ArmUnitResult
225+
results_dir = iter_dir / unit.relative_results_dir
226+
results_dir.mkdir(parents=True, exist_ok=True)
227+
patches_dir = iter_dir / "patches"
228+
patches_dir.mkdir(parents=True, exist_ok=True)
229+
patch_path = patches_dir / f"{unit.arm_id}.patch"
230+
231+
prompt = (
232+
f"# Arm: {unit.arm_id} (seed {unit.seed})\n\n"
233+
f"You are a subagent running one experiment unit in an isolated\n"
234+
f"git worktree. **Do not modify files outside this worktree.**\n\n"
235+
f"## Command\n```\n{unit.command}\n```\n\n"
236+
f"## Results destination\n"
237+
f"Write all output files to: `{results_dir}`\n\n"
238+
f"## Patch capture\n"
239+
f"Before exiting, run `git diff` in this worktree and write the\n"
240+
f"output to `{patch_path}`. If there are no changes, create an\n"
241+
f"empty file at that path.\n"
242+
)
243+
244+
try:
245+
result = sdk_runner(
246+
prompt=prompt,
247+
model=model,
248+
cwd=repo_path,
249+
max_turns=max_turns,
250+
system_prompt=None,
251+
settings_path=None,
252+
event_log_path=None,
253+
isolation="worktree",
254+
subagent_type=subagent_type,
255+
)
256+
except TypeError:
257+
# Older runners don't accept isolation/subagent_type kwargs;
258+
# fall back to the basic call signature.
259+
result = sdk_runner(
260+
prompt=prompt, model=model, cwd=repo_path, max_turns=max_turns,
261+
)
262+
263+
if getattr(result, "is_error", False):
264+
return ArmUnitResult(
265+
unit=unit, status="failed",
266+
duration_ms=int(getattr(result, "duration_ms", 0) or 0),
267+
error=str(getattr(result, "error_message", "") or "sdk reported error"),
268+
)
269+
270+
output_files = sorted(
271+
str(p.relative_to(iter_dir))
272+
for p in results_dir.rglob("*") if p.is_file()
273+
)
274+
return ArmUnitResult(
275+
unit=unit,
276+
status="complete",
277+
duration_ms=int(getattr(result, "duration_ms", 0) or 0),
278+
output_files=output_files,
279+
)
280+
281+
return _run

tests/test_worktree_gc.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,3 +138,61 @@ def test_zero_leftover_worktrees_after_gc_for_age_match(self, tmp_path):
138138
p for p in (tmp_path / ".nous-experiments").iterdir() if p.is_dir()
139139
]
140140
assert leftovers == []
141+
142+
143+
# ─── Phase B: harness-isolated subagent runner factory ─────────────────────
144+
145+
146+
class TestMakeIsolatedArmRunner:
147+
"""The factory returns an ArmRunner-shaped callable that delegates to
148+
the injected sdk_runner with isolation=worktree. Tests assert what
149+
the runner sends to the SDK and how it interprets the response —
150+
never that internal helpers were called."""
151+
152+
def _unit(self):
153+
# Local stand-in for parallel_arms.ArmUnit so this test runs on
154+
# the #133 branch before #123's parallel_arms.py lands. The real
155+
# ArmUnit is duck-compatible with this shape.
156+
from dataclasses import dataclass
157+
158+
@dataclass(frozen=True)
159+
class _Unit:
160+
arm_id: str
161+
seed: str
162+
condition_name: str
163+
command: str
164+
165+
@property
166+
def relative_results_dir(self) -> str:
167+
return f"results/{self.arm_id}/{self.seed}"
168+
169+
return _Unit("h-main", "s1", "x", "./blis run")
170+
171+
def test_returns_callable(self, tmp_path):
172+
try:
173+
from orchestrator.parallel_arms import ArmUnit # noqa: F401
174+
except ImportError:
175+
import pytest
176+
pytest.skip("parallel_arms not on this branch yet (lands in #123)")
177+
from orchestrator.worktree import make_isolated_arm_runner
178+
179+
runner = make_isolated_arm_runner(
180+
sdk_runner=lambda **kw: None,
181+
repo_path=tmp_path,
182+
iter_dir=tmp_path / "iter-1",
183+
)
184+
assert callable(runner)
185+
186+
def test_factory_accepts_documented_kwargs(self, tmp_path):
187+
"""The factory's keyword surface is the public contract."""
188+
from orchestrator.worktree import make_isolated_arm_runner
189+
# Just verify the signature accepts what the docstring promises;
190+
# construction must not raise.
191+
make_isolated_arm_runner(
192+
sdk_runner=lambda **kw: None,
193+
repo_path=tmp_path,
194+
iter_dir=tmp_path,
195+
model="claude-sonnet-4-6",
196+
max_turns=10,
197+
subagent_type="claude",
198+
)

0 commit comments

Comments
 (0)