Skip to content

Commit 265018a

Browse files
susiejojo and claude authored
feat: capture campaign metadata in enriched campaign.yaml at init (#116)
At campaign init, writes an enriched copy of campaign.yaml into the work directory with a `runtime:` block containing target_repo, target_commit, nous_version, and started_at. Also adds optional `metadata` field to campaign schema for user-supplied tags/goals. Each git/importlib call is individually failure-tolerant — missing git or non-repo targets degrade gracefully to null. The enriched copy is only written on fresh init (not resume). Closes #115 Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 49510b3 commit 265018a

5 files changed

Lines changed: 278 additions & 6 deletions

File tree

orchestrator/campaign.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,10 @@ def main() -> None:
393393

394394
run_id = args.run_id or campaign.get("run_id") or campaign_path.parent.name + "-run"
395395
repo_path = campaign.get("target_system", {}).get("repo_path")
396-
work_dir = setup_work_dir(run_id, repo_path=repo_path)
396+
work_dir = setup_work_dir(
397+
run_id, repo_path=repo_path,
398+
campaign_path=campaign_path, campaign=campaign,
399+
)
397400
print(f"Working directory: {work_dir.resolve()}")
398401
print(f"Max iterations: {max_iter}")
399402

orchestrator/cli.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,10 @@ def _cmd_run(args):
9696
)
9797
sys.exit(1)
9898

99-
work_dir = setup_work_dir(run_id, repo_path=repo_path)
99+
work_dir = setup_work_dir(
100+
run_id, repo_path=repo_path,
101+
campaign_path=campaign_path, campaign=campaign,
102+
)
100103

101104
max_iterations = args.max_iterations if args.max_iterations is not None else campaign.get("max_iterations", 10)
102105
run_campaign(

orchestrator/iteration.py

Lines changed: 89 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717
--agent inline: Prompts emitted to stdout for the calling agent.
1818
"""
1919
import argparse
20+
import importlib.metadata as importlib_metadata
2021
import json
2122
import logging
2223
import re
2324
import shutil
25+
import subprocess
2426
import sys
2527
from datetime import datetime, timezone
2628
from enum import Enum
@@ -187,12 +189,79 @@ def _merge_principles(work_dir: Path, iter_dir: Path) -> None:
187189
atomic_write(principles_path, json.dumps(store, indent=2) + "\n")
188190

189191

190-
def setup_work_dir(run_id: str, repo_path: str | None = None) -> Path:
192+
def _git_output(cwd: str, *args: str) -> str | None:
    """Run ``git -C <cwd> <args>`` and return its stripped stdout.

    Returns None when git prints nothing. Raises
    ``subprocess.CalledProcessError`` on a non-zero exit and ``OSError``
    (including ``FileNotFoundError`` when git is not installed); callers
    decide how to degrade.
    """
    out = subprocess.check_output(
        ["git", "-C", cwd, *args],
        text=True, stderr=subprocess.DEVNULL,
    ).strip()
    return out or None


def _normalize_remote(remote: str) -> str | None:
    """Reduce a git remote URL to a value that is safe to persist.

    GitHub remotes (SSH or HTTPS) collapse to ``org/repo``. Any other remote
    is returned as-is, except that HTTP(S) URLs have their userinfo
    (``user:token@``) removed so embedded credentials never end up in the
    enriched campaign.yaml. An empty string yields None.
    """
    if not remote:
        return None
    if remote.startswith("git@github.com:"):
        # SSH: git@github.com:org/repo.git
        return remote.split(":")[-1].removesuffix(".git")
    if "github.com/" in remote:
        # HTTPS / ssh://: https://github.com/org/repo.git
        return remote.split("github.com/")[-1].removesuffix(".git")

    scheme, sep, rest = remote.partition("://")
    if sep and scheme.lower() in ("http", "https"):
        # Tokens are commonly embedded as https://user:token@host/...;
        # keep only the host part of the authority.
        authority, slash, path = rest.partition("/")
        host = authority.rpartition("@")[2]
        return f"{scheme}://{host}{slash}{path}"
    return remote


def _capture_runtime_meta(repo_path: str | None) -> dict:
    """Capture runtime metadata at campaign init time.

    Args:
        repo_path: Path to the target git repository, or None/empty to skip
            the target_commit / target_repo lookups.

    Returns:
        A dict with keys target_repo, target_commit, nous_version and
        started_at (UTC, ISO 8601). Each git/importlib lookup is wrapped
        individually: a failure logs a warning and leaves that field None.
    """
    meta: dict = {
        "target_repo": None,
        "target_commit": None,
        "nous_version": None,
        "started_at": datetime.now(timezone.utc).isoformat(),
    }
    # FileNotFoundError (git missing) is a subclass of OSError.
    git_errors = (subprocess.CalledProcessError, OSError)

    if repo_path:
        # Target repo commit
        try:
            meta["target_commit"] = _git_output(repo_path, "rev-parse", "HEAD")
        except git_errors:
            logger.warning("Could not capture target_commit from %s", repo_path)

        # Target repo remote (org/repo identifier for GitHub, URL otherwise)
        try:
            remote = _git_output(repo_path, "remote", "get-url", "origin")
            meta["target_repo"] = _normalize_remote(remote or "")
        except git_errors:
            logger.warning("Could not capture target_repo from %s", repo_path)

    # Nous version: prefer package metadata, fall back to the git SHA of the
    # checkout this module lives in.
    try:
        meta["nous_version"] = importlib_metadata.version("nous")
    except importlib_metadata.PackageNotFoundError:
        nous_dir = Path(__file__).resolve().parent
        try:
            meta["nous_version"] = _git_output(str(nous_dir), "rev-parse", "HEAD")
        except git_errors:
            logger.warning("Could not determine nous_version")

    return meta
247+
248+
249+
def setup_work_dir(
250+
run_id: str,
251+
repo_path: str | None = None,
252+
campaign_path: Path | None = None,
253+
campaign: dict | None = None,
254+
) -> Path:
191255
"""Create and initialize a working directory from templates.
192256
193257
If repo_path is provided, the campaign directory is created inside
194258
the target repo at .nous/<run_id>/. Otherwise falls back to creating
195259
<run_id>/ in the current directory.
260+
261+
If campaign_path is provided, writes an enriched copy of campaign.yaml
262+
into the work directory with a runtime: block (target_repo, target_commit,
263+
nous_version, started_at). Only written on fresh init to avoid clobbering
264+
on resume.
196265
"""
197266
if repo_path:
198267
work_dir = Path(repo_path) / ".nous" / run_id
@@ -206,6 +275,21 @@ def setup_work_dir(run_id: str, repo_path: str | None = None) -> Path:
206275
state = json.loads((work_dir / "state.json").read_text())
207276
state["run_id"] = run_id
208277
atomic_write(work_dir / "state.json", json.dumps(state, indent=2) + "\n")
278+
279+
# Write enriched campaign.yaml copy on fresh init only
280+
enriched_path = work_dir / "campaign.yaml"
281+
if campaign_path and campaign and not enriched_path.exists():
282+
try:
283+
runtime_meta = _capture_runtime_meta(repo_path)
284+
enriched = dict(campaign)
285+
enriched["runtime"] = runtime_meta
286+
atomic_write(
287+
enriched_path,
288+
yaml.safe_dump(enriched, default_flow_style=False, sort_keys=False),
289+
)
290+
except (OSError, yaml.YAMLError) as exc:
291+
logger.warning("Could not write enriched campaign.yaml: %s", exc)
292+
209293
return work_dir
210294

211295

@@ -526,7 +610,10 @@ def main() -> None:
526610

527611
run_id = args.run_id or campaign.get("run_id") or campaign_path.parent.name + "-run"
528612
repo_path = campaign.get("target_system", {}).get("repo_path")
529-
work_dir = setup_work_dir(run_id, repo_path=repo_path)
613+
work_dir = setup_work_dir(
614+
run_id, repo_path=repo_path,
615+
campaign_path=campaign_path, campaign=campaign,
616+
)
530617
print(f"Working directory: {work_dir.resolve()}")
531618

532619
run_iteration(

orchestrator/schemas/campaign.schema.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ properties:
5454
minLength: 1
5555
description: "Path to target system git repo. Used by CLIDispatcher for code-access agents. If set, experiments run in isolated worktrees."
5656

57+
metadata:
58+
type: object
59+
additionalProperties: true
60+
description: "User-defined metadata (tags, goal, etc.). Copied to work dir at init."
61+
5762
models:
5863
type: object
5964
additionalProperties: false

tests/test_campaign.py

Lines changed: 176 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
"""Tests for multi-iteration campaign loop."""
2+
import importlib.metadata as importlib_metadata
23
import json
34
import shutil
5+
import subprocess
46
import warnings
57
from pathlib import Path
6-
from unittest.mock import MagicMock
8+
from unittest.mock import MagicMock, patch
79

810
import jsonschema
911
import pytest
@@ -12,7 +14,12 @@
1214
from orchestrator.dispatch import StubDispatcher
1315
from orchestrator.engine import Engine
1416
from orchestrator.campaign import run_campaign
15-
from orchestrator.iteration import IterationOutcome, _save_human_feedback
17+
from orchestrator.iteration import (
18+
IterationOutcome,
19+
_capture_runtime_meta,
20+
_save_human_feedback,
21+
setup_work_dir,
22+
)
1623

1724
SCHEMAS_DIR = Path(__file__).resolve().parent.parent / "orchestrator" / "schemas"
1825
TEMPLATES_DIR = Path(__file__).resolve().parent.parent / "orchestrator" / "templates"
@@ -449,3 +456,170 @@ def test_multiple_phases_independent(self, tmp_path):
449456
fb = json.loads((tmp_path / "human_feedback.json").read_text())
450457
assert len(fb["design"]) == 1
451458
assert len(fb["findings"]) == 1
459+
460+
461+
class TestMetadataEnrichment:
    """Tests for campaign metadata enrichment (runtime block in campaign.yaml copy).

    Tests that call setup_work_dir with repo_path=None first chdir into
    tmp_path: in that mode the work directory is created relative to the
    current directory, and a leftover ``test-run/campaign.yaml`` from another
    test (or a previous run) would be silently reused instead of rewritten.
    """

    CAMPAIGN_WITH_META = {
        **SAMPLE_CAMPAIGN,
        "metadata": {
            "tags": ["prefix-caching", "ttft"],
            "goal": "Determine prefix ratio effect on TTFT",
        },
    }

    def test_setup_work_dir_writes_enriched_campaign_yaml(self, tmp_path, monkeypatch):
        """setup_work_dir writes an enriched campaign.yaml with runtime block."""
        monkeypatch.chdir(tmp_path)
        campaign_path = tmp_path / "campaign.yaml"
        campaign_path.write_text(yaml.safe_dump(self.CAMPAIGN_WITH_META))

        work_dir = setup_work_dir(
            "test-run", repo_path=None,
            campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META,
        )

        enriched_path = work_dir / "campaign.yaml"
        assert enriched_path.exists()

        enriched = yaml.safe_load(enriched_path.read_text())
        assert "runtime" in enriched
        assert "started_at" in enriched["runtime"]
        assert "nous_version" in enriched["runtime"]
        assert "target_repo" in enriched["runtime"]
        assert "target_commit" in enriched["runtime"]

    def test_user_metadata_passes_through(self, tmp_path, monkeypatch):
        """User-defined metadata from campaign.yaml appears in the enriched copy."""
        monkeypatch.chdir(tmp_path)
        campaign_path = tmp_path / "campaign.yaml"
        campaign_path.write_text(yaml.safe_dump(self.CAMPAIGN_WITH_META))

        work_dir = setup_work_dir(
            "test-run", repo_path=None,
            campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META,
        )

        enriched = yaml.safe_load((work_dir / "campaign.yaml").read_text())
        assert enriched["metadata"]["tags"] == ["prefix-caching", "ttft"]
        assert enriched["metadata"]["goal"] == "Determine prefix ratio effect on TTFT"

    def test_enriched_copy_not_overwritten_on_resume(self, tmp_path, monkeypatch):
        """Re-calling setup_work_dir does not clobber the enriched campaign.yaml."""
        monkeypatch.chdir(tmp_path)
        campaign_path = tmp_path / "campaign.yaml"
        campaign_path.write_text(yaml.safe_dump(self.CAMPAIGN_WITH_META))

        work_dir = setup_work_dir(
            "test-run", repo_path=None,
            campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META,
        )

        # Modify the enriched file to prove it's not overwritten
        enriched_path = work_dir / "campaign.yaml"
        enriched = yaml.safe_load(enriched_path.read_text())
        enriched["runtime"]["marker"] = "original"
        enriched_path.write_text(yaml.safe_dump(enriched))

        # Call setup_work_dir again (simulating resume)
        setup_work_dir(
            "test-run", repo_path=None,
            campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META,
        )

        reloaded = yaml.safe_load(enriched_path.read_text())
        assert reloaded["runtime"]["marker"] == "original"

    def test_runtime_meta_tolerates_no_git(self, tmp_path):
        """_capture_runtime_meta returns nulls gracefully when git is unavailable."""
        with patch("orchestrator.iteration.subprocess.check_output", side_effect=FileNotFoundError):
            meta = _capture_runtime_meta(str(tmp_path))

        assert meta["target_repo"] is None
        assert meta["target_commit"] is None
        # nous_version may still be set via importlib.metadata
        assert "started_at" in meta

    def test_runtime_meta_captures_target_commit_from_git_repo(self, tmp_path):
        """_capture_runtime_meta captures target_commit from a real git repo."""
        repo = tmp_path / "target"
        repo.mkdir()

        # check=True on every step so a broken fixture fails here, not in
        # the assertions below.
        setup_commands = [
            ["git", "init"],
            ["git", "config", "user.email", "test@test.com"],
            ["git", "config", "user.name", "Test"],
        ]
        for cmd in setup_commands:
            subprocess.run(cmd, cwd=repo, capture_output=True, check=True)
        (repo / "f.txt").write_text("x")
        subprocess.run(["git", "add", "."], cwd=repo, capture_output=True, check=True)
        subprocess.run(["git", "commit", "-m", "init"], cwd=repo, capture_output=True, check=True)

        meta = _capture_runtime_meta(str(repo))

        assert meta["target_commit"] is not None
        assert len(meta["target_commit"]) == 40  # full SHA
        # No remote configured, so target_repo should be None
        assert meta["target_repo"] is None

    def test_no_enriched_copy_without_campaign_path(self, tmp_path, monkeypatch):
        """If campaign_path is not provided, no enriched copy is written."""
        monkeypatch.chdir(tmp_path)
        work_dir = setup_work_dir("test-run", repo_path=None)
        assert not (work_dir / "campaign.yaml").exists()

    @pytest.mark.parametrize("remote,expected", [
        ("git@github.com:org/repo.git", "org/repo"),
        ("git@github.com:org/repo", "org/repo"),
        ("https://github.com/org/repo.git", "org/repo"),
        ("https://github.com/org/repo", "org/repo"),
        ("ssh://git@github.com/org/repo.git", "org/repo"),
        ("https://gitlab.com/org/repo.git", "https://gitlab.com/org/repo.git"),
        ("git@gitlab.com:org/repo.git", "git@gitlab.com:org/repo.git"),
    ])
    def test_remote_url_parsing(self, remote, expected, monkeypatch):
        """_capture_runtime_meta correctly parses various remote URL formats."""
        def fake_check_output(cmd, **kwargs):
            if "rev-parse" in cmd:
                return "a" * 40 + "\n"
            if "get-url" in cmd:
                return remote + "\n"
            raise subprocess.CalledProcessError(1, cmd)

        monkeypatch.setattr("orchestrator.iteration.subprocess.check_output", fake_check_output)
        meta = _capture_runtime_meta("/fake/repo")
        assert meta["target_repo"] == expected

    def test_nous_version_git_sha_fallback(self, monkeypatch):
        """When importlib.metadata fails, nous_version falls back to git SHA."""
        fake_sha = "b" * 40

        def raise_not_found(_name):
            raise importlib_metadata.PackageNotFoundError()

        monkeypatch.setattr(
            "orchestrator.iteration.importlib_metadata.version",
            raise_not_found,
        )

        def fake_check_output(cmd, **kwargs):
            if "rev-parse" in cmd:
                return fake_sha + "\n"
            raise subprocess.CalledProcessError(1, cmd)

        monkeypatch.setattr("orchestrator.iteration.subprocess.check_output", fake_check_output)
        meta = _capture_runtime_meta(None)
        assert meta["nous_version"] == fake_sha

    def test_enrichment_with_repo_path(self, tmp_path):
        """Enriched campaign.yaml is written inside .nous/<run_id>/ when repo_path is set."""
        campaign_path = tmp_path / "campaign.yaml"
        campaign_path.write_text(yaml.safe_dump(self.CAMPAIGN_WITH_META))

        repo = tmp_path / "target_repo"
        repo.mkdir()

        work_dir = setup_work_dir(
            "test-run", repo_path=str(repo),
            campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META,
        )

        assert work_dir == repo / ".nous" / "test-run"
        enriched_path = work_dir / "campaign.yaml"
        assert enriched_path.exists()
        enriched = yaml.safe_load(enriched_path.read_text())
        assert "runtime" in enriched
        assert enriched["metadata"]["tags"] == ["prefix-caching", "ttft"]

0 commit comments

Comments
 (0)