feat: capture campaign metadata in enriched campaign.yaml at init (#116)

susiejojo · claude · web-flow · commit 265018ae5cfd · 2026-06-01T09:37:24.000-04:00
At campaign init, this change writes an enriched copy of campaign.yaml into the work directory with a `runtime:` block containing target_repo, target_commit, nous_version, and started_at. It also adds an optional `metadata` field to the campaign schema for user-supplied tags/goals. Each git/importlib call is individually failure-tolerant — a missing git binary or a non-repo target degrades gracefully to null for the affected field. The enriched copy is only written on fresh init (not on resume). Closes #115. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/orchestrator/campaign.py b/orchestrator/campaign.py
@@ -393,7 +393,10 @@ def main() -> None:
 
     run_id = args.run_id or campaign.get("run_id") or campaign_path.parent.name + "-run"
     repo_path = campaign.get("target_system", {}).get("repo_path")
-    work_dir = setup_work_dir(run_id, repo_path=repo_path)
+    work_dir = setup_work_dir(
+        run_id, repo_path=repo_path,
+        campaign_path=campaign_path, campaign=campaign,
+    )
     print(f"Working directory: {work_dir.resolve()}")
     print(f"Max iterations: {max_iter}")
 
diff --git a/orchestrator/cli.py b/orchestrator/cli.py
@@ -96,7 +96,10 @@ def _cmd_run(args):
                 )
                 sys.exit(1)
 
-    work_dir = setup_work_dir(run_id, repo_path=repo_path)
+    work_dir = setup_work_dir(
+        run_id, repo_path=repo_path,
+        campaign_path=campaign_path, campaign=campaign,
+    )
 
     max_iterations = args.max_iterations if args.max_iterations is not None else campaign.get("max_iterations", 10)
     run_campaign(
diff --git a/orchestrator/iteration.py b/orchestrator/iteration.py
@@ -17,10 +17,12 @@
     --agent inline: Prompts emitted to stdout for the calling agent.
 """
 import argparse
+import importlib.metadata as importlib_metadata
 import json
 import logging
 import re
 import shutil
+import subprocess
 import sys
 from datetime import datetime, timezone
 from enum import Enum
@@ -187,12 +189,79 @@ def _merge_principles(work_dir: Path, iter_dir: Path) -> None:
     atomic_write(principles_path, json.dumps(store, indent=2) + "\n")
 
 
-def setup_work_dir(run_id: str, repo_path: str | None = None) -> Path:
+def _sanitize_remote_url(remote: str) -> str | None:
+    """Reduce a git remote URL to an identifier that is safe to persist.
+
+    GitHub remotes (SSH or HTTPS form) collapse to ``org/repo``. Any other
+    remote is kept as-is, except that credentials embedded in the URL's
+    userinfo are removed so tokens never end up in the enriched
+    campaign.yaml. Returns None for an empty remote.
+    """
+    # Imported locally: only this helper needs URL parsing.
+    from urllib.parse import urlsplit
+
+    if remote.startswith("git@github.com:"):
+        # SSH: git@github.com:org/repo.git
+        return remote.split(":")[-1].removesuffix(".git")
+    if "github.com/" in remote:
+        # HTTPS: https://github.com/org/repo.git
+        return remote.split("github.com/")[-1].removesuffix(".git")
+
+    try:
+        parts = urlsplit(remote)
+    except ValueError:
+        # Not parseable as a URL (e.g. malformed IPv6 host); keep verbatim.
+        return remote or None
+    userinfo, at, host = parts.netloc.rpartition("@")
+    if parts.scheme and at:
+        if parts.scheme in ("http", "https"):
+            # HTTP(S) userinfo is only ever a credential (user:token or token).
+            netloc = host
+        else:
+            # Keep the login name (e.g. ssh://git@host) but drop any password.
+            netloc = f"{userinfo.partition(':')[0]}@{host}"
+        return parts._replace(netloc=netloc).geturl()
+    # scp-like (git@host:path) and credential-free URLs are stored unchanged.
+    return remote or None
+
+
+def _capture_runtime_meta(repo_path: str | None) -> dict:
+    """Capture runtime metadata at campaign init time.
+
+    Args:
+        repo_path: Path to the target system's git checkout, or None when
+            the campaign has no target repo (target_* fields stay None).
+
+    Returns:
+        A dict with target_repo, target_commit, nous_version, started_at.
+        started_at is the current UTC time in ISO 8601 form. Each
+        git/importlib call is wrapped individually — failures log a warning
+        and yield null for that field.
+    """
+    meta: dict = {
+        "target_repo": None,
+        "target_commit": None,
+        "nous_version": None,
+        "started_at": datetime.now(timezone.utc).isoformat(),
+    }
+
+    # Target repo commit
+    if repo_path:
+        try:
+            meta["target_commit"] = subprocess.check_output(
+                ["git", "-C", repo_path, "rev-parse", "HEAD"],
+                text=True, stderr=subprocess.DEVNULL,
+            ).strip() or None
+        except (subprocess.CalledProcessError, FileNotFoundError, OSError):
+            logger.warning("Could not capture target_commit from %s", repo_path)
+
+        # Target repo remote (org/repo identifier, credentials stripped)
+        try:
+            remote = subprocess.check_output(
+                ["git", "-C", repo_path, "remote", "get-url", "origin"],
+                text=True, stderr=subprocess.DEVNULL,
+            ).strip()
+            meta["target_repo"] = _sanitize_remote_url(remote)
+        except (subprocess.CalledProcessError, FileNotFoundError, OSError):
+            logger.warning("Could not capture target_repo from %s", repo_path)
+
+    # Nous version: prefer package metadata, fall back to git SHA
+    try:
+        meta["nous_version"] = importlib_metadata.version("nous")
+    except importlib_metadata.PackageNotFoundError:
+        # Not installed as a package: use the commit of the checkout that
+        # contains this module instead.
+        nous_dir = Path(__file__).resolve().parent
+        try:
+            meta["nous_version"] = subprocess.check_output(
+                ["git", "-C", str(nous_dir), "rev-parse", "HEAD"],
+                text=True, stderr=subprocess.DEVNULL,
+            ).strip() or None
+        except (subprocess.CalledProcessError, FileNotFoundError, OSError):
+            logger.warning("Could not determine nous_version")
+
+    return meta
+
+
+def setup_work_dir(
+    run_id: str,
+    repo_path: str | None = None,
+    campaign_path: Path | None = None,
+    campaign: dict | None = None,
+) -> Path:
     """Create and initialize a working directory from templates.
 
     If repo_path is provided, the campaign directory is created inside
     the target repo at .nous/<run_id>/. Otherwise falls back to creating
     <run_id>/ in the current directory.
+
+    If campaign_path is provided, writes an enriched copy of campaign.yaml
+    into the work directory with a runtime: block (target_repo, target_commit,
+    nous_version, started_at). Only written on fresh init to avoid clobbering
+    on resume.
     """
     if repo_path:
         work_dir = Path(repo_path) / ".nous" / run_id
@@ -206,6 +275,21 @@ def setup_work_dir(run_id: str, repo_path: str | None = None) -> Path:
     state = json.loads((work_dir / "state.json").read_text())
     state["run_id"] = run_id
     atomic_write(work_dir / "state.json", json.dumps(state, indent=2) + "\n")
+
+    # Write enriched campaign.yaml copy on fresh init only
+    enriched_path = work_dir / "campaign.yaml"
+    if campaign_path and campaign and not enriched_path.exists():
+        try:
+            runtime_meta = _capture_runtime_meta(repo_path)
+            enriched = dict(campaign)
+            enriched["runtime"] = runtime_meta
+            atomic_write(
+                enriched_path,
+                yaml.safe_dump(enriched, default_flow_style=False, sort_keys=False),
+            )
+        except (OSError, yaml.YAMLError) as exc:
+            logger.warning("Could not write enriched campaign.yaml: %s", exc)
+
     return work_dir
 
 
@@ -526,7 +610,10 @@ def main() -> None:
 
     run_id = args.run_id or campaign.get("run_id") or campaign_path.parent.name + "-run"
     repo_path = campaign.get("target_system", {}).get("repo_path")
-    work_dir = setup_work_dir(run_id, repo_path=repo_path)
+    work_dir = setup_work_dir(
+        run_id, repo_path=repo_path,
+        campaign_path=campaign_path, campaign=campaign,
+    )
     print(f"Working directory: {work_dir.resolve()}")
 
     run_iteration(
diff --git a/orchestrator/schemas/campaign.schema.yaml b/orchestrator/schemas/campaign.schema.yaml
@@ -54,6 +54,11 @@ properties:
         minLength: 1
         description: "Path to target system git repo. Used by CLIDispatcher for code-access agents. If set, experiments run in isolated worktrees."
 
+  metadata:
+    type: object
+    additionalProperties: true
+    description: "User-defined metadata (tags, goal, etc.). Copied to work dir at init."
+
   models:
     type: object
     additionalProperties: false
diff --git a/tests/test_campaign.py b/tests/test_campaign.py
@@ -1,9 +1,11 @@
 """Tests for multi-iteration campaign loop."""
+import importlib.metadata as importlib_metadata
 import json
 import shutil
+import subprocess
 import warnings
 from pathlib import Path
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, patch
 
 import jsonschema
 import pytest
@@ -12,7 +14,12 @@
 from orchestrator.dispatch import StubDispatcher
 from orchestrator.engine import Engine
 from orchestrator.campaign import run_campaign
-from orchestrator.iteration import IterationOutcome, _save_human_feedback
+from orchestrator.iteration import (
+    IterationOutcome,
+    _capture_runtime_meta,
+    _save_human_feedback,
+    setup_work_dir,
+)
 
 SCHEMAS_DIR = Path(__file__).resolve().parent.parent / "orchestrator" / "schemas"
 TEMPLATES_DIR = Path(__file__).resolve().parent.parent / "orchestrator" / "templates"
@@ -449,3 +456,170 @@ def test_multiple_phases_independent(self, tmp_path):
         fb = json.loads((tmp_path / "human_feedback.json").read_text())
         assert len(fb["design"]) == 1
         assert len(fb["findings"]) == 1
+
+
+class TestMetadataEnrichment:
+    """Tests for campaign metadata enrichment (runtime block in campaign.yaml copy).
+
+    Tests that call setup_work_dir with repo_path=None chdir into tmp_path
+    first: in that mode the work dir is created as <run_id>/ in the current
+    directory, so without the chdir the tests would write into (and share
+    state through) whatever directory pytest was launched from.
+    """
+
+    CAMPAIGN_WITH_META = {
+        **SAMPLE_CAMPAIGN,
+        "metadata": {
+            "tags": ["prefix-caching", "ttft"],
+            "goal": "Determine prefix ratio effect on TTFT",
+        },
+    }
+
+    def test_setup_work_dir_writes_enriched_campaign_yaml(self, tmp_path, monkeypatch):
+        """setup_work_dir writes an enriched campaign.yaml with runtime block."""
+        monkeypatch.chdir(tmp_path)
+        campaign_path = tmp_path / "campaign.yaml"
+        campaign_path.write_text(yaml.safe_dump(self.CAMPAIGN_WITH_META))
+
+        work_dir = setup_work_dir(
+            "test-run", repo_path=None,
+            campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META,
+        )
+
+        enriched_path = work_dir / "campaign.yaml"
+        assert enriched_path.exists()
+
+        enriched = yaml.safe_load(enriched_path.read_text())
+        assert "runtime" in enriched
+        assert "started_at" in enriched["runtime"]
+        assert "nous_version" in enriched["runtime"]
+        assert "target_repo" in enriched["runtime"]
+        assert "target_commit" in enriched["runtime"]
+
+    def test_user_metadata_passes_through(self, tmp_path, monkeypatch):
+        """User-defined metadata from campaign.yaml appears in the enriched copy."""
+        monkeypatch.chdir(tmp_path)
+        campaign_path = tmp_path / "campaign.yaml"
+        campaign_path.write_text(yaml.safe_dump(self.CAMPAIGN_WITH_META))
+
+        work_dir = setup_work_dir(
+            "test-run", repo_path=None,
+            campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META,
+        )
+
+        enriched = yaml.safe_load((work_dir / "campaign.yaml").read_text())
+        assert enriched["metadata"]["tags"] == ["prefix-caching", "ttft"]
+        assert enriched["metadata"]["goal"] == "Determine prefix ratio effect on TTFT"
+
+    def test_enriched_copy_not_overwritten_on_resume(self, tmp_path, monkeypatch):
+        """Re-calling setup_work_dir does not clobber the enriched campaign.yaml."""
+        monkeypatch.chdir(tmp_path)
+        campaign_path = tmp_path / "campaign.yaml"
+        campaign_path.write_text(yaml.safe_dump(self.CAMPAIGN_WITH_META))
+
+        work_dir = setup_work_dir(
+            "test-run", repo_path=None,
+            campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META,
+        )
+
+        # Modify the enriched file to prove it's not overwritten
+        enriched_path = work_dir / "campaign.yaml"
+        enriched = yaml.safe_load(enriched_path.read_text())
+        enriched["runtime"]["marker"] = "original"
+        enriched_path.write_text(yaml.safe_dump(enriched))
+
+        # Call setup_work_dir again (simulating resume)
+        setup_work_dir(
+            "test-run", repo_path=None,
+            campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META,
+        )
+
+        reloaded = yaml.safe_load(enriched_path.read_text())
+        assert reloaded["runtime"]["marker"] == "original"
+
+    def test_runtime_meta_tolerates_no_git(self, tmp_path):
+        """_capture_runtime_meta returns nulls gracefully when git is unavailable."""
+        with patch("orchestrator.iteration.subprocess.check_output", side_effect=FileNotFoundError):
+            meta = _capture_runtime_meta(str(tmp_path))
+
+        assert meta["target_repo"] is None
+        assert meta["target_commit"] is None
+        # nous_version may still be set via importlib.metadata
+        assert "started_at" in meta
+
+    def test_runtime_meta_captures_target_commit_from_git_repo(self, tmp_path):
+        """_capture_runtime_meta captures target_commit from a real git repo."""
+        repo = tmp_path / "target"
+        repo.mkdir()
+        # check=True throughout so a failing setup step surfaces at its source
+        # rather than as a confusing assertion failure further down.
+        subprocess.run(["git", "init"], cwd=repo, capture_output=True, check=True)
+        subprocess.run(["git", "config", "user.email", "test@test.com"], cwd=repo, capture_output=True, check=True)
+        subprocess.run(["git", "config", "user.name", "Test"], cwd=repo, capture_output=True, check=True)
+        (repo / "f.txt").write_text("x")
+        subprocess.run(["git", "add", "."], cwd=repo, capture_output=True, check=True)
+        subprocess.run(["git", "commit", "-m", "init"], cwd=repo, capture_output=True, check=True)
+
+        meta = _capture_runtime_meta(str(repo))
+
+        assert meta["target_commit"] is not None
+        assert len(meta["target_commit"]) == 40  # full SHA
+        # No remote configured, so target_repo should be None
+        assert meta["target_repo"] is None
+
+    def test_no_enriched_copy_without_campaign_path(self, tmp_path, monkeypatch):
+        """If campaign_path is not provided, no enriched copy is written."""
+        monkeypatch.chdir(tmp_path)
+        work_dir = setup_work_dir("test-run", repo_path=None)
+        assert not (work_dir / "campaign.yaml").exists()
+
+    @pytest.mark.parametrize("remote,expected", [
+        ("git@github.com:org/repo.git", "org/repo"),
+        ("git@github.com:org/repo", "org/repo"),
+        ("https://github.com/org/repo.git", "org/repo"),
+        ("https://github.com/org/repo", "org/repo"),
+        ("ssh://git@github.com/org/repo.git", "org/repo"),
+        ("https://gitlab.com/org/repo.git", "https://gitlab.com/org/repo.git"),
+        ("git@gitlab.com:org/repo.git", "git@gitlab.com:org/repo.git"),
+    ])
+    def test_remote_url_parsing(self, remote, expected, monkeypatch):
+        """_capture_runtime_meta correctly parses various remote URL formats."""
+        def fake_check_output(cmd, **kwargs):
+            if "rev-parse" in cmd:
+                return "a" * 40 + "\n"
+            if "get-url" in cmd:
+                return remote + "\n"
+            raise subprocess.CalledProcessError(1, cmd)
+
+        monkeypatch.setattr("orchestrator.iteration.subprocess.check_output", fake_check_output)
+        meta = _capture_runtime_meta("/fake/repo")
+        assert meta["target_repo"] == expected
+
+    def test_nous_version_git_sha_fallback(self, monkeypatch):
+        """When importlib.metadata fails, nous_version falls back to git SHA."""
+        fake_sha = "b" * 40
+
+        def raise_not_found(_name):
+            raise importlib_metadata.PackageNotFoundError()
+
+        monkeypatch.setattr(
+            "orchestrator.iteration.importlib_metadata.version", raise_not_found,
+        )
+
+        def fake_check_output(cmd, **kwargs):
+            if "rev-parse" in cmd:
+                return fake_sha + "\n"
+            raise subprocess.CalledProcessError(1, cmd)
+
+        monkeypatch.setattr("orchestrator.iteration.subprocess.check_output", fake_check_output)
+        meta = _capture_runtime_meta(None)
+        assert meta["nous_version"] == fake_sha
+
+    def test_enrichment_with_repo_path(self, tmp_path):
+        """Enriched campaign.yaml is written inside .nous/<run_id>/ when repo_path is set."""
+        campaign_path = tmp_path / "campaign.yaml"
+        campaign_path.write_text(yaml.safe_dump(self.CAMPAIGN_WITH_META))
+
+        repo = tmp_path / "target_repo"
+        repo.mkdir()
+
+        work_dir = setup_work_dir(
+            "test-run", repo_path=str(repo),
+            campaign_path=campaign_path, campaign=self.CAMPAIGN_WITH_META,
+        )
+
+        assert work_dir == repo / ".nous" / "test-run"
+        enriched_path = work_dir / "campaign.yaml"
+        assert enriched_path.exists()
+        enriched = yaml.safe_load(enriched_path.read_text())
+        assert "runtime" in enriched
+        assert enriched["metadata"]["tags"] == ["prefix-caching", "ttft"]