Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions orchestrator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def _render_schema_markdown(schema: dict, *, artifact: str) -> str:
lines.append("- `nous create-campaign --to ./campaign.yaml` — scaffold a heavily-commented starting point.")
lines.append("- `nous run campaign.yaml` — run a campaign (default `--agent sdk`).")
lines.append("- `nous run campaign.yaml --bundle ./bundle.yaml` — skip DESIGN with a pre-authored bundle (#188).")
lines.append("- `nous stop <target>` — ask a running campaign to halt at the next iteration boundary.")
lines.append("- `nous stop <target>` — ask a running campaign to halt at the next phase boundary (#198).")
lines.append("- `nous status --watch <target>` — live progress, including a STUCK marker after 5 min of silence.")
return "\n".join(lines)

Expand Down Expand Up @@ -879,7 +879,17 @@ def main():
p_stop = subparsers.add_parser(
"stop",
help="Ask a running campaign to halt cleanly at the next "
"iteration boundary by writing a STOP sentinel.",
"phase boundary by writing a STOP sentinel (#198: honoured "
"at DESIGN / HUMAN_DESIGN_GATE / EXECUTE_ANALYZE / "
"HUMAN_FINDINGS_GATE transitions, not just between iterations).",
description=(
"Write a STOP sentinel that the running campaign honours at "
"the next phase boundary (#198). Phase boundaries are DESIGN, "
"HUMAN_DESIGN_GATE, EXECUTE_ANALYZE, and HUMAN_FINDINGS_GATE — "
"so the operator can halt cleanly without waiting for the "
"next iteration. Mid-phase interruption (a wedged BLIS "
"subprocess, a stuck SDK turn) is still SIGINT's job."
),
)
p_stop.add_argument(
"target",
Expand Down
15 changes: 15 additions & 0 deletions orchestrator/schemas/campaign.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,21 @@ properties:
Additional filenames allowed at the iter root (in addition to
the global whitelist in orchestrator/validate.py). Names only,
not paths — files in subdirectories are always allowed.
required_iter_root:
type: array
items:
type: string
minLength: 1
uniqueItems: true
description: >
Filenames that MUST exist at the iter root after EXECUTE_ANALYZE
completes (#199 v2). validate_execution emits a "required
iter-root file missing: X" error for each missing entry,
turning silent omissions into structured failures (sister to
#187's DesignIncompleteError and #200's
ExecuteAnalyzeIncompleteError). Required files are implicitly
allowed at the iter root — no need to also list them in
iter_root_extensions. Names only, not paths.

max_turns:
type: object
Expand Down
54 changes: 48 additions & 6 deletions orchestrator/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,40 @@ def _campaign_iter_root_extensions(campaign: dict | None) -> frozenset[str]:
return frozenset(str(x) for x in extensions if x)


def _campaign_required_iter_root(campaign: dict | None) -> frozenset[str]:
"""Read ``campaign.validation.required_iter_root`` (#199 v2).

Files declared here are treated as MUST-EXIST at validate_execution
time. Required ⊆ allowed: a required file is also implicitly an
iter-root extension, so campaigns don't need to list it twice.
"""
if not campaign:
return frozenset()
validation = campaign.get("validation") or {}
required = validation.get("required_iter_root") or []
return frozenset(str(x) for x in required if x)


def _check_required_iter_root(
iter_dir: Path, required: set[str] | frozenset[str],
) -> list[str]:
"""Return one error per required iter-root file that's missing.

#199 v2: campaign.validation.required_iter_root declares files the
campaign promises to produce by EXECUTE_ANALYZE end. Missing entries
are surfaced with a clear "required iter-root file missing: X"
message so the operator (or a future incomplete-iteration diagnostic
in the spirit of #187 / #200) sees what the campaign committed to.
"""
errors: list[str] = []
if not iter_dir.is_dir():
return errors
for name in sorted(required):
if not (iter_dir / name).exists():
errors.append(f"required iter-root file missing: {name}")
return errors


def _validate_ground_truth_independence(bundle: dict) -> list[str]:
"""Cross-field check that the ground truth can disagree with the detector (issue #85).

Expand Down Expand Up @@ -279,9 +313,13 @@ def validate_design(iter_dir: Path, campaign: dict | None = None) -> dict:
elif handoff_path.stat().st_size == 0:
errors.append("handoff_snapshot.md is empty")

errors.extend(
_check_unexpected_files(iter_dir, _campaign_iter_root_extensions(campaign))
)
# #199 v2: required ⊆ allowed at design time too. We don't enforce
# required-presence here (most required files are written during
# EXECUTE, not DESIGN), but if the campaign agent does write one
# during DESIGN, the unexpected-file check must not reject it.
extensions = _campaign_iter_root_extensions(campaign)
required = _campaign_required_iter_root(campaign)
errors.extend(_check_unexpected_files(iter_dir, extensions | required))

if errors:
return {"status": "fail", "errors": errors}
Expand Down Expand Up @@ -399,9 +437,13 @@ def validate_execution(iter_dir: Path, campaign: dict | None = None) -> dict:
except KeyError as exc:
errors.append(f"bundle.yaml arm missing required field: {exc}")

errors.extend(
_check_unexpected_files(iter_dir, _campaign_iter_root_extensions(campaign))
)
# #199 v2: required ⊆ allowed (a required file is also implicitly
# allowed at iter-root, so campaigns don't have to declare it
# twice). Merge before the unexpected-file check.
extensions = _campaign_iter_root_extensions(campaign)
required = _campaign_required_iter_root(campaign)
errors.extend(_check_unexpected_files(iter_dir, extensions | required))
errors.extend(_check_required_iter_root(iter_dir, required))

if errors:
return {"status": "fail", "errors": errors}
Expand Down
58 changes: 58 additions & 0 deletions tests/test_nous_stop.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,3 +395,61 @@ def test_sentinel_after_design_halts_before_design_gate(
campaign, work_dir, iteration=1, agent="inline",
auto_approve=True,
)


# ─── Documentation parity (#198) ─────────────────────────────────────────
#
# The behavior of `nous stop` is honoured at every phase boundary
# post-#198 (see TestEnterPhaseHonorsSentinel above). User-facing
# documentation must reflect that — the legacy "iteration boundary"
# wording lags the post-#198 reality and misleads operators.


class TestStopDocumentationReflectsPhaseAwareness:
"""#198: every user-facing surface must describe `nous stop` as
halting at a phase boundary, not an iteration boundary."""

def test_argparse_help_says_phase_boundary(
self, monkeypatch, capsys,
) -> None:
"""`nous stop --help` (the argparse text) must announce phase
boundary, not iteration boundary."""
import sys
from orchestrator.cli import main

monkeypatch.setattr(sys, "argv", ["nous", "stop", "--help"])
with pytest.raises(SystemExit):
main() # --help triggers SystemExit(0)
out = capsys.readouterr().out
# argparse line-wraps long descriptions, so normalize whitespace
# before substring-matching.
normalized = " ".join(out.split())
assert "phase boundary" in normalized, (
f"#198: nous stop help must say 'phase boundary'; got: "
f"{normalized!r}"
)
assert "iteration boundary" not in normalized, (
f"#198: legacy 'iteration boundary' phrasing leaked into "
f"argparse help: {normalized!r}"
)

def test_schema_doc_render_says_phase_boundary(self, capsys) -> None:
"""`nous schema` markdown render references `nous stop` in its
See-also section. That string must announce phase boundary too."""
from orchestrator.cli import _cmd_schema

ns = argparse.Namespace(artifact="campaign", format="md")
_cmd_schema(ns)
out = capsys.readouterr().out
stop_lines = [ln for ln in out.splitlines() if "`nous stop" in ln]
assert stop_lines, (
"schema doc render should describe `nous stop` in See-also"
)
joined = "\n".join(stop_lines)
assert "phase boundary" in joined, (
f"#198: schema doc must say 'phase boundary'; got: {joined!r}"
)
assert "iteration boundary" not in joined, (
f"#198: legacy 'iteration boundary' leaked into schema doc: "
f"{joined!r}"
)
195 changes: 195 additions & 0 deletions tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,3 +453,198 @@ def test_subdirectories_ignored(self, tmp_path: Path) -> None:
(d / "results").mkdir()
(d / "inputs").mkdir()
assert _check_unexpected_files(d) == []


# ─── #199 v2 — required_iter_root ────────────────────────────────────────
#
# Sister field to iter_root_extensions: campaigns can declare files that
# MUST exist at the iter root after EXECUTE_ANALYZE completes. Missing
# entries fail validate_execution. Mirrors the #187 / #200 pattern of
# turning silent omissions into structured errors.


class TestRequiredIterRoot:
"""#199 v2: campaign.validation.required_iter_root makes campaign-
specific iter-root files mandatory at validate_execution time. The
validator fails with a clear ``required iter-root file missing: X``
error so operators see what the campaign promised to produce."""

def test_missing_required_file_fails_execution(
self, tmp_path: Path,
) -> None:
d = tmp_path / "iter-1"
_setup_execution(d)
# No analysis_summary.json on disk.
campaign = {
"validation": {
"required_iter_root": ["analysis_summary.json"],
# Required files are also implicitly allowed (so a
# campaign doesn't have to list them in both blocks).
},
}
result = validate_execution(d, campaign=campaign)
assert result["status"] == "fail"
assert any(
"required iter-root file missing" in e
and "analysis_summary.json" in e
for e in result["errors"]
), result["errors"]

def test_present_required_file_passes(self, tmp_path: Path) -> None:
d = tmp_path / "iter-1"
_setup_execution(d)
(d / "analysis_summary.json").write_text("{}")
campaign = {
"validation": {
"required_iter_root": ["analysis_summary.json"],
},
}
result = validate_execution(d, campaign=campaign)
assert result["status"] == "pass", result

def test_required_file_implicitly_allowed_at_iter_root(
self, tmp_path: Path,
) -> None:
"""A required file should NOT need to be listed in
iter_root_extensions to avoid the unexpected-file rejection.
Required ⊆ allowed — the validator merges them automatically."""
d = tmp_path / "iter-1"
_setup_execution(d)
(d / "probe_report.md").write_text("# Probe report\n")
campaign = {
"validation": {
"required_iter_root": ["probe_report.md"],
# Note: iter_root_extensions intentionally omitted.
},
}
result = validate_execution(d, campaign=campaign)
assert result["status"] == "pass", result
# Negative: the unexpected-file error must not have fired.
if result.get("errors"):
assert not any(
"unexpected file" in e and "probe_report.md" in e
for e in result["errors"]
), result["errors"]

def test_missing_required_listed_alongside_other_errors(
self, tmp_path: Path,
) -> None:
"""Required-file check runs even when other errors exist —
validation collects all errors so the operator sees the full
picture in one pass."""
d = tmp_path / "iter-1"
_setup_execution(d)
(d / "findings.json").unlink() # primary error
campaign = {
"validation": {
"required_iter_root": ["analysis_summary.json"],
},
}
result = validate_execution(d, campaign=campaign)
assert result["status"] == "fail"
joined = " | ".join(result["errors"])
assert "findings.json" in joined
assert "analysis_summary.json" in joined

def test_no_required_block_keeps_default_behavior(
self, tmp_path: Path,
) -> None:
"""Campaigns without required_iter_root behave exactly as
before this PR (backward-compat)."""
d = tmp_path / "iter-1"
_setup_execution(d)
# No analysis_summary.json on disk; no required_iter_root either.
result = validate_execution(d, campaign={})
assert result["status"] == "pass", result

def test_required_combined_with_extensions(
self, tmp_path: Path,
) -> None:
"""A campaign declares both required and optional iter-root
files; only the required one is enforced for presence."""
d = tmp_path / "iter-1"
_setup_execution(d)
(d / "analysis_summary.json").write_text("{}") # required, present
# manifest.json is "extensions" (optional) and absent — no error.
campaign = {
"validation": {
"required_iter_root": ["analysis_summary.json"],
"iter_root_extensions": ["manifest.json"],
},
}
result = validate_execution(d, campaign=campaign)
assert result["status"] == "pass", result

def test_required_file_at_design_time_is_allowed_not_required(
self, tmp_path: Path,
) -> None:
"""validate_design merges required ⊆ allowed (so a campaign that
writes a required file during DESIGN doesn't get rejected by the
unexpected-file check), but does NOT enforce required-presence
at design time — most required artifacts are EXECUTE-phase
outputs (e.g. probe_report.md is written during EXECUTE for
paper-* campaigns).

Pins both invariants:
(a) present-during-design → no "unexpected file" rejection
(b) absent-during-design → still passes design (no required-presence enforcement)
"""
campaign = {
"validation": {"required_iter_root": ["probe_report.md"]},
}

# (a) Present during DESIGN — required ⊆ allowed must let it through.
d1 = tmp_path / "iter-1"
_setup_design(d1)
(d1 / "probe_report.md").write_text("# Probe report\n")
result = validate_design(d1, campaign=campaign)
assert result["status"] == "pass", result

# (b) Absent during DESIGN — must still pass (only validate_execution
# enforces required-presence).
d2 = tmp_path / "iter-2"
_setup_design(d2)
result = validate_design(d2, campaign=campaign)
assert result["status"] == "pass", result

def test_required_overlapping_known_root_file_still_enforced(
self, tmp_path: Path,
) -> None:
"""A campaign may declare a file already in _KNOWN_ROOT_FILES
(e.g. findings.json) as required. The required-presence check
must still fire when the file is missing, even though the
unexpected-file check would never have flagged it.
"""
d = tmp_path / "iter-1"
_setup_execution(d)
(d / "findings.json").unlink() # in _KNOWN_ROOT_FILES, now absent.
campaign = {
"validation": {"required_iter_root": ["findings.json"]},
}
result = validate_execution(d, campaign=campaign)
assert result["status"] == "fail"
assert any(
"required iter-root file missing" in e and "findings.json" in e
for e in result["errors"]
), result["errors"]

def test_schema_accepts_required_iter_root(self) -> None:
"""The campaign.schema.yaml must accept the new field — without
a schema entry, jsonschema validation in nous run would reject
the campaign before the validator ever sees it."""
from orchestrator.validate import _load_yaml_schema
import jsonschema

schema = _load_yaml_schema("campaign.schema.yaml")
campaign = {
"research_question": "Does X work?",
"run_id": "demo",
"target_system": {"name": "T", "description": "d"},
"prompts": {"methodology_layer": "p"},
"validation": {
"required_iter_root": ["probe_report.md"],
"iter_root_extensions": ["analysis_summary.json"],
},
}
# Should not raise.
jsonschema.validate(campaign, schema)
Loading