diff --git a/orchestrator/cli.py b/orchestrator/cli.py index 9db68e4..bd10a1d 100644 --- a/orchestrator/cli.py +++ b/orchestrator/cli.py @@ -413,7 +413,7 @@ def _render_schema_markdown(schema: dict, *, artifact: str) -> str: lines.append("- `nous create-campaign --to ./campaign.yaml` — scaffold a heavily-commented starting point.") lines.append("- `nous run campaign.yaml` — run a campaign (default `--agent sdk`).") lines.append("- `nous run campaign.yaml --bundle ./bundle.yaml` — skip DESIGN with a pre-authored bundle (#188).") - lines.append("- `nous stop ` — ask a running campaign to halt at the next iteration boundary.") + lines.append("- `nous stop ` — ask a running campaign to halt at the next phase boundary (#198).") lines.append("- `nous status --watch ` — live progress, including a STUCK marker after 5 min of silence.") return "\n".join(lines) @@ -879,7 +879,17 @@ def main(): p_stop = subparsers.add_parser( "stop", help="Ask a running campaign to halt cleanly at the next " - "iteration boundary by writing a STOP sentinel.", + "phase boundary by writing a STOP sentinel (#198: honoured " + "at DESIGN / HUMAN_DESIGN_GATE / EXECUTE_ANALYZE / " + "HUMAN_FINDINGS_GATE transitions, not just between iterations).", + description=( + "Write a STOP sentinel that the running campaign honours at " + "the next phase boundary (#198). Phase boundaries are DESIGN, " + "HUMAN_DESIGN_GATE, EXECUTE_ANALYZE, and HUMAN_FINDINGS_GATE — " + "so the operator can halt cleanly without waiting for the " + "next iteration. Mid-phase interruption (a wedged BLIS " + "subprocess, a stuck SDK turn) is still SIGINT's job." + ), ) p_stop.add_argument( "target", diff --git a/orchestrator/schemas/campaign.schema.yaml b/orchestrator/schemas/campaign.schema.yaml index 9c4a5a8..923224a 100644 --- a/orchestrator/schemas/campaign.schema.yaml +++ b/orchestrator/schemas/campaign.schema.yaml @@ -164,6 +164,21 @@ properties: Additional filenames allowed at the iter root (in addition to the global whitelist in orchestrator/validate.py). Names only, not paths — files in subdirectories are always allowed. + required_iter_root: + type: array + items: + type: string + minLength: 1 + uniqueItems: true + description: > + Filenames that MUST exist at the iter root after EXECUTE_ANALYZE + completes (#199 v2). validate_execution emits a "required + iter-root file missing: X" error for each missing entry, + turning silent omissions into structured failures (sister to + #187's DesignIncompleteError and #200's + ExecuteAnalyzeIncompleteError). Required files are implicitly + allowed at the iter root — no need to also list them in + iter_root_extensions. Names only, not paths. max_turns: type: object diff --git a/orchestrator/validate.py b/orchestrator/validate.py index 33ea5ae..46f80d3 100644 --- a/orchestrator/validate.py +++ b/orchestrator/validate.py @@ -82,6 +82,40 @@ def _campaign_iter_root_extensions(campaign: dict | None) -> frozenset[str]: return frozenset(str(x) for x in extensions if x) +def _campaign_required_iter_root(campaign: dict | None) -> frozenset[str]: + """Read ``campaign.validation.required_iter_root`` (#199 v2). + + Files declared here are treated as MUST-EXIST at validate_execution + time. Required ⊆ allowed: a required file is also implicitly an + iter-root extension, so campaigns don't need to list it twice. + """ + if not campaign: + return frozenset() + validation = campaign.get("validation") or {} + required = validation.get("required_iter_root") or [] + return frozenset(str(x) for x in required if x) + + +def _check_required_iter_root( + iter_dir: Path, required: set[str] | frozenset[str], +) -> list[str]: + """Return one error per required iter-root file that's missing. + + #199 v2: campaign.validation.required_iter_root declares files the + campaign promises to produce by EXECUTE_ANALYZE end. Missing entries + are surfaced with a clear "required iter-root file missing: X" + message so the operator (or a future incomplete-iteration diagnostic + in the spirit of #187 / #200) sees what the campaign committed to. + """ + errors: list[str] = [] + if not iter_dir.is_dir(): + return errors + for name in sorted(required): + if not (iter_dir / name).exists(): + errors.append(f"required iter-root file missing: {name}") + return errors + + def _validate_ground_truth_independence(bundle: dict) -> list[str]: """Cross-field check that the ground truth can disagree with the detector (issue #85). @@ -279,9 +313,13 @@ def validate_design(iter_dir: Path, campaign: dict | None = None) -> dict: elif handoff_path.stat().st_size == 0: errors.append("handoff_snapshot.md is empty") - errors.extend( - _check_unexpected_files(iter_dir, _campaign_iter_root_extensions(campaign)) - ) + # #199 v2: required ⊆ allowed at design time too. We don't enforce + # required-presence here (most required files are written during + # EXECUTE, not DESIGN), but if the campaign agent does write one + # during DESIGN, the unexpected-file check must not reject it. + extensions = _campaign_iter_root_extensions(campaign) + required = _campaign_required_iter_root(campaign) + errors.extend(_check_unexpected_files(iter_dir, extensions | required)) if errors: return {"status": "fail", "errors": errors} @@ -399,9 +437,13 @@ def validate_execution(iter_dir: Path, campaign: dict | None = None) -> dict: except KeyError as exc: errors.append(f"bundle.yaml arm missing required field: {exc}") - errors.extend( - _check_unexpected_files(iter_dir, _campaign_iter_root_extensions(campaign)) - ) + # #199 v2: required ⊆ allowed (a required file is also implicitly + # allowed at iter-root, so campaigns don't have to declare it + # twice). Merge before the unexpected-file check. + extensions = _campaign_iter_root_extensions(campaign) + required = _campaign_required_iter_root(campaign) + errors.extend(_check_unexpected_files(iter_dir, extensions | required)) + errors.extend(_check_required_iter_root(iter_dir, required)) if errors: return {"status": "fail", "errors": errors} diff --git a/tests/test_nous_stop.py b/tests/test_nous_stop.py index ae90f0c..70dd4c1 100644 --- a/tests/test_nous_stop.py +++ b/tests/test_nous_stop.py @@ -395,3 +395,61 @@ def test_sentinel_after_design_halts_before_design_gate( campaign, work_dir, iteration=1, agent="inline", auto_approve=True, ) + + +# ─── Documentation parity (#198) ───────────────────────────────────────── +# +# The behavior of `nous stop` is honoured at every phase boundary +# post-#198 (see TestEnterPhaseHonorsSentinel above). User-facing +# documentation must reflect that — the legacy "iteration boundary" +# wording lags the post-#198 reality and misleads operators. + + +class TestStopDocumentationReflectsPhaseAwareness: + """#198: every user-facing surface must describe `nous stop` as + halting at a phase boundary, not an iteration boundary.""" + + def test_argparse_help_says_phase_boundary( + self, monkeypatch, capsys, + ) -> None: + """`nous stop --help` (the argparse text) must announce phase + boundary, not iteration boundary.""" + import sys + from orchestrator.cli import main + + monkeypatch.setattr(sys, "argv", ["nous", "stop", "--help"]) + with pytest.raises(SystemExit): + main() # --help triggers SystemExit(0) + out = capsys.readouterr().out + # argparse line-wraps long descriptions, so normalize whitespace + # before substring-matching. + normalized = " ".join(out.split()) + assert "phase boundary" in normalized, ( + f"#198: nous stop help must say 'phase boundary'; got: " + f"{normalized!r}" + ) + assert "iteration boundary" not in normalized, ( + f"#198: legacy 'iteration boundary' phrasing leaked into " + f"argparse help: {normalized!r}" + ) + + def test_schema_doc_render_says_phase_boundary(self, capsys) -> None: + """`nous schema` markdown render references `nous stop` in its + See-also section. That string must announce phase boundary too.""" + from orchestrator.cli import _cmd_schema + + ns = argparse.Namespace(artifact="campaign", format="md") + _cmd_schema(ns) + out = capsys.readouterr().out + stop_lines = [ln for ln in out.splitlines() if "`nous stop" in ln] + assert stop_lines, ( + "schema doc render should describe `nous stop` in See-also" + ) + joined = "\n".join(stop_lines) + assert "phase boundary" in joined, ( + f"#198: schema doc must say 'phase boundary'; got: {joined!r}" + ) + assert "iteration boundary" not in joined, ( + f"#198: legacy 'iteration boundary' leaked into schema doc: " + f"{joined!r}" + ) diff --git a/tests/test_validate.py b/tests/test_validate.py index 000c984..2f7cae1 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -453,3 +453,198 @@ def test_subdirectories_ignored(self, tmp_path: Path) -> None: (d / "results").mkdir() (d / "inputs").mkdir() assert _check_unexpected_files(d) == [] + + +# ─── #199 v2 — required_iter_root ──────────────────────────────────────── +# +# Sister field to iter_root_extensions: campaigns can declare files that +# MUST exist at the iter root after EXECUTE_ANALYZE completes. Missing +# entries fail validate_execution. Mirrors the #187 / #200 pattern of +# turning silent omissions into structured errors. + + +class TestRequiredIterRoot: + """#199 v2: campaign.validation.required_iter_root makes campaign- + specific iter-root files mandatory at validate_execution time. The + validator fails with a clear ``required iter-root file missing: X`` + error so operators see what the campaign promised to produce.""" + + def test_missing_required_file_fails_execution( + self, tmp_path: Path, + ) -> None: + d = tmp_path / "iter-1" + _setup_execution(d) + # No analysis_summary.json on disk. + campaign = { + "validation": { + "required_iter_root": ["analysis_summary.json"], + # Required files are also implicitly allowed (so a + # campaign doesn't have to list them in both blocks). + }, + } + result = validate_execution(d, campaign=campaign) + assert result["status"] == "fail" + assert any( + "required iter-root file missing" in e + and "analysis_summary.json" in e + for e in result["errors"] + ), result["errors"] + + def test_present_required_file_passes(self, tmp_path: Path) -> None: + d = tmp_path / "iter-1" + _setup_execution(d) + (d / "analysis_summary.json").write_text("{}") + campaign = { + "validation": { + "required_iter_root": ["analysis_summary.json"], + }, + } + result = validate_execution(d, campaign=campaign) + assert result["status"] == "pass", result + + def test_required_file_implicitly_allowed_at_iter_root( + self, tmp_path: Path, + ) -> None: + """A required file should NOT need to be listed in + iter_root_extensions to avoid the unexpected-file rejection. + Required ⊆ allowed — the validator merges them automatically.""" + d = tmp_path / "iter-1" + _setup_execution(d) + (d / "probe_report.md").write_text("# Probe report\n") + campaign = { + "validation": { + "required_iter_root": ["probe_report.md"], + # Note: iter_root_extensions intentionally omitted. + }, + } + result = validate_execution(d, campaign=campaign) + assert result["status"] == "pass", result + # Negative: the unexpected-file error must not have fired. + if result.get("errors"): + assert not any( + "unexpected file" in e and "probe_report.md" in e + for e in result["errors"] + ), result["errors"] + + def test_missing_required_listed_alongside_other_errors( + self, tmp_path: Path, + ) -> None: + """Required-file check runs even when other errors exist — + validation collects all errors so the operator sees the full + picture in one pass.""" + d = tmp_path / "iter-1" + _setup_execution(d) + (d / "findings.json").unlink() # primary error + campaign = { + "validation": { + "required_iter_root": ["analysis_summary.json"], + }, + } + result = validate_execution(d, campaign=campaign) + assert result["status"] == "fail" + joined = " | ".join(result["errors"]) + assert "findings.json" in joined + assert "analysis_summary.json" in joined + + def test_no_required_block_keeps_default_behavior( + self, tmp_path: Path, + ) -> None: + """Campaigns without required_iter_root behave exactly as + before this PR (backward-compat).""" + d = tmp_path / "iter-1" + _setup_execution(d) + # No analysis_summary.json on disk; no required_iter_root either. + result = validate_execution(d, campaign={}) + assert result["status"] == "pass", result + + def test_required_combined_with_extensions( + self, tmp_path: Path, + ) -> None: + """A campaign declares both required and optional iter-root + files; only the required one is enforced for presence.""" + d = tmp_path / "iter-1" + _setup_execution(d) + (d / "analysis_summary.json").write_text("{}") # required, present + # manifest.json is "extensions" (optional) and absent — no error. + campaign = { + "validation": { + "required_iter_root": ["analysis_summary.json"], + "iter_root_extensions": ["manifest.json"], + }, + } + result = validate_execution(d, campaign=campaign) + assert result["status"] == "pass", result + + def test_required_file_at_design_time_is_allowed_not_required( + self, tmp_path: Path, + ) -> None: + """validate_design merges required ⊆ allowed (so a campaign that + writes a required file during DESIGN doesn't get rejected by the + unexpected-file check), but does NOT enforce required-presence + at design time — most required artifacts are EXECUTE-phase + outputs (e.g. probe_report.md is written during EXECUTE for + paper-* campaigns). + + Pins both invariants: + (a) present-during-design → no "unexpected file" rejection + (b) absent-during-design → still passes design (no required-presence enforcement) + """ + campaign = { + "validation": {"required_iter_root": ["probe_report.md"]}, + } + + # (a) Present during DESIGN — required ⊆ allowed must let it through. + d1 = tmp_path / "iter-1" + _setup_design(d1) + (d1 / "probe_report.md").write_text("# Probe report\n") + result = validate_design(d1, campaign=campaign) + assert result["status"] == "pass", result + + # (b) Absent during DESIGN — must still pass (only validate_execution + # enforces required-presence). + d2 = tmp_path / "iter-2" + _setup_design(d2) + result = validate_design(d2, campaign=campaign) + assert result["status"] == "pass", result + + def test_required_overlapping_known_root_file_still_enforced( + self, tmp_path: Path, + ) -> None: + """A campaign may declare a file already in _KNOWN_ROOT_FILES + (e.g. findings.json) as required. The required-presence check + must still fire when the file is missing, even though the + unexpected-file check would never have flagged it. + """ + d = tmp_path / "iter-1" + _setup_execution(d) + (d / "findings.json").unlink() # in _KNOWN_ROOT_FILES, now absent. + campaign = { + "validation": {"required_iter_root": ["findings.json"]}, + } + result = validate_execution(d, campaign=campaign) + assert result["status"] == "fail" + assert any( + "required iter-root file missing" in e and "findings.json" in e + for e in result["errors"] + ), result["errors"] + + def test_schema_accepts_required_iter_root(self) -> None: + """The campaign.schema.yaml must accept the new field — without + a schema entry, jsonschema validation in nous run would reject + the campaign before the validator ever sees it.""" + from orchestrator.validate import _load_yaml_schema + import jsonschema + + schema = _load_yaml_schema("campaign.schema.yaml") + campaign = { + "research_question": "Does X work?", + "run_id": "demo", + "target_system": {"name": "T", "description": "d"}, + "prompts": {"methodology_layer": "p"}, + "validation": { + "required_iter_root": ["probe_report.md"], + "iter_root_extensions": ["analysis_summary.json"], + }, + } + # Should not raise. + jsonschema.validate(campaign, schema)