|
9 | 9 | sys.path.insert(0, str(Path(__file__).parent)) |
10 | 10 |
|
11 | 11 | from eval_harness import EvalHarness # noqa: E402 |
12 | | -from prompt_eval_helpers import assert_human_review_preamble, prompts_with_human_review # noqa: E402 |
| 12 | +from prompt_eval_helpers import ( # noqa: E402 |
| 13 | + assert_human_review_preamble, |
| 14 | + has_human_review_preamble, |
| 15 | + prompts_with_human_review, |
| 16 | +) |
13 | 17 |
|
# The "00-core" directory located three levels above this test file.
CORE_DIR = Path(__file__).parent.parent.parent.joinpath("00-core")
# Two levels above CORE_DIR (four levels above this file), kept as a plain string.
REPO_ROOT = str(CORE_DIR.parent.parent)
|
23 | 27 | ) |
24 | 28 |
|
# Every Markdown file directly inside CORE_DIR, in sorted path order.
CORE_PROMPTS = tuple(sorted(CORE_DIR.glob("*.md")))

# File names the tests expect to be detected as declaring Human Review.
CORE_HUMAN_REVIEW_REQUIRED = frozenset(
    (
        "core-full.md",
        "core-minimal.md",
        "core-short.md",
        "custom-instruction-en.md",
        "custom-instruction-zh.md",
        "important-task-full.md",
    )
)

# File names the tests expect to have no Human Review preamble.
CORE_HUMAN_REVIEW_EXEMPT = frozenset(
    (
        "daily-minimal.md",
        "global-controller.md",
        "master-prompt.md",
    )
)

# Subset of CORE_PROMPTS selected by the prompts_with_human_review helper.
CORE_PROMPTS_WITH_HUMAN_REVIEW = prompts_with_human_review(CORE_PROMPTS)
27 | 45 |
|
28 | 46 |
|
@@ -76,3 +94,24 @@ def test_core_prompts_have_primary_workflow_surfaces_line(): |
76 | 94 | def test_core_prompt_has_human_review_section(prompt_path: Path): |
77 | 95 | """Risk-bearing 00-core prompts declare Human Review escalation outside zh-TW templates.""" |
78 | 96 | assert_human_review_preamble(prompt_path) |
| 97 | + |
| 98 | + |
def test_core_human_review_required_set_matches_detection():
    """Check the frozen required set against the prompts actually detected.

    The file names in ``CORE_PROMPTS_WITH_HUMAN_REVIEW`` must be exactly the
    names listed in ``CORE_HUMAN_REVIEW_REQUIRED``; any extra or missing name
    fails the test.
    """
    names_with_section = frozenset(
        prompt.name for prompt in CORE_PROMPTS_WITH_HUMAN_REVIEW
    )
    assert names_with_section == CORE_HUMAN_REVIEW_REQUIRED
| 103 | + |
| 104 | + |
def test_core_human_review_exempt_prompts_have_no_preamble_section():
    """L1 opener prompts keep Human Review cues in fenced templates only.

    Each name in ``CORE_HUMAN_REVIEW_EXEMPT`` is resolved under ``CORE_DIR``
    and passed to ``has_human_review_preamble``. All offending names are
    collected before asserting, so one run reports every violation instead of
    stopping at the first. They are sorted because frozenset iteration order
    for strings is not stable across interpreter runs.
    """
    offenders = sorted(
        name
        for name in CORE_HUMAN_REVIEW_EXEMPT
        if has_human_review_preamble(CORE_DIR / name)
    )
    # One message line per offender, using the same wording as before.
    assert not offenders, "\n".join(
        f"{name} should not declare ## Human Review in preamble (exempt opener)"
        for name in offenders
    )
| 111 | + |
| 112 | + |
def test_core_human_review_sets_partition_core_prompts():
    """Verify the required and exempt sets split the 00-core prompts cleanly.

    Together the two sets must name every file in ``CORE_PROMPTS`` and
    nothing else, and no file name may appear in both sets.
    """
    classified = CORE_HUMAN_REVIEW_REQUIRED.union(CORE_HUMAN_REVIEW_EXEMPT)
    all_prompt_names = frozenset(prompt.name for prompt in CORE_PROMPTS)
    assert classified == all_prompt_names
    assert CORE_HUMAN_REVIEW_REQUIRED.isdisjoint(CORE_HUMAN_REVIEW_EXEMPT)
0 commit comments