Skip to content

Commit 10873ce

Browse files
authored
Add deterministic fixture manifest
Add deterministic fixture manifest. Changes: add a deterministic fixture manifest for coding-workflow degradation fixtures; add manifest validation coverage; document manifest registration requirements; align the layered admissibility artifact with generator output.
1 parent 728c127 commit 10873ce

5 files changed

Lines changed: 217 additions & 1 deletion

File tree

artifacts/layered_admissibility_results.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
"governance_score": 1.0,
6262
"observed_admissible": false,
6363
"operational_score": 1.0,
64-
"overall_admissibility_score": 0.8333333333333334,
64+
"overall_admissibility_score": 0.8333333333333333,
6565
"passed_contracts": [
6666
"no_orphan_tool_calls",
6767
"pre_merge_review"

docs/FIXTURE_TEMPLATE_v1.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,14 @@ Recommended fields:
7878

7979
For positive fixtures, `expected_failures` and `allowed_failures` should be empty arrays.
8080

81+
82+
## Fixture manifest
83+
84+
- All fixture bundles must be listed in `fixtures/manifest.json`.
85+
- The manifest is deterministic and hand-reviewable.
86+
- `fixture_id`, `fixture_version`, `contracts`, `expected_failure_labels`, and `path` must match committed fixture metadata.
87+
- Benchmark artifacts should reference only registered fixtures from the manifest.
88+
8189
## Positive and negative fixtures
8290

8391
- Positive fixtures should pass all must-hold contracts.

docs/benchmarks/layered_admissibility.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
Deterministically compare admissibility outcomes across fixture bundles using ContractValidator and AdmissibilityScorer.
66

7+
All benchmarked fixtures are indexed in `fixtures/manifest.json`, and benchmark artifact references should resolve only to registered manifest entries.
8+
79
## Fixture results
810

911
| fixture_id | expected_admissible | observed_admissible | structural_score | relational_score | operational_score | governance_score | overall_admissibility_score | failure_labels |

fixtures/manifest.json

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
{
2+
"manifest_version": "1.0",
3+
"fixtures": [
4+
{
5+
"fixture_id": "coding_workflow_pr_review_v1",
6+
"fixture_version": "1.0.0",
7+
"category": "coding_workflow",
8+
"family": "coding_workflow_pr_review",
9+
"degradation_level": "baseline",
10+
"path": "fixtures/coding_workflow_pr_review_v1",
11+
"expected_admissible": true,
12+
"contracts": [
13+
"no_orphan_tool_calls",
14+
"pre_merge_review",
15+
"recovery_path_available",
16+
"security_causal_block"
17+
],
18+
"expected_failure_labels": []
19+
},
20+
{
21+
"fixture_id": "coding_workflow_pr_review_mild_v1",
22+
"fixture_version": "1.0.0",
23+
"category": "coding_workflow",
24+
"family": "coding_workflow_pr_review",
25+
"degradation_level": "mild",
26+
"path": "fixtures/coding_workflow_pr_review_mild_v1",
27+
"expected_admissible": false,
28+
"contracts": [
29+
"no_orphan_tool_calls",
30+
"pre_merge_review",
31+
"recovery_path_available",
32+
"security_causal_block"
33+
],
34+
"expected_failure_labels": [
35+
"RECOVERY_PATH_INVALID"
36+
]
37+
},
38+
{
39+
"fixture_id": "coding_workflow_pr_review_moderate_v1",
40+
"fixture_version": "1.0.0",
41+
"category": "coding_workflow",
42+
"family": "coding_workflow_pr_review",
43+
"degradation_level": "moderate",
44+
"path": "fixtures/coding_workflow_pr_review_moderate_v1",
45+
"expected_admissible": false,
46+
"contracts": [
47+
"no_orphan_tool_calls",
48+
"pre_merge_review",
49+
"recovery_path_available",
50+
"security_causal_block"
51+
],
52+
"expected_failure_labels": [
53+
"CAUSAL_DEPENDENCY_LOSS",
54+
"RECOVERY_PATH_INVALID"
55+
]
56+
},
57+
{
58+
"fixture_id": "coding_workflow_pr_review_degraded_v1",
59+
"fixture_version": "1.0.0",
60+
"category": "coding_workflow",
61+
"family": "coding_workflow_pr_review",
62+
"degradation_level": "severe",
63+
"path": "fixtures/coding_workflow_pr_review_degraded_v1",
64+
"expected_admissible": false,
65+
"contracts": [
66+
"no_orphan_tool_calls",
67+
"pre_merge_review",
68+
"recovery_path_available",
69+
"security_causal_block"
70+
],
71+
"expected_failure_labels": [
72+
"CAUSAL_DEPENDENCY_LOSS",
73+
"INVARIANT_VIOLATION",
74+
"POLICY_ORDER_BROKEN",
75+
"RECOVERY_PATH_INVALID"
76+
]
77+
}
78+
]
79+
}

tests/test_fixture_manifest.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
from __future__ import annotations
2+
3+
import json
4+
from pathlib import Path
5+
6+
# Two directory levels above this file.
ROOT = Path(__file__).resolve().parent.parent

# Location of the fixture manifest that the tests in this module validate.
MANIFEST_PATH = ROOT.joinpath("fixtures", "manifest.json")

# Degradation levels a manifest entry is allowed to declare.
ALLOWED_DEGRADATION_LEVELS = [
    "baseline",
    "mild",
    "moderate",
    "severe",
]

# Exact order in which fixture ids must appear in the manifest.
EXPECTED_FIXTURE_ORDER = [
    "coding_workflow_pr_review_v1",
    "coding_workflow_pr_review_mild_v1",
    "coding_workflow_pr_review_moderate_v1",
    "coding_workflow_pr_review_degraded_v1",
]
15+
16+
17+
def _load_json(path: Path) -> dict:
    """Parse the UTF-8 encoded JSON document at ``path`` and return the result."""
    return json.loads(path.read_text(encoding="utf-8"))
20+
21+
22+
def _load_manifest() -> dict:
    """Return the parsed contents of the fixture manifest at ``MANIFEST_PATH``."""
    manifest = _load_json(MANIFEST_PATH)
    return manifest
24+
25+
26+
def test_manifest_is_json_serializable_and_sorted() -> None:
    """Check the manifest serializes to JSON and lists fixtures in the expected order."""
    manifest = _load_manifest()

    # Only the absence of an exception matters here; the output is discarded.
    json.dumps(manifest, sort_keys=True)

    observed_order = []
    for entry in manifest["fixtures"]:
        observed_order.append(entry["fixture_id"])

    assert observed_order == EXPECTED_FIXTURE_ORDER
32+
33+
34+
def test_manifest_paths_exist() -> None:
    """Check every manifest entry points at a fixture directory with the required layout."""
    # Entries each fixture bundle must contain, relative to the bundle root.
    required_paths = [
        Path(relative)
        for relative in (
            "original/trace.json",
            "original/state.json",
            "original/dependency_graph.json",
            "original/contracts",
            "reconstructed/trace.json",
            "reconstructed/state.json",
            "reconstructed/dependency_graph.json",
            "expected/admissibility.json",
            "expected/failures.json",
            "README.md",
        )
    ]

    for entry in _load_manifest()["fixtures"]:
        fixture_dir = ROOT / entry["path"]
        assert fixture_dir.exists(), f"Missing fixture directory: {fixture_dir}"

        for rel_path in required_paths:
            target = fixture_dir / rel_path
            assert target.exists(), f"Missing path: {target}"
54+
55+
56+
def test_manifest_matches_fixture_admissibility_metadata() -> None:
    """Check each manifest entry agrees with the fixture's ``expected/admissibility.json``."""
    for entry in _load_manifest()["fixtures"]:
        metadata_path = ROOT / entry["path"] / "expected" / "admissibility.json"
        admissibility = _load_json(metadata_path)

        # These scalar fields must be identical in both documents.
        for key in ("fixture_id", "fixture_version", "expected_admissible"):
            assert entry[key] == admissibility[key]

        # The manifest's label list is compared against the sorted fixture labels.
        recorded_labels = admissibility.get("expected_failure_labels", [])
        assert entry["expected_failure_labels"] == sorted(recorded_labels)
65+
66+
67+
def test_manifest_contracts_match_contract_files() -> None:
    """Check each entry's ``contracts`` list equals the sorted ids of its contract files."""
    for entry in _load_manifest()["fixtures"]:
        contracts_dir = ROOT / entry["path"] / "original" / "contracts"

        # Read every ``*.json`` contract in filename order and collect its id.
        contract_ids = sorted(
            _load_json(contract_file)["contract_id"]
            for contract_file in sorted(contracts_dir.glob("*.json"))
        )

        assert contract_ids == entry["contracts"]
76+
77+
78+
def test_manifest_expected_failure_labels_match_failures_file() -> None:
    """Check each entry's failure labels equal the sorted ``expected_failures`` of the fixture."""
    for entry in _load_manifest()["fixtures"]:
        failures_path = ROOT / entry["path"] / "expected" / "failures.json"
        failures = _load_json(failures_path)

        # A fixture without the key is treated as declaring no expected failures.
        declared_failures = failures.get("expected_failures", [])
        assert entry["expected_failure_labels"] == sorted(declared_failures)
84+
85+
86+
def test_benchmark_artifact_references_only_manifest_fixtures() -> None:
    """Check every benchmark point refers to a registered fixture with matching version and path."""
    manifest = _load_manifest()
    benchmark = _load_json(ROOT / "artifacts" / "layered_admissibility_results.json")

    # Index the registered fixtures by id for direct lookup.
    registered = {}
    for entry in manifest["fixtures"]:
        registered[entry["fixture_id"]] = {
            "fixture_version": entry["fixture_version"],
            "path": entry["path"],
        }

    for point in benchmark["points"]:
        fixture_id = point["fixture_id"]
        assert fixture_id in registered

        expected = registered[fixture_id]
        assert point["fixture_version"] == expected["fixture_version"]
        assert point["fixture_path"] == expected["path"]
103+
104+
105+
def test_degradation_levels_are_known_and_unique_per_family() -> None:
    """Check degradation levels are allowed values and never repeat within a family."""
    levels_by_family: dict[str, set[str]] = {}

    for entry in _load_manifest()["fixtures"]:
        level = entry["degradation_level"]
        family = entry["family"]
        assert level in ALLOWED_DEGRADATION_LEVELS

        # Fetch (or create) the set of levels already recorded for this family.
        seen_levels = levels_by_family.setdefault(family, set())
        assert level not in seen_levels
        seen_levels.add(level)
116+
117+
118+
def test_no_unregistered_fixture_directories() -> None:
    """Check every fixture bundle found on disk is registered in the manifest.

    A directory directly under ``fixtures/`` counts as a fixture bundle when it
    contains ``expected/admissibility.json``. On failure, the assertion message
    lists the unregistered directories.
    """
    manifest = _load_manifest()
    registered_paths = {entry["path"] for entry in manifest["fixtures"]}

    # Manifest paths are written with forward slashes, so render the discovered
    # paths the same way on every platform.
    discovered_fixture_paths = {
        admissibility_file.parent.parent.relative_to(ROOT).as_posix()
        for admissibility_file in (ROOT / "fixtures").glob("*/expected/admissibility.json")
    }

    unregistered = sorted(discovered_fixture_paths - registered_paths)
    assert not unregistered, f"Fixture directories missing from manifest: {unregistered}"

0 commit comments

Comments
 (0)