Skip to content

Commit 764aaa4

Browse files
authored
Add deterministic multi-family admissibility artifact generation
* Add deterministic multi-family admissibility artifact
* Make incident response mild degradation distinct from moderate
* Regenerate multi-family artifact after mild fixture fix
1 parent 6d5750b commit 764aaa4

6 files changed

Lines changed: 375 additions & 1 deletion

File tree

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
{
2+
"artifact_id": "multi_family_admissibility_results_v1",
3+
"families": [
4+
{
5+
"curve": {
6+
"curve_id": "coding_workflow_pr_review_curve_v1",
7+
"generated_by": "DegradationCurveGenerator",
8+
"points": [
9+
{
10+
"expected_admissible": true,
11+
"failed_contracts": [],
12+
"failure_labels": [],
13+
"fixture_id": "coding_workflow_pr_review_v1",
14+
"fixture_path": "fixtures/coding_workflow_pr_review_v1",
15+
"fixture_version": "1.0.0",
16+
"governance_score": 1.0,
17+
"observed_admissible": true,
18+
"operational_score": 1.0,
19+
"overall_admissibility_score": 1.0,
20+
"passed_contracts": [
21+
"no_orphan_tool_calls",
22+
"pre_merge_review",
23+
"recovery_path_available",
24+
"security_causal_block"
25+
],
26+
"relational_score": 1.0,
27+
"structural_score": 1.0
28+
},
29+
{
30+
"expected_admissible": false,
31+
"failed_contracts": [
32+
"recovery_path_available"
33+
],
34+
"failure_labels": [
35+
"RECOVERY_PATH_INVALID"
36+
],
37+
"fixture_id": "coding_workflow_pr_review_mild_v1",
38+
"fixture_path": "fixtures/coding_workflow_pr_review_mild_v1",
39+
"fixture_version": "1.0.0",
40+
"governance_score": 1.0,
41+
"observed_admissible": false,
42+
"operational_score": 1.0,
43+
"overall_admissibility_score": 0.9166666666666666,
44+
"passed_contracts": [
45+
"no_orphan_tool_calls",
46+
"pre_merge_review",
47+
"security_causal_block"
48+
],
49+
"relational_score": 0.6666666666666666,
50+
"structural_score": 1.0
51+
},
52+
{
53+
"expected_admissible": false,
54+
"failed_contracts": [
55+
"recovery_path_available",
56+
"security_causal_block"
57+
],
58+
"failure_labels": [
59+
"CAUSAL_DEPENDENCY_LOSS",
60+
"RECOVERY_PATH_INVALID"
61+
],
62+
"fixture_id": "coding_workflow_pr_review_moderate_v1",
63+
"fixture_path": "fixtures/coding_workflow_pr_review_moderate_v1",
64+
"fixture_version": "1.0.0",
65+
"governance_score": 1.0,
66+
"observed_admissible": false,
67+
"operational_score": 1.0,
68+
"overall_admissibility_score": 0.8333333333333334,
69+
"passed_contracts": [
70+
"no_orphan_tool_calls",
71+
"pre_merge_review"
72+
],
73+
"relational_score": 0.3333333333333333,
74+
"structural_score": 1.0
75+
},
76+
{
77+
"expected_admissible": false,
78+
"failed_contracts": [
79+
"no_orphan_tool_calls",
80+
"pre_merge_review",
81+
"recovery_path_available",
82+
"security_causal_block"
83+
],
84+
"failure_labels": [
85+
"CAUSAL_DEPENDENCY_LOSS",
86+
"INVARIANT_VIOLATION",
87+
"POLICY_ORDER_BROKEN",
88+
"RECOVERY_PATH_INVALID"
89+
],
90+
"fixture_id": "coding_workflow_pr_review_degraded_v1",
91+
"fixture_path": "fixtures/coding_workflow_pr_review_degraded_v1",
92+
"fixture_version": "1.0.0",
93+
"governance_score": 1.0,
94+
"observed_admissible": false,
95+
"operational_score": 0.0,
96+
"overall_admissibility_score": 0.5,
97+
"passed_contracts": [],
98+
"relational_score": 0.0,
99+
"structural_score": 1.0
100+
}
101+
],
102+
"version": "1.0"
103+
},
104+
"family": "coding_workflow_pr_review"
105+
},
106+
{
107+
"curve": {
108+
"curve_id": "incident_response_page_triage_curve_v1",
109+
"generated_by": "DegradationCurveGenerator",
110+
"points": [
111+
{
112+
"expected_admissible": true,
113+
"failed_contracts": [],
114+
"failure_labels": [],
115+
"fixture_id": "incident_response_page_triage_v1",
116+
"fixture_path": "fixtures/incident_response_page_triage_v1",
117+
"fixture_version": "1.0.0",
118+
"governance_score": 1.0,
119+
"observed_admissible": true,
120+
"operational_score": 1.0,
121+
"overall_admissibility_score": 1.0,
122+
"passed_contracts": [
123+
"alert_ack_before_mitigation",
124+
"no_orphan_mitigation_steps",
125+
"rollback_reachable",
126+
"root_cause_links_incident"
127+
],
128+
"relational_score": 1.0,
129+
"structural_score": 1.0
130+
},
131+
{
132+
"expected_admissible": false,
133+
"failed_contracts": [
134+
"rollback_reachable"
135+
],
136+
"failure_labels": [
137+
"RECOVERY_PATH_INVALID"
138+
],
139+
"fixture_id": "incident_response_page_triage_mild_v1",
140+
"fixture_path": "fixtures/incident_response_page_triage_mild_v1",
141+
"fixture_version": "1.0.0",
142+
"governance_score": 1.0,
143+
"observed_admissible": false,
144+
"operational_score": 1.0,
145+
"overall_admissibility_score": 0.9166666666666666,
146+
"passed_contracts": [
147+
"alert_ack_before_mitigation",
148+
"no_orphan_mitigation_steps",
149+
"root_cause_links_incident"
150+
],
151+
"relational_score": 0.6666666666666666,
152+
"structural_score": 1.0
153+
},
154+
{
155+
"expected_admissible": false,
156+
"failed_contracts": [
157+
"no_orphan_mitigation_steps",
158+
"rollback_reachable"
159+
],
160+
"failure_labels": [
161+
"INVARIANT_VIOLATION",
162+
"RECOVERY_PATH_INVALID"
163+
],
164+
"fixture_id": "incident_response_page_triage_moderate_v1",
165+
"fixture_path": "fixtures/incident_response_page_triage_moderate_v1",
166+
"fixture_version": "1.0.0",
167+
"governance_score": 1.0,
168+
"observed_admissible": false,
169+
"operational_score": 1.0,
170+
"overall_admissibility_score": 0.8333333333333334,
171+
"passed_contracts": [
172+
"alert_ack_before_mitigation",
173+
"root_cause_links_incident"
174+
],
175+
"relational_score": 0.3333333333333333,
176+
"structural_score": 1.0
177+
},
178+
{
179+
"expected_admissible": false,
180+
"failed_contracts": [
181+
"alert_ack_before_mitigation",
182+
"no_orphan_mitigation_steps",
183+
"rollback_reachable",
184+
"root_cause_links_incident"
185+
],
186+
"failure_labels": [
187+
"CAUSAL_DEPENDENCY_LOSS",
188+
"INVARIANT_VIOLATION",
189+
"POLICY_ORDER_BROKEN",
190+
"RECOVERY_PATH_INVALID"
191+
],
192+
"fixture_id": "incident_response_page_triage_degraded_v1",
193+
"fixture_path": "fixtures/incident_response_page_triage_degraded_v1",
194+
"fixture_version": "1.0.0",
195+
"governance_score": 1.0,
196+
"observed_admissible": false,
197+
"operational_score": 0.0,
198+
"overall_admissibility_score": 0.5,
199+
"passed_contracts": [],
200+
"relational_score": 0.0,
201+
"structural_score": 1.0
202+
}
203+
],
204+
"version": "1.0"
205+
},
206+
"family": "incident_response_page_triage"
207+
}
208+
],
209+
"generated_by": "DegradationCurveGenerator",
210+
"version": "1.0"
211+
}

fixtures/incident_response_page_triage_mild_v1/reconstructed/dependency_graph.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,12 @@
8181
"target": "escalate_to_human",
8282
"relation": "RECOVERY",
8383
"metadata": {}
84+
},
85+
{
86+
"source": "incident_classified",
87+
"target": "rollback_available",
88+
"relation": "RECOVERY",
89+
"metadata": {}
8490
}
8591
]
8692
}

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"test:replay": "pytest tests/test_paper_replay_bench.py tests/test_agent_trace_replay.py tests/test_replay_continuity.py -q",
1313
"layout": "python scripts/check_repo_layout.py",
1414
"check": "npm run layout && npm run typecheck && npm run validate && npm run build && npm run test",
15-
"generate:layered-admissibility": "python scripts/generate_layered_admissibility_artifact.py"
15+
"generate:layered-admissibility": "python scripts/generate_layered_admissibility_artifact.py",
16+
"generate:multi-family-admissibility": "python scripts/generate_multi_family_admissibility_artifact.py"
1617
}
1718
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""Deterministic entrypoint for multi-family admissibility artifact regeneration."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
import sys
7+
from pathlib import Path
8+
from typing import Any
9+
10+
# Resolve the repository root (the parent of this script's directory) and put
# it on sys.path so the absolute ``src.`` import below resolves when this file
# is executed directly as a script.
REPO_ROOT = Path(__file__).resolve().parents[1]
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))
13+
14+
from src.validation.degradation_curve_generator import (
15+
LAYERED_CURVE_LEVELS,
16+
MANIFEST_PATH,
17+
DegradationCurveGenerator,
18+
)
19+
20+
ARTIFACT_ID = "multi_family_admissibility_results_v1"
21+
OUTPUT_PATH = Path("artifacts/multi_family_admissibility_results.json")
22+
23+
24+
def _families_with_standard_levels(
    manifest_path: Path = MANIFEST_PATH,
    levels: tuple[str, ...] = LAYERED_CURVE_LEVELS,
) -> tuple[str, ...]:
    """Return the sorted manifest families that provide every level in ``levels``.

    The manifest at ``manifest_path`` is read as UTF-8 JSON. It must be an
    object whose ``"fixtures"`` value is a list of objects. Entries whose
    ``"family"`` or ``"degradation_level"`` is missing or empty are ignored.

    Args:
        manifest_path: Location of the fixture manifest JSON file.
        levels: Degradation levels a family must cover to be included.

    Returns:
        Family names, sorted, for which every requested level is present.

    Raises:
        ValueError: If the manifest is not a JSON object, its ``"fixtures"``
            value is not a list, or a fixture entry is not a JSON object.
    """
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    # A top-level list/string/number has no ``.get``; report it as the same
    # format error instead of leaking an AttributeError.
    fixtures = manifest.get("fixtures") if isinstance(manifest, dict) else None
    if not isinstance(fixtures, list):
        raise ValueError(f"invalid fixture manifest format: {manifest_path}")

    family_to_levels: dict[str, set[str]] = {}
    for index, entry in enumerate(fixtures):
        if not isinstance(entry, dict):
            raise ValueError(
                f"invalid fixture manifest entry at index {index}: {manifest_path}"
            )
        family = entry.get("family")
        level = entry.get("degradation_level")
        if not family or not level:
            continue
        family_to_levels.setdefault(str(family), set()).add(str(level))

    # Build the required-level set once rather than once per family.
    required_levels = frozenset(levels)
    return tuple(
        sorted(
            family
            for family, family_levels in family_to_levels.items()
            if required_levels.issubset(family_levels)
        )
    )
42+
43+
44+
def generate_multi_family_admissibility_artifact(output_path: Path = OUTPUT_PATH) -> Path:
    """Write the multi-family admissibility artifact and return its path.

    One degradation curve is generated for each family returned by
    ``_families_with_standard_levels()``. The resulting payload is serialized
    as JSON with two-space indentation, sorted keys, and a trailing newline.
    Missing parent directories of ``output_path`` are created.

    Args:
        output_path: Destination file for the serialized artifact.

    Returns:
        The same ``output_path`` that was written.
    """
    curve_generator = DegradationCurveGenerator()

    def _family_record(family_name: str) -> dict[str, Any]:
        # Build the {"family", "curve"} record for one manifest family.
        family_fixtures = curve_generator.fixtures_for_manifest_family(family_name)
        family_curve = curve_generator.generate(
            family_fixtures,
            curve_id=f"{family_name}_curve_v1",
        )
        return {"family": family_name, "curve": curve_generator.to_dict(family_curve)}

    family_records: list[dict[str, Any]] = [
        _family_record(family_name) for family_name in _families_with_standard_levels()
    ]

    artifact = {
        "artifact_id": ARTIFACT_ID,
        "generated_by": DegradationCurveGenerator.__name__,
        "version": "1.0",
        "families": family_records,
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(artifact, indent=2, sort_keys=True) + "\n"
    output_path.write_text(serialized, encoding="utf-8")
    return output_path
63+
64+
65+
def main() -> int:
    """Regenerate the artifact at its default path, print that path, return 0."""
    written_path = generate_multi_family_admissibility_artifact()
    print(written_path.as_posix())
    return 0
69+
70+
71+
# Script entry point: exit the process with the status returned by main().
if __name__ == "__main__":
    sys.exit(main())

tests/test_artifact_reproducibility.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
from scripts.generate_layered_admissibility_artifact import (
99
generate_layered_admissibility_artifact,
1010
)
11+
from scripts.generate_multi_family_admissibility_artifact import (
12+
generate_multi_family_admissibility_artifact,
13+
)
1114

1215

1316
@dataclass(frozen=True)
@@ -25,6 +28,11 @@ class DeterministicArtifactSpec:
2528
committed_path=Path("artifacts/layered_admissibility_results.json"),
2629
regenerate=generate_layered_admissibility_artifact,
2730
),
31+
DeterministicArtifactSpec(
32+
name="multi_family_admissibility_results",
33+
committed_path=Path("artifacts/multi_family_admissibility_results.json"),
34+
regenerate=generate_multi_family_admissibility_artifact,
35+
),
2836
)
2937

3038

0 commit comments

Comments
 (0)