Skip to content

Commit b750b2f

Browse files
authored
Add replay semantic integrity artifact
* Add replay semantic integrity artifact * Fix contract-linked replay semantic integrity labels * Simplify replay semantic integrity dictionaries
1 parent a761d95 commit b750b2f

4 files changed

Lines changed: 623 additions & 1 deletion

File tree

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
{
2+
"artifact_id": "replay_semantic_integrity_results_v1",
3+
"generated_by": "ReplaySemanticIntegrityArtifactGenerator",
4+
"version": "1.0",
5+
"evaluation_mode": "deterministic",
6+
"llm_judges": "none",
7+
"external_apis": "none",
8+
"families": [
9+
{
10+
"family": "coding_workflow_pr_review",
11+
"fixture_count": 4,
12+
"levels": [
13+
"baseline",
14+
"mild",
15+
"moderate",
16+
"severe"
17+
],
18+
"commitment_classes": {
19+
"evidence": {
20+
"passed": 0,
21+
"failed": 0,
22+
"failure_labels": []
23+
},
24+
"constraints": {
25+
"passed": 0,
26+
"failed": 0,
27+
"failure_labels": []
28+
},
29+
"dependencies": {
30+
"passed": 2,
31+
"failed": 2,
32+
"failure_labels": [
33+
"CAUSAL_DEPENDENCY_LOSS"
34+
]
35+
},
36+
"recovery_paths": {
37+
"passed": 1,
38+
"failed": 3,
39+
"failure_labels": [
40+
"RECOVERY_PATH_INVALID"
41+
]
42+
},
43+
"tool_order": {
44+
"passed": 3,
45+
"failed": 1,
46+
"failure_labels": [
47+
"POLICY_ORDER_BROKEN"
48+
]
49+
},
50+
"capability_boundaries": {
51+
"passed": 0,
52+
"failed": 0,
53+
"failure_labels": []
54+
},
55+
"governance_or_policy": {
56+
"passed": 0,
57+
"failed": 0,
58+
"failure_labels": []
59+
},
60+
"invariants": {
61+
"passed": 3,
62+
"failed": 1,
63+
"failure_labels": [
64+
"INVARIANT_VIOLATION"
65+
]
66+
}
67+
}
68+
},
69+
{
70+
"family": "incident_response_page_triage",
71+
"fixture_count": 4,
72+
"levels": [
73+
"baseline",
74+
"mild",
75+
"moderate",
76+
"severe"
77+
],
78+
"commitment_classes": {
79+
"evidence": {
80+
"passed": 0,
81+
"failed": 0,
82+
"failure_labels": []
83+
},
84+
"constraints": {
85+
"passed": 0,
86+
"failed": 0,
87+
"failure_labels": []
88+
},
89+
"dependencies": {
90+
"passed": 3,
91+
"failed": 1,
92+
"failure_labels": [
93+
"CAUSAL_DEPENDENCY_LOSS"
94+
]
95+
},
96+
"recovery_paths": {
97+
"passed": 1,
98+
"failed": 3,
99+
"failure_labels": [
100+
"RECOVERY_PATH_INVALID"
101+
]
102+
},
103+
"tool_order": {
104+
"passed": 3,
105+
"failed": 1,
106+
"failure_labels": [
107+
"POLICY_ORDER_BROKEN"
108+
]
109+
},
110+
"capability_boundaries": {
111+
"passed": 0,
112+
"failed": 0,
113+
"failure_labels": []
114+
},
115+
"governance_or_policy": {
116+
"passed": 0,
117+
"failed": 0,
118+
"failure_labels": []
119+
},
120+
"invariants": {
121+
"passed": 2,
122+
"failed": 2,
123+
"failure_labels": [
124+
"INVARIANT_VIOLATION"
125+
]
126+
}
127+
}
128+
},
129+
{
130+
"family": "cross_domain_operational_dependency_workflow",
131+
"fixture_count": 4,
132+
"levels": [
133+
"baseline",
134+
"mild",
135+
"moderate",
136+
"severe"
137+
],
138+
"commitment_classes": {
139+
"evidence": {
140+
"passed": 0,
141+
"failed": 0,
142+
"failure_labels": []
143+
},
144+
"constraints": {
145+
"passed": 0,
146+
"failed": 0,
147+
"failure_labels": []
148+
},
149+
"dependencies": {
150+
"passed": 2,
151+
"failed": 2,
152+
"failure_labels": [
153+
"CAUSAL_DEPENDENCY_LOSS"
154+
]
155+
},
156+
"recovery_paths": {
157+
"passed": 1,
158+
"failed": 3,
159+
"failure_labels": [
160+
"RECOVERY_PATH_INVALID"
161+
]
162+
},
163+
"tool_order": {
164+
"passed": 0,
165+
"failed": 0,
166+
"failure_labels": []
167+
},
168+
"capability_boundaries": {
169+
"passed": 0,
170+
"failed": 0,
171+
"failure_labels": []
172+
},
173+
"governance_or_policy": {
174+
"passed": 3,
175+
"failed": 1,
176+
"failure_labels": [
177+
"POLICY_ORDER_BROKEN"
178+
]
179+
},
180+
"invariants": {
181+
"passed": 1,
182+
"failed": 3,
183+
"failure_labels": [
184+
"INVARIANT_VIOLATION"
185+
]
186+
}
187+
}
188+
},
189+
{
190+
"family": "mcp_trace_replay",
191+
"fixture_count": 4,
192+
"levels": [
193+
"baseline",
194+
"mild",
195+
"moderate",
196+
"severe"
197+
],
198+
"commitment_classes": {
199+
"evidence": {
200+
"passed": 0,
201+
"failed": 0,
202+
"failure_labels": []
203+
},
204+
"constraints": {
205+
"passed": 4,
206+
"failed": 0,
207+
"failure_labels": []
208+
},
209+
"dependencies": {
210+
"passed": 2,
211+
"failed": 2,
212+
"failure_labels": [
213+
"CAUSAL_DEPENDENCY_LOSS"
214+
]
215+
},
216+
"recovery_paths": {
217+
"passed": 2,
218+
"failed": 2,
219+
"failure_labels": [
220+
"RECOVERY_PATH_INVALID"
221+
]
222+
},
223+
"tool_order": {
224+
"passed": 4,
225+
"failed": 0,
226+
"failure_labels": []
227+
},
228+
"capability_boundaries": {
229+
"passed": 1,
230+
"failed": 3,
231+
"failure_labels": [
232+
"INVARIANT_VIOLATION"
233+
]
234+
},
235+
"governance_or_policy": {
236+
"passed": 0,
237+
"failed": 0,
238+
"failure_labels": []
239+
},
240+
"invariants": {
241+
"passed": 0,
242+
"failed": 0,
243+
"failure_labels": []
244+
}
245+
}
246+
}
247+
],
248+
"global_summary": {
249+
"family_count": 4,
250+
"fixture_count": 16,
251+
"deterministic_evaluation": true,
252+
"llm_judges": "none",
253+
"external_apis": "none"
254+
}
255+
}

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"generate:layered-admissibility": "python scripts/generate_layered_admissibility_artifact.py",
1616
"generate:multi-family-admissibility": "python scripts/generate_multi_family_admissibility_artifact.py",
1717
"generate:multi-family-svg": "python scripts/render_multi_family_admissibility_svg.py",
18-
"generate:mcp-trace-replay": "python scripts/generate_mcp_trace_replay_artifact.py"
18+
"generate:mcp-trace-replay": "python scripts/generate_mcp_trace_replay_artifact.py",
19+
"generate:replay-semantic-integrity": "python scripts/generate_replay_semantic_integrity_artifact.py"
1920
}
2021
}

0 commit comments

Comments
 (0)