Skip to content

Commit 7d454e2

Browse files
authored
Add MCP trace replay fixture family
* Add MCP trace replay fixture family * Fix MCP negative fixture admissibility expectations * Fix MCP negative fixture admissibility expectations * Fix MCP negative fixture admissibility expectations * Sync MCP manifest expected failure labels * Align MCP expected failure metadata * Align MCP manifest with observed failures * Align degraded MCP admissibility labels
1 parent 25a7076 commit 7d454e2

61 files changed

Lines changed: 1940 additions & 2 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

artifacts/multi_family_admissibility_curves.svg

Lines changed: 8 additions & 1 deletion
Loading

artifacts/multi_family_admissibility_results.json

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,113 @@
308308
"version": "1.0"
309309
},
310310
"family": "incident_response_page_triage"
311+
},
312+
{
313+
"curve": {
314+
"curve_id": "mcp_trace_replay_curve_v1",
315+
"generated_by": "DegradationCurveGenerator",
316+
"points": [
317+
{
318+
"expected_admissible": true,
319+
"failed_contracts": [],
320+
"failure_labels": [],
321+
"fixture_id": "mcp_trace_replay_v1",
322+
"fixture_path": "fixtures/mcp_trace_replay_v1",
323+
"fixture_version": "1.0.0",
324+
"governance_score": 1.0,
325+
"observed_admissible": true,
326+
"operational_score": 1.0,
327+
"overall_admissibility_score": 1.0,
328+
"passed_contracts": [
329+
"capability_boundary_respected",
330+
"dependency_chain_preserved",
331+
"recovery_path_available",
332+
"tool_call_order_preserved",
333+
"validation_before_unsafe_action"
334+
],
335+
"relational_score": 1.0,
336+
"structural_score": 1.0
337+
},
338+
{
339+
"expected_admissible": false,
340+
"failed_contracts": [
341+
"capability_boundary_respected",
342+
"recovery_path_available"
343+
],
344+
"failure_labels": [
345+
"INVARIANT_VIOLATION",
346+
"RECOVERY_PATH_INVALID"
347+
],
348+
"fixture_id": "mcp_trace_replay_mild_v1",
349+
"fixture_path": "fixtures/mcp_trace_replay_mild_v1",
350+
"fixture_version": "1.0.0",
351+
"governance_score": 1.0,
352+
"observed_admissible": false,
353+
"operational_score": 1.0,
354+
"overall_admissibility_score": 0.8333333333333334,
355+
"passed_contracts": [
356+
"dependency_chain_preserved",
357+
"tool_call_order_preserved",
358+
"validation_before_unsafe_action"
359+
],
360+
"relational_score": 0.3333333333333333,
361+
"structural_score": 1.0
362+
},
363+
{
364+
"expected_admissible": false,
365+
"failed_contracts": [
366+
"capability_boundary_respected",
367+
"dependency_chain_preserved"
368+
],
369+
"failure_labels": [
370+
"CAUSAL_DEPENDENCY_LOSS",
371+
"INVARIANT_VIOLATION"
372+
],
373+
"fixture_id": "mcp_trace_replay_moderate_v1",
374+
"fixture_path": "fixtures/mcp_trace_replay_moderate_v1",
375+
"fixture_version": "1.0.0",
376+
"governance_score": 1.0,
377+
"observed_admissible": false,
378+
"operational_score": 1.0,
379+
"overall_admissibility_score": 0.8333333333333334,
380+
"passed_contracts": [
381+
"recovery_path_available",
382+
"tool_call_order_preserved",
383+
"validation_before_unsafe_action"
384+
],
385+
"relational_score": 0.3333333333333333,
386+
"structural_score": 1.0
387+
},
388+
{
389+
"expected_admissible": false,
390+
"failed_contracts": [
391+
"capability_boundary_respected",
392+
"dependency_chain_preserved",
393+
"recovery_path_available"
394+
],
395+
"failure_labels": [
396+
"CAUSAL_DEPENDENCY_LOSS",
397+
"INVARIANT_VIOLATION",
398+
"RECOVERY_PATH_INVALID"
399+
],
400+
"fixture_id": "mcp_trace_replay_degraded_v1",
401+
"fixture_path": "fixtures/mcp_trace_replay_degraded_v1",
402+
"fixture_version": "1.0.0",
403+
"governance_score": 1.0,
404+
"observed_admissible": false,
405+
"operational_score": 1.0,
406+
"overall_admissibility_score": 0.75,
407+
"passed_contracts": [
408+
"tool_call_order_preserved",
409+
"validation_before_unsafe_action"
410+
],
411+
"relational_score": 0.0,
412+
"structural_score": 1.0
413+
}
414+
],
415+
"version": "1.0"
416+
},
417+
"family": "mcp_trace_replay"
311418
}
312419
],
313420
"generated_by": "DegradationCurveGenerator",

fixtures/manifest.json

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,84 @@
223223
"POLICY_ORDER_BROKEN",
224224
"RECOVERY_PATH_INVALID"
225225
]
226+
},
227+
{
228+
"fixture_id": "mcp_trace_replay_v1",
229+
"fixture_version": "1.0.0",
230+
"category": "mcp_trace",
231+
"family": "mcp_trace_replay",
232+
"degradation_level": "baseline",
233+
"path": "fixtures/mcp_trace_replay_v1",
234+
"expected_admissible": true,
235+
"contracts": [
236+
"capability_boundary_respected",
237+
"dependency_chain_preserved",
238+
"recovery_path_available",
239+
"tool_call_order_preserved",
240+
"validation_before_unsafe_action"
241+
],
242+
"expected_failure_labels": []
243+
},
244+
{
245+
"fixture_id": "mcp_trace_replay_mild_v1",
246+
"fixture_version": "1.0.0",
247+
"category": "mcp_trace",
248+
"family": "mcp_trace_replay",
249+
"degradation_level": "mild",
250+
"path": "fixtures/mcp_trace_replay_mild_v1",
251+
"expected_admissible": false,
252+
"contracts": [
253+
"capability_boundary_respected",
254+
"dependency_chain_preserved",
255+
"recovery_path_available",
256+
"tool_call_order_preserved",
257+
"validation_before_unsafe_action"
258+
],
259+
"expected_failure_labels": [
260+
"INVARIANT_VIOLATION",
261+
"RECOVERY_PATH_INVALID"
262+
]
263+
},
264+
{
265+
"fixture_id": "mcp_trace_replay_moderate_v1",
266+
"fixture_version": "1.0.0",
267+
"category": "mcp_trace",
268+
"family": "mcp_trace_replay",
269+
"degradation_level": "moderate",
270+
"path": "fixtures/mcp_trace_replay_moderate_v1",
271+
"expected_admissible": false,
272+
"contracts": [
273+
"capability_boundary_respected",
274+
"dependency_chain_preserved",
275+
"recovery_path_available",
276+
"tool_call_order_preserved",
277+
"validation_before_unsafe_action"
278+
],
279+
"expected_failure_labels": [
280+
"CAUSAL_DEPENDENCY_LOSS",
281+
"INVARIANT_VIOLATION"
282+
]
283+
},
284+
{
285+
"fixture_id": "mcp_trace_replay_degraded_v1",
286+
"fixture_version": "1.0.0",
287+
"category": "mcp_trace",
288+
"family": "mcp_trace_replay",
289+
"degradation_level": "severe",
290+
"path": "fixtures/mcp_trace_replay_degraded_v1",
291+
"expected_admissible": false,
292+
"contracts": [
293+
"capability_boundary_respected",
294+
"dependency_chain_preserved",
295+
"recovery_path_available",
296+
"tool_call_order_preserved",
297+
"validation_before_unsafe_action"
298+
],
299+
"expected_failure_labels": [
300+
"CAUSAL_DEPENDENCY_LOSS",
301+
"INVARIANT_VIOLATION",
302+
"RECOVERY_PATH_INVALID"
303+
]
226304
}
227305
]
228306
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# mcp_trace_replay_degraded_v1
2+
3+
Deterministic fixture for mcp_trace_replay (severe).
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"fixture_id": "mcp_trace_replay_degraded_v1",
3+
"fixture_version": "1.0.0",
4+
"expected_admissible": false,
5+
"must_fail_contracts": [
6+
"dependency_chain_preserved",
7+
"capability_boundary_respected",
8+
"recovery_path_available",
9+
"tool_call_order_preserved",
10+
"validation_before_unsafe_action"
11+
],
12+
"expected_failure_labels": [
13+
"CAUSAL_DEPENDENCY_LOSS",
14+
"INVARIANT_VIOLATION",
15+
"RECOVERY_PATH_INVALID"
16+
]
17+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"expected_failures": [
3+
"CAUSAL_DEPENDENCY_LOSS",
4+
"INVARIANT_VIOLATION",
5+
"RECOVERY_PATH_INVALID"
6+
],
7+
"allowed_failures": [
8+
"ORPHAN_DEPENDENCY",
9+
"DETACHED_DEPENDENCY",
10+
"GRAPH_FRAGMENTATION",
11+
"TEMPORAL_ORDER_VIOLATION"
12+
],
13+
"disallowed_failures": [
14+
"CYCLE_INTRODUCED",
15+
"REPLAY_NON_REPRODUCIBLE",
16+
"ARTIFACT_INTEGRITY_VIOLATION"
17+
]
18+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"contract_id": "capability_boundary_respected",
3+
"layer": "relational",
4+
"type": "invariant",
5+
"definition": {
6+
"rule": "no_orphan_dependencies"
7+
},
8+
"severity": "HIGH"
9+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"contract_id": "dependency_chain_preserved",
3+
"layer": "relational",
4+
"type": "causality",
5+
"definition": {
6+
"required_causal_edges": [
7+
[
8+
"capability_scope_checked",
9+
"validate_external_action"
10+
],
11+
[
12+
"validate_external_action",
13+
"execute_external_action"
14+
],
15+
[
16+
"execute_external_action",
17+
"verify_result"
18+
]
19+
]
20+
},
21+
"severity": "HIGH"
22+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"contract_id": "recovery_path_available",
3+
"layer": "relational",
4+
"type": "reachability",
5+
"definition": {
6+
"from": "execute_external_action",
7+
"to": [
8+
"recovery_path_registered"
9+
],
10+
"min_paths": 1
11+
},
12+
"severity": "HIGH"
13+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"contract_id": "tool_call_order_preserved",
3+
"layer": "operational",
4+
"type": "ordering",
5+
"definition": {
6+
"required_sequence": [
7+
"capability_scope_checked",
8+
"tool_schema_validated",
9+
"read_context",
10+
"validate_external_action",
11+
"execute_external_action",
12+
"verify_result"
13+
]
14+
},
15+
"severity": "CRITICAL"
16+
}

0 commit comments

Comments
 (0)