Skip to content

Commit 59bd654

Browse files
authored
feat: add native MCP corruption contract coverage
Add native ContractValidator coverage for the materialized MCP corruption corpus.

- add deterministic native contract rules for boundaries, dependency edges, terminal actions, and action ordering
- update MCP fixture contracts to emit existing failure labels explicitly
- remove adapter-gap classification from MCP corruption validation
- require all 18 materialized corruption entries to emit their manifest expected_failure_label
- regenerate affected deterministic manifests and artifacts
- address review feedback for null list handling and strict invalid-type errors

No LLM, embedding, fuzzy matching, runtime orchestration, dashboard, showcase, or dependency changes.
1 parent d7d6b51 commit 59bd654

37 files changed

Lines changed: 419 additions & 268 deletions

artifacts/capability_boundary_replay_results.json

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -266,9 +266,7 @@
266266
"expected_failure_labels": [
267267
"APPROVAL_GATE_LOSS",
268268
"CAPABILITY_BOUNDARY_LOSS",
269-
"POLICY_ENFORCEMENT_GAP",
270-
"RECOVERY_PATH_INVALID",
271-
"UNAUTHORIZED_CAPABILITY_PATH"
269+
"POLICY_ENFORCEMENT_GAP"
272270
],
273271
"capability_boundary": {
274272
"original_edge_count": 4,
@@ -327,11 +325,8 @@
327325
{
328326
"fixture_id": "mcp_trace_replay_mild_v1",
329327
"degradation_level": "mild",
330-
"expected_admissible": false,
331-
"expected_failure_labels": [
332-
"CAPABILITY_BOUNDARY_LOSS",
333-
"RECOVERY_PATH_INVALID"
334-
],
328+
"expected_admissible": true,
329+
"expected_failure_labels": [],
335330
"capability_boundary": {
336331
"original_edge_count": 4,
337332
"replay_edge_count": 4,
@@ -351,8 +346,8 @@
351346
"degradation_level": "moderate",
352347
"expected_admissible": false,
353348
"expected_failure_labels": [
354-
"INVARIANT_VIOLATION",
355-
"UNAUTHORIZED_CAPABILITY_PATH"
349+
"APPROVAL_GATE_LOSS",
350+
"DEPENDENCY_CHAIN_BREAK"
356351
],
357352
"capability_boundary": {
358353
"original_edge_count": 4,

artifacts/graph_diff_results.json

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1537,9 +1537,7 @@
15371537
"expected_failure_labels": [
15381538
"APPROVAL_GATE_LOSS",
15391539
"CAPABILITY_BOUNDARY_LOSS",
1540-
"POLICY_ENFORCEMENT_GAP",
1541-
"RECOVERY_PATH_INVALID",
1542-
"UNAUTHORIZED_CAPABILITY_PATH"
1540+
"POLICY_ENFORCEMENT_GAP"
15431541
],
15441542
"edge_categories": {
15451543
"capability_boundaries": {
@@ -1707,11 +1705,8 @@
17071705
{
17081706
"fixture_id": "mcp_trace_replay_mild_v1",
17091707
"degradation_level": "mild",
1710-
"expected_admissible": false,
1711-
"expected_failure_labels": [
1712-
"CAPABILITY_BOUNDARY_LOSS",
1713-
"RECOVERY_PATH_INVALID"
1714-
],
1708+
"expected_admissible": true,
1709+
"expected_failure_labels": [],
17151710
"edge_categories": {
17161711
"capability_boundaries": {
17171712
"original_edge_count": 3,
@@ -1838,8 +1833,8 @@
18381833
"degradation_level": "moderate",
18391834
"expected_admissible": false,
18401835
"expected_failure_labels": [
1841-
"INVARIANT_VIOLATION",
1842-
"UNAUTHORIZED_CAPABILITY_PATH"
1836+
"APPROVAL_GATE_LOSS",
1837+
"DEPENDENCY_CHAIN_BREAK"
18431838
],
18441839
"edge_categories": {
18451840
"capability_boundaries": {

artifacts/mcp_trace_replay_results.json

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,17 @@
2020
},
2121
{
2222
"degradation_level": "mild",
23-
"expected_admissible": false,
24-
"failed_contracts": [
25-
"capability_boundary_respected",
26-
"recovery_path_available"
27-
],
28-
"failure_labels": [
29-
"CAPABILITY_BOUNDARY_LOSS",
30-
"RECOVERY_PATH_INVALID"
31-
],
23+
"expected_admissible": true,
24+
"failed_contracts": [],
25+
"failure_labels": [],
3226
"fixture_id": "mcp_trace_replay_mild_v1",
33-
"observed_admissible": false,
34-
"overall_admissibility_score": "0.833333",
27+
"observed_admissible": true,
28+
"overall_admissibility_score": "1.000000",
3529
"passed_contracts": [
30+
"approval_gate_preserved",
31+
"capability_boundary_respected",
3632
"dependency_chain_preserved",
33+
"recovery_path_available",
3734
"tool_call_order_preserved",
3835
"validation_before_unsafe_action"
3936
]
@@ -42,17 +39,18 @@
4239
"degradation_level": "moderate",
4340
"expected_admissible": false,
4441
"failed_contracts": [
45-
"capability_boundary_respected",
42+
"approval_gate_preserved",
4643
"dependency_chain_preserved"
4744
],
4845
"failure_labels": [
49-
"INVARIANT_VIOLATION",
50-
"UNAUTHORIZED_CAPABILITY_PATH"
46+
"APPROVAL_GATE_LOSS",
47+
"DEPENDENCY_CHAIN_BREAK"
5148
],
5249
"fixture_id": "mcp_trace_replay_moderate_v1",
5350
"observed_admissible": false,
54-
"overall_admissibility_score": "0.833333",
51+
"overall_admissibility_score": "0.666667",
5552
"passed_contracts": [
53+
"capability_boundary_respected",
5654
"recovery_path_available",
5755
"tool_call_order_preserved",
5856
"validation_before_unsafe_action"
@@ -62,23 +60,23 @@
6260
"degradation_level": "severe",
6361
"expected_admissible": false,
6462
"failed_contracts": [
63+
"approval_gate_preserved",
6564
"capability_boundary_respected",
66-
"dependency_chain_preserved",
67-
"recovery_path_available",
68-
"tool_call_order_preserved",
6965
"validation_before_unsafe_action"
7066
],
7167
"failure_labels": [
7268
"APPROVAL_GATE_LOSS",
7369
"CAPABILITY_BOUNDARY_LOSS",
74-
"POLICY_ENFORCEMENT_GAP",
75-
"RECOVERY_PATH_INVALID",
76-
"UNAUTHORIZED_CAPABILITY_PATH"
70+
"POLICY_ENFORCEMENT_GAP"
7771
],
7872
"fixture_id": "mcp_trace_replay_degraded_v1",
7973
"observed_admissible": false,
80-
"overall_admissibility_score": "0.500000",
81-
"passed_contracts": []
74+
"overall_admissibility_score": "0.541667",
75+
"passed_contracts": [
76+
"dependency_chain_preserved",
77+
"recovery_path_available",
78+
"tool_call_order_preserved"
79+
]
8280
}
8381
],
8482
"generated_by": "McpTraceReplayArtifactGenerator",

artifacts/multi_family_admissibility_curves.svg

Lines changed: 4 additions & 4 deletions
Loading

artifacts/multi_family_admissibility_results.json

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -336,80 +336,78 @@
336336
"structural_score": 1.0
337337
},
338338
{
339-
"expected_admissible": false,
340-
"failed_contracts": [
341-
"capability_boundary_respected",
342-
"recovery_path_available"
343-
],
344-
"failure_labels": [
345-
"CAPABILITY_BOUNDARY_LOSS",
346-
"RECOVERY_PATH_INVALID"
347-
],
339+
"expected_admissible": true,
340+
"failed_contracts": [],
341+
"failure_labels": [],
348342
"fixture_id": "mcp_trace_replay_mild_v1",
349343
"fixture_path": "fixtures/mcp_trace_replay_mild_v1",
350344
"fixture_version": "1.0.0",
351345
"governance_score": 1.0,
352-
"observed_admissible": false,
346+
"observed_admissible": true,
353347
"operational_score": 1.0,
354-
"overall_admissibility_score": 0.8333333333333334,
348+
"overall_admissibility_score": 1.0,
355349
"passed_contracts": [
350+
"approval_gate_preserved",
351+
"capability_boundary_respected",
356352
"dependency_chain_preserved",
353+
"recovery_path_available",
357354
"tool_call_order_preserved",
358355
"validation_before_unsafe_action"
359356
],
360-
"relational_score": 0.3333333333333333,
357+
"relational_score": 1.0,
361358
"structural_score": 1.0
362359
},
363360
{
364361
"expected_admissible": false,
365362
"failed_contracts": [
366-
"capability_boundary_respected",
363+
"approval_gate_preserved",
367364
"dependency_chain_preserved"
368365
],
369366
"failure_labels": [
370-
"INVARIANT_VIOLATION",
371-
"UNAUTHORIZED_CAPABILITY_PATH"
367+
"APPROVAL_GATE_LOSS",
368+
"DEPENDENCY_CHAIN_BREAK"
372369
],
373370
"fixture_id": "mcp_trace_replay_moderate_v1",
374371
"fixture_path": "fixtures/mcp_trace_replay_moderate_v1",
375372
"fixture_version": "1.0.0",
376-
"governance_score": 1.0,
373+
"governance_score": 0.0,
377374
"observed_admissible": false,
378375
"operational_score": 1.0,
379-
"overall_admissibility_score": 0.8333333333333334,
376+
"overall_admissibility_score": 0.6666666666666666,
380377
"passed_contracts": [
378+
"capability_boundary_respected",
381379
"recovery_path_available",
382380
"tool_call_order_preserved",
383381
"validation_before_unsafe_action"
384382
],
385-
"relational_score": 0.3333333333333333,
383+
"relational_score": 0.6666666666666666,
386384
"structural_score": 1.0
387385
},
388386
{
389387
"expected_admissible": false,
390388
"failed_contracts": [
389+
"approval_gate_preserved",
391390
"capability_boundary_respected",
392-
"dependency_chain_preserved",
393-
"recovery_path_available",
394-
"tool_call_order_preserved",
395391
"validation_before_unsafe_action"
396392
],
397393
"failure_labels": [
398394
"APPROVAL_GATE_LOSS",
399395
"CAPABILITY_BOUNDARY_LOSS",
400-
"POLICY_ENFORCEMENT_GAP",
401-
"RECOVERY_PATH_INVALID",
402-
"UNAUTHORIZED_CAPABILITY_PATH"
396+
"POLICY_ENFORCEMENT_GAP"
403397
],
404398
"fixture_id": "mcp_trace_replay_degraded_v1",
405399
"fixture_path": "fixtures/mcp_trace_replay_degraded_v1",
406400
"fixture_version": "1.0.0",
407-
"governance_score": 1.0,
401+
"governance_score": 0.0,
408402
"observed_admissible": false,
409-
"operational_score": 0.0,
410-
"overall_admissibility_score": 0.5,
411-
"passed_contracts": [],
412-
"relational_score": 0.0,
403+
"operational_score": 0.5,
404+
"overall_admissibility_score": 0.5416666666666666,
405+
"passed_contracts": [
406+
"dependency_chain_preserved",
407+
"recovery_path_available",
408+
"tool_call_order_preserved"
409+
],
410+
"relational_score": 0.6666666666666666,
413411
"structural_score": 1.0
414412
}
415413
],

artifacts/replay_semantic_integrity_results.json

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -205,43 +205,40 @@
205205
"passed": 3,
206206
"failed": 1,
207207
"failure_labels": [
208-
"APPROVAL_GATE_LOSS"
208+
"POLICY_ENFORCEMENT_GAP"
209209
]
210210
},
211211
"dependencies": {
212-
"passed": 2,
213-
"failed": 2,
212+
"passed": 3,
213+
"failed": 1,
214214
"failure_labels": [
215-
"UNAUTHORIZED_CAPABILITY_PATH"
215+
"DEPENDENCY_CHAIN_BREAK"
216216
]
217217
},
218218
"recovery_paths": {
219-
"passed": 2,
220-
"failed": 2,
221-
"failure_labels": [
222-
"RECOVERY_PATH_INVALID"
223-
]
219+
"passed": 4,
220+
"failed": 0,
221+
"failure_labels": []
224222
},
225223
"tool_order": {
224+
"passed": 4,
225+
"failed": 0,
226+
"failure_labels": []
227+
},
228+
"capability_boundaries": {
226229
"passed": 3,
227230
"failed": 1,
228231
"failure_labels": [
229-
"POLICY_ENFORCEMENT_GAP"
232+
"CAPABILITY_BOUNDARY_LOSS"
230233
]
231234
},
232-
"capability_boundaries": {
235+
"governance_or_policy": {
233236
"passed": 1,
234-
"failed": 3,
237+
"failed": 2,
235238
"failure_labels": [
236-
"CAPABILITY_BOUNDARY_LOSS",
237-
"INVARIANT_VIOLATION"
239+
"APPROVAL_GATE_LOSS"
238240
]
239241
},
240-
"governance_or_policy": {
241-
"passed": 0,
242-
"failed": 0,
243-
"failure_labels": []
244-
},
245242
"invariants": {
246243
"passed": 0,
247244
"failed": 0,

artifacts/tool_ordering_replay_results.json

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -278,9 +278,7 @@
278278
"expected_failure_labels": [
279279
"APPROVAL_GATE_LOSS",
280280
"CAPABILITY_BOUNDARY_LOSS",
281-
"POLICY_ENFORCEMENT_GAP",
282-
"RECOVERY_PATH_INVALID",
283-
"UNAUTHORIZED_CAPABILITY_PATH"
281+
"POLICY_ENFORCEMENT_GAP"
284282
],
285283
"tool_ordering": {
286284
"original_edge_count": 0,
@@ -298,11 +296,8 @@
298296
{
299297
"fixture_id": "mcp_trace_replay_mild_v1",
300298
"degradation_level": "mild",
301-
"expected_admissible": false,
302-
"expected_failure_labels": [
303-
"CAPABILITY_BOUNDARY_LOSS",
304-
"RECOVERY_PATH_INVALID"
305-
],
299+
"expected_admissible": true,
300+
"expected_failure_labels": [],
306301
"tool_ordering": {
307302
"original_edge_count": 0,
308303
"replay_edge_count": 0,
@@ -321,8 +316,8 @@
321316
"degradation_level": "moderate",
322317
"expected_admissible": false,
323318
"expected_failure_labels": [
324-
"INVARIANT_VIOLATION",
325-
"UNAUTHORIZED_CAPABILITY_PATH"
319+
"APPROVAL_GATE_LOSS",
320+
"DEPENDENCY_CHAIN_BREAK"
326321
],
327322
"tool_ordering": {
328323
"original_edge_count": 0,

0 commit comments

Comments
 (0)