From 8917a1fdc47b7b1651f70758eef5114017171bf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20K=C3=B6lnberger?= <159939812+ProfRandom92@users.noreply.github.com> Date: Wed, 20 May 2026 09:43:52 -0700 Subject: [PATCH 1/4] Harden MCP capability boundary fixtures --- .../capability_boundary_replay_results.json | 142 +++++++++++----- artifacts/graph_diff_results.json | 154 ++++++++++++++---- artifacts/mcp_trace_replay_results.json | 25 +-- .../multi_family_admissibility_curves.svg | 4 +- .../multi_family_admissibility_results.json | 27 +-- .../replay_semantic_integrity_results.json | 19 ++- artifacts/tool_ordering_replay_results.json | 14 +- fixtures/manifest.json | 14 +- .../expected/admissibility.json | 8 +- .../expected/failures.json | 8 +- .../capability_boundary_respected.json | 3 +- .../contracts/dependency_chain_preserved.json | 3 +- .../contracts/tool_call_order_preserved.json | 3 +- .../validation_before_unsafe_action.json | 3 +- .../original/state.json | 42 ++++- .../reconstructed/state.json | 31 +++- .../reconstructed/trace.json | 14 +- .../expected/admissibility.json | 2 +- .../expected/failures.json | 4 +- .../capability_boundary_respected.json | 3 +- .../original/state.json | 42 ++++- .../reconstructed/state.json | 39 ++++- .../expected/admissibility.json | 4 +- .../expected/failures.json | 4 +- .../contracts/dependency_chain_preserved.json | 3 +- .../original/state.json | 42 ++++- .../reconstructed/state.json | 47 +++++- .../mcp_trace_replay_v1/original/state.json | 42 ++++- .../reconstructed/state.json | 42 ++++- src/validation/contract_validator.py | 31 ++-- ...est_capability_boundary_replay_artifact.py | 4 + tests/test_tool_ordering_replay_artifact.py | 4 + 32 files changed, 665 insertions(+), 162 deletions(-) diff --git a/artifacts/capability_boundary_replay_results.json b/artifacts/capability_boundary_replay_results.json index 8935988..cc79107 100644 --- a/artifacts/capability_boundary_replay_results.json +++ b/artifacts/capability_boundary_replay_results.json @@ -264,20 +264,64 @@ "degradation_level": "severe", "expected_admissible": false, "expected_failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION", - "RECOVERY_PATH_INVALID" + "APPROVAL_GATE_LOSS", + "CAPABILITY_BOUNDARY_LOSS", + "POLICY_ENFORCEMENT_GAP", + "RECOVERY_PATH_INVALID", + "UNAUTHORIZED_CAPABILITY_PATH" ], "capability_boundary": { - "original_edge_count": 0, - "replay_edge_count": 0, - "missing_edges": [], - "added_edges": [], - "original_node_count": 0, - "replay_node_count": 0, - "missing_nodes": [], - "added_nodes": [], - "drift_detected": false + "original_edge_count": 4, + "replay_edge_count": 3, + "missing_edges": [ + [ + "agent", + "capability_scope_checked" + ], + [ + "capability_scope_checked", + "validate_external_action" + ], + [ + "execute_external_action", + "approved_external_resource" + ], + [ + "human_approval", + "execute_external_action" + ] + ], + "added_edges": [ + [ + "agent", + "execute_external_action" + ], + [ + "execute_external_action", + "unapproved_admin_console" + ], + [ + "unapproved_admin_console", + "production_credentials" + ] + ], + "original_node_count": 10, + "replay_node_count": 4, + "missing_nodes": [ + "approved_external_resource", + "capability_scope_checked", + "human_approval", + "read_context", + "requires_human_approval", + "requires_validation_passed", + "validate_external_action", + "verify_result" + ], + "added_nodes": [ + "production_credentials", + "unapproved_admin_console" + ], + "drift_detected": true } }, { @@ -285,19 +329,21 @@ "degradation_level": "mild", "expected_admissible": false, "expected_failure_labels": [ - "INVARIANT_VIOLATION", + "CAPABILITY_BOUNDARY_LOSS", "RECOVERY_PATH_INVALID" ], "capability_boundary": { - "original_edge_count": 0, - "replay_edge_count": 0, + "original_edge_count": 4, + "replay_edge_count": 4, "missing_edges": [], "added_edges": [], - "original_node_count": 0, - "replay_node_count": 0, - "missing_nodes": [], + "original_node_count": 10, + "replay_node_count": 9, + "missing_nodes": [ + "requires_human_approval" + ], "added_nodes": [], - "drift_detected": false + "drift_detected": true } }, { @@ -305,19 +351,39 @@ "degradation_level": "moderate", "expected_admissible": false, "expected_failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION" + "INVARIANT_VIOLATION", + "UNAUTHORIZED_CAPABILITY_PATH" ], "capability_boundary": { - "original_edge_count": 0, - "replay_edge_count": 0, - "missing_edges": [], - "added_edges": [], - "original_node_count": 0, - "replay_node_count": 0, - "missing_nodes": [], - "added_nodes": [], - "drift_detected": false + "original_edge_count": 4, + "replay_edge_count": 5, + "missing_edges": [ + [ + "human_approval", + "execute_external_action" + ] + ], + "added_edges": [ + [ + "execute_external_action", + "unapproved_admin_console" + ], + [ + "unapproved_admin_console", + "production_credentials" + ] + ], + "original_node_count": 10, + "replay_node_count": 10, + "missing_nodes": [ + "human_approval", + "requires_human_approval" + ], + "added_nodes": [ + "production_credentials", + "unapproved_admin_console" + ], + "drift_detected": true } }, { @@ -326,12 +392,12 @@ "expected_admissible": true, "expected_failure_labels": [], "capability_boundary": { - "original_edge_count": 0, - "replay_edge_count": 0, + "original_edge_count": 4, + "replay_edge_count": 4, "missing_edges": [], "added_edges": [], - "original_node_count": 0, - "replay_node_count": 0, + "original_node_count": 10, + "replay_node_count": 10, "missing_nodes": [], "added_nodes": [], "drift_detected": false @@ -343,10 +409,10 @@ "global_summary": { "family_count": 4, "fixture_count": 16, - "fixtures_with_capability_boundary_data": 0, - "fixtures_with_boundary_drift": 0, - "total_missing_boundary_edges": 0, - "total_added_boundary_edges": 0, + "fixtures_with_capability_boundary_data": 4, + "fixtures_with_boundary_drift": 3, + "total_missing_boundary_edges": 5, + "total_added_boundary_edges": 5, "deterministic_evaluation": true, "llm_judges": "none", "external_apis": "none" diff --git a/artifacts/graph_diff_results.json b/artifacts/graph_diff_results.json index 9b69f84..7db2154 100644 --- a/artifacts/graph_diff_results.json +++ b/artifacts/graph_diff_results.json @@ -1535,20 +1535,60 @@ "degradation_level": "severe", "expected_admissible": false, "expected_failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION", - "RECOVERY_PATH_INVALID" + "APPROVAL_GATE_LOSS", + "CAPABILITY_BOUNDARY_LOSS", + "POLICY_ENFORCEMENT_GAP", + "RECOVERY_PATH_INVALID", + "UNAUTHORIZED_CAPABILITY_PATH" ], "edge_categories": { "capability_boundaries": { - "original_edge_count": 0, - "replay_edge_count": 0, - "missing_edges": [], - "added_edges": [], - "missing_nodes": [], - "added_nodes": [], - "original_nodes": [], - "replay_nodes": [] + "original_edge_count": 3, + "replay_edge_count": 2, + "missing_edges": [ + [ + "agent", + "capability_scope_checked" + ], + [ + "capability_scope_checked", + "validate_external_action" + ], + [ + "human_approval", + "execute_external_action" + ] + ], + "added_edges": [ + [ + "agent", + "execute_external_action" + ], + [ + "execute_external_action", + "unapproved_admin_console" + ] + ], + "missing_nodes": [ + "capability_scope_checked", + "human_approval", + "validate_external_action" + ], + "added_nodes": [ + "unapproved_admin_console" + ], + "original_nodes": [ + "agent", + "capability_scope_checked", + "execute_external_action", + "human_approval", + "validate_external_action" + ], + "replay_nodes": [ + "agent", + "execute_external_action", + "unapproved_admin_console" + ] }, "causal_dependencies": { "original_edge_count": 0, @@ -1669,19 +1709,31 @@ "degradation_level": "mild", "expected_admissible": false, "expected_failure_labels": [ - "INVARIANT_VIOLATION", + "CAPABILITY_BOUNDARY_LOSS", "RECOVERY_PATH_INVALID" ], "edge_categories": { "capability_boundaries": { - "original_edge_count": 0, - "replay_edge_count": 0, + "original_edge_count": 3, + "replay_edge_count": 3, "missing_edges": [], "added_edges": [], "missing_nodes": [], "added_nodes": [], - "original_nodes": [], - "replay_nodes": [] + "original_nodes": [ + "agent", + "capability_scope_checked", + "execute_external_action", + "human_approval", + "validate_external_action" + ], + "replay_nodes": [ + "agent", + "capability_scope_checked", + "execute_external_action", + "human_approval", + "validate_external_action" + ] }, "causal_dependencies": { "original_edge_count": 0, @@ -1786,19 +1838,45 @@ "degradation_level": "moderate", "expected_admissible": false, "expected_failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION" + "INVARIANT_VIOLATION", + "UNAUTHORIZED_CAPABILITY_PATH" ], "edge_categories": { "capability_boundaries": { - "original_edge_count": 0, - "replay_edge_count": 0, - "missing_edges": [], - "added_edges": [], - "missing_nodes": [], - "added_nodes": [], - "original_nodes": [], - "replay_nodes": [] + "original_edge_count": 3, + "replay_edge_count": 3, + "missing_edges": [ + [ + "human_approval", + "execute_external_action" + ] + ], + "added_edges": [ + [ + "execute_external_action", + "unapproved_admin_console" + ] + ], + "missing_nodes": [ + "human_approval" + ], + "added_nodes": [ + "unapproved_admin_console" + ], + "original_nodes": [ + "agent", + "capability_scope_checked", + "execute_external_action", + "human_approval", + "validate_external_action" + ], + "replay_nodes": [ + "agent", + "capability_scope_checked", + "execute_external_action", + "unapproved_admin_console", + "validate_external_action" + ] }, "causal_dependencies": { "original_edge_count": 0, @@ -1913,14 +1991,26 @@ "expected_failure_labels": [], "edge_categories": { "capability_boundaries": { - "original_edge_count": 0, - "replay_edge_count": 0, + "original_edge_count": 3, + "replay_edge_count": 3, "missing_edges": [], "added_edges": [], "missing_nodes": [], "added_nodes": [], - "original_nodes": [], - "replay_nodes": [] + "original_nodes": [ + "agent", + "capability_scope_checked", + "execute_external_action", + "human_approval", + "validate_external_action" + ], + "replay_nodes": [ + "agent", + "capability_scope_checked", + "execute_external_action", + "human_approval", + "validate_external_action" + ] }, "causal_dependencies": { "original_edge_count": 0, @@ -2020,8 +2110,8 @@ "global_summary": { "family_count": 4, "fixture_count": 16, - "total_missing_edges": 32, - "total_added_edges": 7, + "total_missing_edges": 36, + "total_added_edges": 10, "deterministic_evaluation": true, "llm_judges": "none", "external_apis": "none" diff --git a/artifacts/mcp_trace_replay_results.json b/artifacts/mcp_trace_replay_results.json index 59afd49..06f89b3 100644 --- a/artifacts/mcp_trace_replay_results.json +++ b/artifacts/mcp_trace_replay_results.json @@ -26,7 +26,7 @@ "recovery_path_available" ], "failure_labels": [ - "INVARIANT_VIOLATION", + "CAPABILITY_BOUNDARY_LOSS", "RECOVERY_PATH_INVALID" ], "fixture_id": "mcp_trace_replay_mild_v1", @@ -46,8 +46,8 @@ "dependency_chain_preserved" ], "failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION" + "INVARIANT_VIOLATION", + "UNAUTHORIZED_CAPABILITY_PATH" ], "fixture_id": "mcp_trace_replay_moderate_v1", "observed_admissible": false, @@ -64,20 +64,21 @@ "failed_contracts": [ "capability_boundary_respected", "dependency_chain_preserved", - "recovery_path_available" + "recovery_path_available", + "tool_call_order_preserved", + "validation_before_unsafe_action" ], "failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION", - "RECOVERY_PATH_INVALID" + "APPROVAL_GATE_LOSS", + "CAPABILITY_BOUNDARY_LOSS", + "POLICY_ENFORCEMENT_GAP", + "RECOVERY_PATH_INVALID", + "UNAUTHORIZED_CAPABILITY_PATH" ], "fixture_id": "mcp_trace_replay_degraded_v1", "observed_admissible": false, - "overall_admissibility_score": "0.750000", - "passed_contracts": [ - "tool_call_order_preserved", - "validation_before_unsafe_action" - ] + "overall_admissibility_score": "0.500000", + "passed_contracts": [] } ], "generated_by": "McpTraceReplayArtifactGenerator", diff --git a/artifacts/multi_family_admissibility_curves.svg b/artifacts/multi_family_admissibility_curves.svg index 39c60f4..c84629d 100644 --- a/artifacts/multi_family_admissibility_curves.svg +++ b/artifacts/multi_family_admissibility_curves.svg @@ -38,11 +38,11 @@ incident_response_page_triage - + - + mcp_trace_replay diff --git a/artifacts/multi_family_admissibility_results.json b/artifacts/multi_family_admissibility_results.json index 15ec5bd..f788ce4 100644 --- a/artifacts/multi_family_admissibility_results.json +++ b/artifacts/multi_family_admissibility_results.json @@ -342,7 +342,7 @@ "recovery_path_available" ], "failure_labels": [ - "INVARIANT_VIOLATION", + "CAPABILITY_BOUNDARY_LOSS", "RECOVERY_PATH_INVALID" ], "fixture_id": "mcp_trace_replay_mild_v1", @@ -367,8 +367,8 @@ "dependency_chain_preserved" ], "failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION" + "INVARIANT_VIOLATION", + "UNAUTHORIZED_CAPABILITY_PATH" ], "fixture_id": "mcp_trace_replay_moderate_v1", "fixture_path": "fixtures/mcp_trace_replay_moderate_v1", @@ -390,24 +390,25 @@ "failed_contracts": [ "capability_boundary_respected", "dependency_chain_preserved", - "recovery_path_available" + "recovery_path_available", + "tool_call_order_preserved", + "validation_before_unsafe_action" ], "failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION", - "RECOVERY_PATH_INVALID" + "APPROVAL_GATE_LOSS", + "CAPABILITY_BOUNDARY_LOSS", + "POLICY_ENFORCEMENT_GAP", + "RECOVERY_PATH_INVALID", + "UNAUTHORIZED_CAPABILITY_PATH" ], "fixture_id": "mcp_trace_replay_degraded_v1", "fixture_path": "fixtures/mcp_trace_replay_degraded_v1", "fixture_version": "1.0.0", "governance_score": 1.0, "observed_admissible": false, - "operational_score": 1.0, - "overall_admissibility_score": 0.75, - "passed_contracts": [ - "tool_call_order_preserved", - "validation_before_unsafe_action" - ], + "operational_score": 0.0, + "overall_admissibility_score": 0.5, + "passed_contracts": [], "relational_score": 0.0, "structural_score": 1.0 } diff --git a/artifacts/replay_semantic_integrity_results.json b/artifacts/replay_semantic_integrity_results.json index 567be58..172d2b1 100644 --- a/artifacts/replay_semantic_integrity_results.json +++ b/artifacts/replay_semantic_integrity_results.json @@ -202,15 +202,17 @@ "failure_labels": [] }, "constraints": { - "passed": 4, - "failed": 0, - "failure_labels": [] + "passed": 3, + "failed": 1, + "failure_labels": [ + "APPROVAL_GATE_LOSS" + ] }, "dependencies": { "passed": 2, "failed": 2, "failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS" + "UNAUTHORIZED_CAPABILITY_PATH" ] }, "recovery_paths": { @@ -221,14 +223,17 @@ ] }, "tool_order": { - "passed": 4, - "failed": 0, - "failure_labels": [] + "passed": 3, + "failed": 1, + "failure_labels": [ + "POLICY_ENFORCEMENT_GAP" + ] }, "capability_boundaries": { "passed": 1, "failed": 3, "failure_labels": [ + "CAPABILITY_BOUNDARY_LOSS", "INVARIANT_VIOLATION" ] }, diff --git a/artifacts/tool_ordering_replay_results.json b/artifacts/tool_ordering_replay_results.json index aa1fa6c..d4c768a 100644 --- a/artifacts/tool_ordering_replay_results.json +++ b/artifacts/tool_ordering_replay_results.json @@ -276,9 +276,11 @@ "degradation_level": "severe", "expected_admissible": false, "expected_failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION", - "RECOVERY_PATH_INVALID" + "APPROVAL_GATE_LOSS", + "CAPABILITY_BOUNDARY_LOSS", + "POLICY_ENFORCEMENT_GAP", + "RECOVERY_PATH_INVALID", + "UNAUTHORIZED_CAPABILITY_PATH" ], "tool_ordering": { "original_edge_count": 0, @@ -298,7 +300,7 @@ "degradation_level": "mild", "expected_admissible": false, "expected_failure_labels": [ - "INVARIANT_VIOLATION", + "CAPABILITY_BOUNDARY_LOSS", "RECOVERY_PATH_INVALID" ], "tool_ordering": { @@ -319,8 +321,8 @@ "degradation_level": "moderate", "expected_admissible": false, "expected_failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION" + "INVARIANT_VIOLATION", + "UNAUTHORIZED_CAPABILITY_PATH" ], "tool_ordering": { "original_edge_count": 0, diff --git a/fixtures/manifest.json b/fixtures/manifest.json index cddd164..2d57d28 100644 --- a/fixtures/manifest.json +++ b/fixtures/manifest.json @@ -257,7 +257,7 @@ "validation_before_unsafe_action" ], "expected_failure_labels": [ - "INVARIANT_VIOLATION", + "CAPABILITY_BOUNDARY_LOSS", "RECOVERY_PATH_INVALID" ] }, @@ -277,8 +277,8 @@ "validation_before_unsafe_action" ], "expected_failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION" + "INVARIANT_VIOLATION", + "UNAUTHORIZED_CAPABILITY_PATH" ] }, { @@ -297,9 +297,11 @@ "validation_before_unsafe_action" ], "expected_failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION", - "RECOVERY_PATH_INVALID" + "APPROVAL_GATE_LOSS", + "CAPABILITY_BOUNDARY_LOSS", + "POLICY_ENFORCEMENT_GAP", + "RECOVERY_PATH_INVALID", + "UNAUTHORIZED_CAPABILITY_PATH" ] } ] diff --git a/fixtures/mcp_trace_replay_degraded_v1/expected/admissibility.json b/fixtures/mcp_trace_replay_degraded_v1/expected/admissibility.json index e43729b..b3de76c 100644 --- a/fixtures/mcp_trace_replay_degraded_v1/expected/admissibility.json +++ b/fixtures/mcp_trace_replay_degraded_v1/expected/admissibility.json @@ -10,8 +10,10 @@ "validation_before_unsafe_action" ], "expected_failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION", - "RECOVERY_PATH_INVALID" + "APPROVAL_GATE_LOSS", + "CAPABILITY_BOUNDARY_LOSS", + "POLICY_ENFORCEMENT_GAP", + "RECOVERY_PATH_INVALID", + "UNAUTHORIZED_CAPABILITY_PATH" ] } diff --git a/fixtures/mcp_trace_replay_degraded_v1/expected/failures.json b/fixtures/mcp_trace_replay_degraded_v1/expected/failures.json index 4526e45..c5a5ba2 100644 --- a/fixtures/mcp_trace_replay_degraded_v1/expected/failures.json +++ b/fixtures/mcp_trace_replay_degraded_v1/expected/failures.json @@ -1,8 +1,10 @@ { "expected_failures": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION", - "RECOVERY_PATH_INVALID" + "APPROVAL_GATE_LOSS", + "CAPABILITY_BOUNDARY_LOSS", + "POLICY_ENFORCEMENT_GAP", + "RECOVERY_PATH_INVALID", + "UNAUTHORIZED_CAPABILITY_PATH" ], "allowed_failures": [ "ORPHAN_DEPENDENCY", diff --git a/fixtures/mcp_trace_replay_degraded_v1/original/contracts/capability_boundary_respected.json b/fixtures/mcp_trace_replay_degraded_v1/original/contracts/capability_boundary_respected.json index f10b6e4..5b1a9b5 100644 --- a/fixtures/mcp_trace_replay_degraded_v1/original/contracts/capability_boundary_respected.json +++ b/fixtures/mcp_trace_replay_degraded_v1/original/contracts/capability_boundary_respected.json @@ -5,5 +5,6 @@ "definition": { "rule": "no_orphan_dependencies" }, - "severity": "HIGH" + "severity": "HIGH", + "failure_label_on_violation": "CAPABILITY_BOUNDARY_LOSS" } diff --git a/fixtures/mcp_trace_replay_degraded_v1/original/contracts/dependency_chain_preserved.json b/fixtures/mcp_trace_replay_degraded_v1/original/contracts/dependency_chain_preserved.json index 155afe2..6729d2d 100644 --- a/fixtures/mcp_trace_replay_degraded_v1/original/contracts/dependency_chain_preserved.json +++ b/fixtures/mcp_trace_replay_degraded_v1/original/contracts/dependency_chain_preserved.json @@ -18,5 +18,6 @@ ] ] }, - "severity": "HIGH" + "severity": "HIGH", + "failure_label_on_violation": "UNAUTHORIZED_CAPABILITY_PATH" } diff --git a/fixtures/mcp_trace_replay_degraded_v1/original/contracts/tool_call_order_preserved.json b/fixtures/mcp_trace_replay_degraded_v1/original/contracts/tool_call_order_preserved.json index d9468cb..d46ae6f 100644 --- a/fixtures/mcp_trace_replay_degraded_v1/original/contracts/tool_call_order_preserved.json +++ b/fixtures/mcp_trace_replay_degraded_v1/original/contracts/tool_call_order_preserved.json @@ -12,5 +12,6 @@ "verify_result" ] }, - "severity": "CRITICAL" + "severity": "CRITICAL", + "failure_label_on_violation": "POLICY_ENFORCEMENT_GAP" } diff --git a/fixtures/mcp_trace_replay_degraded_v1/original/contracts/validation_before_unsafe_action.json b/fixtures/mcp_trace_replay_degraded_v1/original/contracts/validation_before_unsafe_action.json index f637b94..27ef6d2 100644 --- a/fixtures/mcp_trace_replay_degraded_v1/original/contracts/validation_before_unsafe_action.json +++ b/fixtures/mcp_trace_replay_degraded_v1/original/contracts/validation_before_unsafe_action.json @@ -8,5 +8,6 @@ "execute_external_action" ] }, - "severity": "CRITICAL" + "severity": "CRITICAL", + "failure_label_on_violation": "APPROVAL_GATE_LOSS" } diff --git a/fixtures/mcp_trace_replay_degraded_v1/original/state.json b/fixtures/mcp_trace_replay_degraded_v1/original/state.json index 19995c3..7597431 100644 --- a/fixtures/mcp_trace_replay_degraded_v1/original/state.json +++ b/fixtures/mcp_trace_replay_degraded_v1/original/state.json @@ -1,4 +1,44 @@ { "state_version": "1.0", - "entities": {} + "entities": {}, + "capability_boundaries": [ + [ + "agent", + "capability_scope_checked" + ], + [ + "capability_scope_checked", + "validate_external_action" + ], + [ + "human_approval", + "execute_external_action" + ] + ], + "allowed_tools": [ + "read_context", + "validate_external_action", + "execute_external_action", + "verify_result" + ], + "resource_boundaries": [ + [ + "execute_external_action", + "approved_external_resource" + ] + ], + "permission_scopes": { + "execute_external_action": [ + "requires_human_approval", + "requires_validation_passed" + ] + }, + "capability_scope": { + "agent": [ + "read_context", + "validate_external_action", + "execute_external_action", + "verify_result" + ] + } } diff --git a/fixtures/mcp_trace_replay_degraded_v1/reconstructed/state.json b/fixtures/mcp_trace_replay_degraded_v1/reconstructed/state.json index 19995c3..7f43e3b 100644 --- a/fixtures/mcp_trace_replay_degraded_v1/reconstructed/state.json +++ b/fixtures/mcp_trace_replay_degraded_v1/reconstructed/state.json @@ -1,4 +1,33 @@ { "state_version": "1.0", - "entities": {} + "entities": {}, + "capability_boundaries": [ + [ + "agent", + "execute_external_action" + ], + [ + "execute_external_action", + "unapproved_admin_console" + ] + ], + "allowed_tools": [ + "execute_external_action", + "unapproved_admin_console" + ], + "resource_boundaries": [ + [ + "unapproved_admin_console", + "production_credentials" + ] + ], + "permission_scopes": { + "execute_external_action": [] + }, + "capability_scope": { + "agent": [ + "execute_external_action", + "unapproved_admin_console" + ] + } } diff --git a/fixtures/mcp_trace_replay_degraded_v1/reconstructed/trace.json b/fixtures/mcp_trace_replay_degraded_v1/reconstructed/trace.json index 25a6205..d556b1b 100644 --- a/fixtures/mcp_trace_replay_degraded_v1/reconstructed/trace.json +++ b/fixtures/mcp_trace_replay_degraded_v1/reconstructed/trace.json @@ -5,24 +5,24 @@ "step": 1 }, { - "action": "capability_scope_checked", + "action": "execute_external_action", "step": 2 }, { - "action": "tool_schema_validated", + "action": "capability_scope_checked", "step": 3 }, { - "action": "read_context", + "action": "tool_schema_validated", "step": 4 }, { - "action": "validate_external_action", - "step": 6 + "action": "read_context", + "step": 5 }, { - "action": "execute_external_action", - "step": 5 + "action": "validate_external_action", + "step": 6 }, { "action": "verify_result", diff --git a/fixtures/mcp_trace_replay_mild_v1/expected/admissibility.json b/fixtures/mcp_trace_replay_mild_v1/expected/admissibility.json index dc89616..1c34a71 100644 --- a/fixtures/mcp_trace_replay_mild_v1/expected/admissibility.json +++ b/fixtures/mcp_trace_replay_mild_v1/expected/admissibility.json @@ -7,7 +7,7 @@ "recovery_path_available" ], "expected_failure_labels": [ - "INVARIANT_VIOLATION", + "CAPABILITY_BOUNDARY_LOSS", "RECOVERY_PATH_INVALID" ] } diff --git a/fixtures/mcp_trace_replay_mild_v1/expected/failures.json b/fixtures/mcp_trace_replay_mild_v1/expected/failures.json index 8712d0e..1f9bea6 100644 --- a/fixtures/mcp_trace_replay_mild_v1/expected/failures.json +++ b/fixtures/mcp_trace_replay_mild_v1/expected/failures.json @@ -1,7 +1,7 @@ { "expected_failures": [ - "RECOVERY_PATH_INVALID", - "INVARIANT_VIOLATION" + "CAPABILITY_BOUNDARY_LOSS", + "RECOVERY_PATH_INVALID" ], "allowed_failures": [ "ORPHAN_DEPENDENCY", diff --git a/fixtures/mcp_trace_replay_mild_v1/original/contracts/capability_boundary_respected.json b/fixtures/mcp_trace_replay_mild_v1/original/contracts/capability_boundary_respected.json index f10b6e4..5b1a9b5 100644 --- a/fixtures/mcp_trace_replay_mild_v1/original/contracts/capability_boundary_respected.json +++ b/fixtures/mcp_trace_replay_mild_v1/original/contracts/capability_boundary_respected.json @@ -5,5 +5,6 @@ "definition": { "rule": "no_orphan_dependencies" }, - "severity": "HIGH" + "severity": "HIGH", + "failure_label_on_violation": "CAPABILITY_BOUNDARY_LOSS" } diff --git a/fixtures/mcp_trace_replay_mild_v1/original/state.json b/fixtures/mcp_trace_replay_mild_v1/original/state.json index 19995c3..7597431 100644 --- a/fixtures/mcp_trace_replay_mild_v1/original/state.json +++ b/fixtures/mcp_trace_replay_mild_v1/original/state.json @@ -1,4 +1,44 @@ { "state_version": "1.0", - "entities": {} + "entities": {}, + "capability_boundaries": [ + [ + "agent", + "capability_scope_checked" + ], + [ + "capability_scope_checked", + "validate_external_action" + ], + [ + "human_approval", + "execute_external_action" + ] + ], + "allowed_tools": [ + "read_context", + "validate_external_action", + "execute_external_action", + "verify_result" + ], + "resource_boundaries": [ + [ + "execute_external_action", + "approved_external_resource" + ] + ], + "permission_scopes": { + "execute_external_action": [ + "requires_human_approval", + "requires_validation_passed" + ] + }, + "capability_scope": { + "agent": [ + "read_context", + "validate_external_action", + "execute_external_action", + "verify_result" + ] + } } diff --git a/fixtures/mcp_trace_replay_mild_v1/reconstructed/state.json b/fixtures/mcp_trace_replay_mild_v1/reconstructed/state.json index 19995c3..c2630f1 100644 --- a/fixtures/mcp_trace_replay_mild_v1/reconstructed/state.json +++ b/fixtures/mcp_trace_replay_mild_v1/reconstructed/state.json @@ -1,4 +1,41 @@ { "state_version": "1.0", - "entities": {} + "entities": {}, + "capability_boundaries": [ + [ + "agent", + "capability_scope_checked" + ], + [ + "capability_scope_checked", + "validate_external_action" + ], + [ + "human_approval", + "execute_external_action" + ] + ], + "allowed_tools": [ + "read_context", + "validate_external_action", + "verify_result" + ], + "resource_boundaries": [ + [ + "execute_external_action", + "approved_external_resource" + ] + ], + "permission_scopes": { + "execute_external_action": [ + "requires_validation_passed" + ] + }, + "capability_scope": { + "agent": [ + "read_context", + "validate_external_action", + "verify_result" + ] + } } diff --git a/fixtures/mcp_trace_replay_moderate_v1/expected/admissibility.json b/fixtures/mcp_trace_replay_moderate_v1/expected/admissibility.json index 0cdb594..a998b0d 100644 --- a/fixtures/mcp_trace_replay_moderate_v1/expected/admissibility.json +++ b/fixtures/mcp_trace_replay_moderate_v1/expected/admissibility.json @@ -7,7 +7,7 @@ "capability_boundary_respected" ], "expected_failure_labels": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION" + "INVARIANT_VIOLATION", + "UNAUTHORIZED_CAPABILITY_PATH" ] } diff --git a/fixtures/mcp_trace_replay_moderate_v1/expected/failures.json b/fixtures/mcp_trace_replay_moderate_v1/expected/failures.json index fa47649..29b223f 100644 --- a/fixtures/mcp_trace_replay_moderate_v1/expected/failures.json +++ b/fixtures/mcp_trace_replay_moderate_v1/expected/failures.json @@ -1,7 +1,7 @@ { "expected_failures": [ - "CAUSAL_DEPENDENCY_LOSS", - "INVARIANT_VIOLATION" + "INVARIANT_VIOLATION", + "UNAUTHORIZED_CAPABILITY_PATH" ], "allowed_failures": [ "ORPHAN_DEPENDENCY", diff --git a/fixtures/mcp_trace_replay_moderate_v1/original/contracts/dependency_chain_preserved.json b/fixtures/mcp_trace_replay_moderate_v1/original/contracts/dependency_chain_preserved.json index 155afe2..6729d2d 100644 --- a/fixtures/mcp_trace_replay_moderate_v1/original/contracts/dependency_chain_preserved.json +++ b/fixtures/mcp_trace_replay_moderate_v1/original/contracts/dependency_chain_preserved.json @@ -18,5 +18,6 @@ ] ] }, - "severity": "HIGH" + "severity": "HIGH", + "failure_label_on_violation": "UNAUTHORIZED_CAPABILITY_PATH" } diff --git a/fixtures/mcp_trace_replay_moderate_v1/original/state.json b/fixtures/mcp_trace_replay_moderate_v1/original/state.json index 19995c3..7597431 100644 --- a/fixtures/mcp_trace_replay_moderate_v1/original/state.json +++ b/fixtures/mcp_trace_replay_moderate_v1/original/state.json @@ -1,4 +1,44 @@ { "state_version": "1.0", - "entities": {} + "entities": {}, + "capability_boundaries": [ + [ + "agent", + "capability_scope_checked" + ], + [ + "capability_scope_checked", + "validate_external_action" + ], + [ + "human_approval", + "execute_external_action" + ] + ], + "allowed_tools": [ + "read_context", + "validate_external_action", + "execute_external_action", + "verify_result" + ], + "resource_boundaries": [ + [ + "execute_external_action", + "approved_external_resource" + ] + ], + "permission_scopes": { + "execute_external_action": [ + "requires_human_approval", + "requires_validation_passed" + ] + }, + "capability_scope": { + "agent": [ + "read_context", + "validate_external_action", + "execute_external_action", + "verify_result" + ] + } } diff --git a/fixtures/mcp_trace_replay_moderate_v1/reconstructed/state.json b/fixtures/mcp_trace_replay_moderate_v1/reconstructed/state.json index 19995c3..7b807af 100644 --- a/fixtures/mcp_trace_replay_moderate_v1/reconstructed/state.json +++ b/fixtures/mcp_trace_replay_moderate_v1/reconstructed/state.json @@ -1,4 +1,49 @@ { "state_version": "1.0", - "entities": {} + "entities": {}, + "capability_boundaries": [ + [ + "agent", + "capability_scope_checked" + ], + [ + "capability_scope_checked", + "validate_external_action" + ], + [ + "execute_external_action", + "unapproved_admin_console" + ] + ], + "allowed_tools": [ + "read_context", + "validate_external_action", + "execute_external_action", + "verify_result", + "unapproved_admin_console" + ], + "resource_boundaries": [ + [ + "execute_external_action", + "approved_external_resource" + ], + [ + "unapproved_admin_console", + "production_credentials" + ] + ], + "permission_scopes": { + "execute_external_action": [ + "requires_validation_passed" + ] + }, + "capability_scope": { + "agent": [ + "read_context", + "validate_external_action", + "execute_external_action", + "verify_result", + "unapproved_admin_console" + ] + } } diff --git a/fixtures/mcp_trace_replay_v1/original/state.json b/fixtures/mcp_trace_replay_v1/original/state.json index 19995c3..7597431 100644 --- a/fixtures/mcp_trace_replay_v1/original/state.json +++ b/fixtures/mcp_trace_replay_v1/original/state.json @@ -1,4 +1,44 @@ { "state_version": "1.0", - "entities": {} + "entities": {}, + "capability_boundaries": [ + [ + "agent", + "capability_scope_checked" + ], + [ + "capability_scope_checked", + "validate_external_action" + ], + [ + "human_approval", + "execute_external_action" + ] + ], + "allowed_tools": [ + "read_context", + "validate_external_action", + "execute_external_action", + "verify_result" + ], + "resource_boundaries": [ + [ + "execute_external_action", + "approved_external_resource" + ] + ], + "permission_scopes": { + "execute_external_action": [ + "requires_human_approval", + "requires_validation_passed" + ] + }, + "capability_scope": { + "agent": [ + "read_context", + "validate_external_action", + "execute_external_action", + "verify_result" + ] + } } diff --git a/fixtures/mcp_trace_replay_v1/reconstructed/state.json b/fixtures/mcp_trace_replay_v1/reconstructed/state.json index 19995c3..7597431 100644 --- a/fixtures/mcp_trace_replay_v1/reconstructed/state.json +++ b/fixtures/mcp_trace_replay_v1/reconstructed/state.json @@ -1,4 +1,44 @@ { "state_version": "1.0", - "entities": {} + "entities": {}, + "capability_boundaries": [ + [ + "agent", + "capability_scope_checked" + ], + [ + "capability_scope_checked", + "validate_external_action" + ], + [ + "human_approval", + "execute_external_action" + ] + ], + "allowed_tools": [ + "read_context", + "validate_external_action", + "execute_external_action", + "verify_result" + ], + "resource_boundaries": [ + [ + "execute_external_action", + "approved_external_resource" + ] + ], + "permission_scopes": { + "execute_external_action": [ + "requires_human_approval", + "requires_validation_passed" + ] + }, + "capability_scope": { + "agent": [ + "read_context", + "validate_external_action", + "execute_external_action", + "verify_result" + ] + } } diff --git a/src/validation/contract_validator.py b/src/validation/contract_validator.py index 8a72c7f..ca08a0a 100644 --- a/src/validation/contract_validator.py +++ b/src/validation/contract_validator.py @@ -6,6 +6,7 @@ from typing import Any from src.validation.dependency_graph_comparator import DependencyGraph, DependencyGraphComparator, RelationType +from src.validation.failure_taxonomy import FAILURE_TAXONOMY class Layer(str, Enum): @@ -67,14 +68,20 @@ def validate_contract( raise ContractValidationError(f"contract '{contract_id}' missing required object field: definition") definition = contract["definition"] + failure_label_on_violation = contract.get("failure_label_on_violation") + if failure_label_on_violation is not None and str(failure_label_on_violation) not in FAILURE_TAXONOMY: + raise ContractValidationError( + f"contract '{contract_id}' has unregistered failure_label_on_violation: {failure_label_on_violation}" + ) + if contract_type == ContractType.ORDERING: - return self._validate_ordering(contract_id, layer, severity, definition, reconstructed) + return self._validate_ordering(contract_id, layer, severity, definition, reconstructed, failure_label_on_violation) if contract_type == ContractType.REACHABILITY: - return self._validate_reachability(contract_id, layer, severity, definition, original, reconstructed) + return self._validate_reachability(contract_id, layer, severity, definition, original, reconstructed, failure_label_on_violation) if contract_type == ContractType.CAUSALITY: - return self._validate_causality(contract_id, layer, severity, definition, original, reconstructed) + return self._validate_causality(contract_id, layer, severity, definition, original, reconstructed, failure_label_on_violation) if contract_type == ContractType.INVARIANT: - return self._validate_invariant(contract_id, layer, severity, definition, original, reconstructed) + return self._validate_invariant(contract_id, layer, severity, definition, original, reconstructed, failure_label_on_violation) raise ContractValidationError(f"unsupported contract type: {contract_type.value}") @@ -124,7 +131,7 @@ def _is_ordered_subsequence(self, required: list[str], observed: list[str]) -> b return True return False - def _validate_ordering(self, contract_id: str, layer: Layer, severity: str, definition: dict[str, Any], reconstructed: dict[str, Any]) -> ValidationResult: + def _validate_ordering(self, contract_id: str, layer: Layer, severity: str, definition: dict[str, Any], reconstructed: dict[str, Any], failure_label_on_violation: Any) -> ValidationResult: required_sequence = definition.get("required_sequence") if not isinstance(required_sequence, list) or not all(isinstance(item, str) for item in required_sequence): raise ContractValidationError(f"contract '{contract_id}' requires definition.required_sequence as list[str]") @@ -137,7 +144,7 @@ def _validate_ordering(self, contract_id: str, layer: Layer, severity: str, defi contract_type=ContractType.ORDERING, passed=passed, severity=severity, - failure_label=None if passed else "POLICY_ORDER_BROKEN", + failure_label=None if passed else str(failure_label_on_violation or "POLICY_ORDER_BROKEN"), invariant_category=None if passed else "ordering", deterministic_evidence={"required_sequence": required_sequence, "observed_sequence": observed_sequence}, ) @@ -170,7 +177,7 @@ def _reachable_targets(self, graph: DependencyGraph, source: str, targets: list[ return sorted(target for target in targets if target in visited) - def _validate_reachability(self, contract_id: str, layer: Layer, severity: str, definition: dict[str, Any], original: dict[str, Any], reconstructed: dict[str, Any]) -> ValidationResult: + def _validate_reachability(self, contract_id: str, layer: Layer, severity: str, definition: dict[str, Any], original: dict[str, Any], reconstructed: dict[str, Any], failure_label_on_violation: Any) -> ValidationResult: source = definition.get("from") targets = definition.get("to") min_paths = definition.get("min_paths") @@ -196,7 +203,7 @@ def _validate_reachability(self, contract_id: str, layer: Layer, severity: str, contract_type=ContractType.REACHABILITY, passed=passed, severity=severity, - failure_label=None if passed else "RECOVERY_PATH_INVALID", + failure_label=None if passed else str(failure_label_on_violation or "RECOVERY_PATH_INVALID"), invariant_category=None if passed else "reachability", deterministic_evidence={ "source": source, @@ -209,7 +216,7 @@ def _validate_reachability(self, contract_id: str, layer: Layer, severity: str, }, ) - def _validate_causality(self, contract_id: str, layer: Layer, severity: str, definition: dict[str, Any], original: dict[str, Any], reconstructed: dict[str, Any]) -> ValidationResult: + def _validate_causality(self, contract_id: str, layer: Layer, severity: str, definition: dict[str, Any], original: dict[str, Any], reconstructed: dict[str, Any], failure_label_on_violation: Any) -> ValidationResult: required_edges = definition.get("required_causal_edges") if not isinstance(required_edges, list): raise ContractValidationError(f"contract '{contract_id}' requires definition.required_causal_edges as list") @@ -243,12 +250,12 @@ def _validate_causality(self, contract_id: str, layer: Layer, severity: str, def contract_type=ContractType.CAUSALITY, passed=passed, severity=severity, - failure_label=None if passed else "CAUSAL_DEPENDENCY_LOSS", + failure_label=None if passed else str(failure_label_on_violation or "CAUSAL_DEPENDENCY_LOSS"), invariant_category=None if passed else "causality", deterministic_evidence=evidence, ) - def _validate_invariant(self, contract_id: str, layer: Layer, severity: str, definition: dict[str, Any], original: dict[str, Any], reconstructed: dict[str, Any]) -> ValidationResult: + def _validate_invariant(self, contract_id: str, layer: Layer, severity: str, definition: dict[str, Any], original: dict[str, Any], reconstructed: dict[str, Any], failure_label_on_violation: Any) -> ValidationResult: rule = definition.get("rule") if rule != "no_orphan_dependencies": raise ContractValidationError(f"contract '{contract_id}' supports only invariant rule 'no_orphan_dependencies'") @@ -265,7 +272,7 @@ def _validate_invariant(self, contract_id: str, layer: Layer, severity: str, def contract_type=ContractType.INVARIANT, passed=passed, severity=severity, - failure_label=None if passed else "INVARIANT_VIOLATION", + failure_label=None if passed else str(failure_label_on_violation or "INVARIANT_VIOLATION"), invariant_category=None if passed else "reachability", deterministic_evidence={"rule": rule, "comparator_metrics": metrics, "comparator_failure_labels": labels}, ) diff --git a/tests/test_capability_boundary_replay_artifact.py b/tests/test_capability_boundary_replay_artifact.py index a90a898..f498438 100644 --- a/tests/test_capability_boundary_replay_artifact.py +++ b/tests/test_capability_boundary_replay_artifact.py @@ -13,6 +13,10 @@ "CAUSAL_DEPENDENCY_LOSS", "RECOVERY_PATH_INVALID", "POLICY_ORDER_BROKEN", + "CAPABILITY_BOUNDARY_LOSS", + "UNAUTHORIZED_CAPABILITY_PATH", + "APPROVAL_GATE_LOSS", + "POLICY_ENFORCEMENT_GAP", } diff --git a/tests/test_tool_ordering_replay_artifact.py b/tests/test_tool_ordering_replay_artifact.py index 4605851..92c1338 100644 --- a/tests/test_tool_ordering_replay_artifact.py +++ b/tests/test_tool_ordering_replay_artifact.py @@ -13,6 +13,10 @@ "CAUSAL_DEPENDENCY_LOSS", "RECOVERY_PATH_INVALID", "POLICY_ORDER_BROKEN", + "CAPABILITY_BOUNDARY_LOSS", + "UNAUTHORIZED_CAPABILITY_PATH", + "APPROVAL_GATE_LOSS", + "POLICY_ENFORCEMENT_GAP", } From 239aa6a58a0726d6e6b01dfcf5741f650bf91494 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20K=C3=B6lnberger?= <159939812+ProfRandom92@users.noreply.github.com> Date: Wed, 20 May 2026 10:00:15 -0700 Subject: [PATCH 2/4] Add focused tests for optional contract failure labels --- tests/test_contract_validator.py | 51 ++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tests/test_contract_validator.py b/tests/test_contract_validator.py index dc17c50..7d2de38 100644 --- a/tests/test_contract_validator.py +++ b/tests/test_contract_validator.py @@ -58,6 +58,57 @@ def test_ordering_contract_fails_with_policy_order_broken_and_evidence() -> None assert result.deterministic_evidence["observed_sequence"] == ["generate_patch", "human_review", "run_tests"] +def test_ordering_contract_without_failure_label_on_violation_keeps_legacy_default_label() -> None: + contract = { + "contract_id": "legacy_ordering_contract", + "layer": "operational", + "type": "ordering", + "definition": {"required_sequence": ["validate", "approve", "deploy"]}, + "severity": "CRITICAL", + } + reconstructed = {"events": [{"action": "validate"}, {"action": "deploy"}]} + + result = ContractValidator().validate_contract({}, reconstructed, contract) + + assert result.passed is False + assert result.failure_label == "POLICY_ORDER_BROKEN" + + +def test_ordering_contract_with_registered_failure_label_on_violation_emits_configured_label() -> None: + contract = { + "contract_id": "configured_ordering_contract", + "layer": "operational", + "type": "ordering", + "definition": {"required_sequence": ["validate", "approve", "deploy"]}, + "severity": "CRITICAL", + "failure_label_on_violation": "APPROVAL_GATE_LOSS", + } + reconstructed = {"events": [{"action": "validate"}, {"action": "deploy"}]} + + result = ContractValidator().validate_contract({}, reconstructed, contract) + + assert result.passed is False + assert result.failure_label == "APPROVAL_GATE_LOSS" + + +def test_unregistered_failure_label_on_violation_raises_contract_validation_error() -> None: + contract = { + "contract_id": "invalid_failure_label_mapping", + "layer": "operational", + "type": "ordering", + "definition": {"required_sequence": ["validate", "approve", "deploy"]}, + "severity": "CRITICAL", + "failure_label_on_violation": "NOT_A_REGISTERED_LABEL", + } + reconstructed = {"events": [{"action": "validate"}, {"action": "deploy"}]} + + with pytest.raises( + ContractValidationError, + match="has unregistered failure_label_on_violation: NOT_A_REGISTERED_LABEL", + ): + ContractValidator().validate_contract({}, reconstructed, contract) + + def test_reachability_contract_passes_when_target_reachable() -> None: contract = { "contract_id": "recovery_path_available", From 4a67892b055dbd5106638b25dd820676c1a93788 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20K=C3=B6lnberger?= <159939812+ProfRandom92@users.noreply.github.com> Date: Wed, 20 May 2026 10:49:04 -0700 Subject: [PATCH 3/4] Validate failure_label_on_violation type strictly --- src/validation/contract_validator.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/validation/contract_validator.py b/src/validation/contract_validator.py index ca08a0a..94d9ac7 100644 --- a/src/validation/contract_validator.py +++ b/src/validation/contract_validator.py @@ -69,10 +69,15 @@ def validate_contract( definition = contract["definition"] failure_label_on_violation = contract.get("failure_label_on_violation") - if failure_label_on_violation is not None and str(failure_label_on_violation) not in FAILURE_TAXONOMY: - raise ContractValidationError( - f"contract '{contract_id}' has unregistered failure_label_on_violation: {failure_label_on_violation}" - ) + if failure_label_on_violation is not None: + if not isinstance(failure_label_on_violation, str): + raise ContractValidationError( + f"contract '{contract_id}' has non-string failure_label_on_violation: {failure_label_on_violation}" + ) + if failure_label_on_violation not in FAILURE_TAXONOMY: + raise ContractValidationError( + f"contract '{contract_id}' has unregistered failure_label_on_violation: {failure_label_on_violation}" + ) if contract_type == ContractType.ORDERING: return self._validate_ordering(contract_id, layer, severity, definition, reconstructed, failure_label_on_violation) @@ -144,7 +149,7 @@ def _validate_ordering(self, contract_id: str, layer: Layer, severity: str, defi contract_type=ContractType.ORDERING, passed=passed, severity=severity, - failure_label=None if passed else str(failure_label_on_violation or "POLICY_ORDER_BROKEN"), + failure_label=None if passed else failure_label_on_violation or "POLICY_ORDER_BROKEN", invariant_category=None if passed else "ordering", deterministic_evidence={"required_sequence": required_sequence, "observed_sequence": observed_sequence}, ) @@ -203,7 +208,7 @@ def _validate_reachability(self, contract_id: str, layer: Layer, severity: str, contract_type=ContractType.REACHABILITY, passed=passed, severity=severity, - failure_label=None if passed else str(failure_label_on_violation or "RECOVERY_PATH_INVALID"), + failure_label=None if passed else failure_label_on_violation or "RECOVERY_PATH_INVALID", invariant_category=None if passed else "reachability", deterministic_evidence={ "source": source, @@ -250,7 +255,7 @@ def _validate_causality(self, contract_id: str, layer: Layer, severity: str, def contract_type=ContractType.CAUSALITY, passed=passed, severity=severity, - failure_label=None if passed else str(failure_label_on_violation or "CAUSAL_DEPENDENCY_LOSS"), + failure_label=None if passed else failure_label_on_violation or "CAUSAL_DEPENDENCY_LOSS", invariant_category=None if passed else "causality", deterministic_evidence=evidence, ) @@ -272,7 +277,7 @@ def _validate_invariant(self, contract_id: str, layer: Layer, severity: str, def contract_type=ContractType.INVARIANT, passed=passed, severity=severity, - failure_label=None if passed else str(failure_label_on_violation or "INVARIANT_VIOLATION"), + failure_label=None if passed else failure_label_on_violation or "INVARIANT_VIOLATION", invariant_category=None if passed else "reachability", deterministic_evidence={"rule": rule, "comparator_metrics": metrics, "comparator_failure_labels": labels}, ) From 10d6683b6327ab22a30005dfd17761eae23eed95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20K=C3=B6lnberger?= <159939812+ProfRandom92@users.noreply.github.com> Date: Wed, 20 May 2026 10:49:36 -0700 Subject: [PATCH 4/4] Test non-string failure_label_on_violation rejection --- tests/test_contract_validator.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_contract_validator.py b/tests/test_contract_validator.py index 7d2de38..43a47d6 100644 --- a/tests/test_contract_validator.py +++ b/tests/test_contract_validator.py @@ -109,6 +109,24 @@ def test_unregistered_failure_label_on_violation_raises_contract_validation_erro ContractValidator().validate_contract({}, reconstructed, contract) +def test_non_string_failure_label_on_violation_raises_contract_validation_error() -> None: + contract = { + "contract_id": "non_string_failure_label_mapping", + "layer": "operational", + "type": "ordering", + "definition": {"required_sequence": ["validate", "approve", "deploy"]}, + "severity": "CRITICAL", + "failure_label_on_violation": ["APPROVAL_GATE_LOSS"], + } + reconstructed = {"events": [{"action": "validate"}, {"action": "deploy"}]} + + with pytest.raises( + ContractValidationError, + match=r"has non-string failure_label_on_violation: \['APPROVAL_GATE_LOSS'\]", + ): + ContractValidator().validate_contract({}, reconstructed, contract) + + def test_reachability_contract_passes_when_target_reachable() -> None: contract = { "contract_id": "recovery_path_available",