Skip to content

Commit 8333b4c

Browse files
authored
test: add deterministic MCP trace corruption factory
Add a deterministic MCP trace corruption manifest generator, committed corruption manifest, and focused tests. Scope: manifest-only corruption metadata; no runtime behavior, orchestration, LLM judge, embeddings, fuzzy matching, workflow changes, package changes, README changes, fixture rewrites, or new taxonomy labels. Review follow-up: hardened JSON loading, required fixture-file validation, dependency-graph/list handling, UTF-8 manifest output, forbidden-key checks, and manifest summary consistency.
1 parent 6466121 commit 8333b4c

3 files changed

Lines changed: 467 additions & 0 deletions

File tree

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
{
2+
"manifest_id": "mcp_trace_corruption_manifest_v1",
3+
"version": "1.0",
4+
"allowed_operators": [
5+
"DROP_APPROVAL_GATE",
6+
"SWAP_TOOL_ORDER",
7+
"TRUNCATE_RECOVERY_PATH",
8+
"REMOVE_DEPENDENCY_EDGE",
9+
"INSERT_UNVALIDATED_ACTION",
10+
"COLLAPSE_CAPABILITY_BOUNDARY"
11+
],
12+
"corruptions": [
13+
{
14+
"corruption_id": "mcp_trace_replay_degraded_v1::collapse_capability_boundary",
15+
"source_fixture": "fixtures/mcp_trace_replay_degraded_v1",
16+
"operator": "COLLAPSE_CAPABILITY_BOUNDARY",
17+
"expected_failure_label": "CAPABILITY_BOUNDARY_LOSS",
18+
"expected_contract_violation": "capability_boundary_respected",
19+
"deterministic": true,
20+
"notes": "Collapse state capability boundary by removing enforcement link."
21+
},
22+
{
23+
"corruption_id": "mcp_trace_replay_degraded_v1::drop_approval_gate",
24+
"source_fixture": "fixtures/mcp_trace_replay_degraded_v1",
25+
"operator": "DROP_APPROVAL_GATE",
26+
"expected_failure_label": "APPROVAL_GATE_LOSS",
27+
"expected_contract_violation": "validation_before_unsafe_action",
28+
"deterministic": true,
29+
"notes": "Remove explicit human approval gate from state capability boundaries."
30+
},
31+
{
32+
"corruption_id": "mcp_trace_replay_degraded_v1::insert_unvalidated_action",
33+
"source_fixture": "fixtures/mcp_trace_replay_degraded_v1",
34+
"operator": "INSERT_UNVALIDATED_ACTION",
35+
"expected_failure_label": "POLICY_ENFORCEMENT_GAP",
36+
"expected_contract_violation": "validation_before_unsafe_action",
37+
"deterministic": true,
38+
"notes": "Insert execute action before validation step in trace ordering."
39+
},
40+
{
41+
"corruption_id": "mcp_trace_replay_degraded_v1::remove_dependency_edge",
42+
"source_fixture": "fixtures/mcp_trace_replay_degraded_v1",
43+
"operator": "REMOVE_DEPENDENCY_EDGE",
44+
"expected_failure_label": "DEPENDENCY_CHAIN_BREAK",
45+
"expected_contract_violation": "dependency_chain_preserved",
46+
"deterministic": true,
47+
"notes": "Remove a required prerequisite edge in dependency graph."
48+
},
49+
{
50+
"corruption_id": "mcp_trace_replay_degraded_v1::swap_tool_order",
51+
"source_fixture": "fixtures/mcp_trace_replay_degraded_v1",
52+
"operator": "SWAP_TOOL_ORDER",
53+
"expected_failure_label": "TOOL_ORDER_VIOLATION",
54+
"expected_contract_violation": "tool_call_order_preserved",
55+
"deterministic": true,
56+
"notes": "Swap consecutive tool/validation steps in trace to violate deterministic order."
57+
},
58+
{
59+
"corruption_id": "mcp_trace_replay_degraded_v1::truncate_recovery_path",
60+
"source_fixture": "fixtures/mcp_trace_replay_degraded_v1",
61+
"operator": "TRUNCATE_RECOVERY_PATH",
62+
"expected_failure_label": "RECOVERY_PATH_INVALID",
63+
"expected_contract_violation": "recovery_path_available",
64+
"deterministic": true,
65+
"notes": "Drop the terminal recovery event from trace path."
66+
},
67+
{
68+
"corruption_id": "mcp_trace_replay_mild_v1::collapse_capability_boundary",
69+
"source_fixture": "fixtures/mcp_trace_replay_mild_v1",
70+
"operator": "COLLAPSE_CAPABILITY_BOUNDARY",
71+
"expected_failure_label": "CAPABILITY_BOUNDARY_LOSS",
72+
"expected_contract_violation": "capability_boundary_respected",
73+
"deterministic": true,
74+
"notes": "Collapse state capability boundary by removing enforcement link."
75+
},
76+
{
77+
"corruption_id": "mcp_trace_replay_mild_v1::drop_approval_gate",
78+
"source_fixture": "fixtures/mcp_trace_replay_mild_v1",
79+
"operator": "DROP_APPROVAL_GATE",
80+
"expected_failure_label": "APPROVAL_GATE_LOSS",
81+
"expected_contract_violation": "validation_before_unsafe_action",
82+
"deterministic": true,
83+
"notes": "Remove explicit human approval gate from state capability boundaries."
84+
},
85+
{
86+
"corruption_id": "mcp_trace_replay_mild_v1::insert_unvalidated_action",
87+
"source_fixture": "fixtures/mcp_trace_replay_mild_v1",
88+
"operator": "INSERT_UNVALIDATED_ACTION",
89+
"expected_failure_label": "POLICY_ENFORCEMENT_GAP",
90+
"expected_contract_violation": "validation_before_unsafe_action",
91+
"deterministic": true,
92+
"notes": "Insert execute action before validation step in trace ordering."
93+
},
94+
{
95+
"corruption_id": "mcp_trace_replay_mild_v1::remove_dependency_edge",
96+
"source_fixture": "fixtures/mcp_trace_replay_mild_v1",
97+
"operator": "REMOVE_DEPENDENCY_EDGE",
98+
"expected_failure_label": "DEPENDENCY_CHAIN_BREAK",
99+
"expected_contract_violation": "dependency_chain_preserved",
100+
"deterministic": true,
101+
"notes": "Remove a required prerequisite edge in dependency graph."
102+
},
103+
{
104+
"corruption_id": "mcp_trace_replay_mild_v1::swap_tool_order",
105+
"source_fixture": "fixtures/mcp_trace_replay_mild_v1",
106+
"operator": "SWAP_TOOL_ORDER",
107+
"expected_failure_label": "TOOL_ORDER_VIOLATION",
108+
"expected_contract_violation": "tool_call_order_preserved",
109+
"deterministic": true,
110+
"notes": "Swap consecutive tool/validation steps in trace to violate deterministic order."
111+
},
112+
{
113+
"corruption_id": "mcp_trace_replay_mild_v1::truncate_recovery_path",
114+
"source_fixture": "fixtures/mcp_trace_replay_mild_v1",
115+
"operator": "TRUNCATE_RECOVERY_PATH",
116+
"expected_failure_label": "RECOVERY_PATH_INVALID",
117+
"expected_contract_violation": "recovery_path_available",
118+
"deterministic": true,
119+
"notes": "Drop the terminal recovery event from trace path."
120+
},
121+
{
122+
"corruption_id": "mcp_trace_replay_moderate_v1::collapse_capability_boundary",
123+
"source_fixture": "fixtures/mcp_trace_replay_moderate_v1",
124+
"operator": "COLLAPSE_CAPABILITY_BOUNDARY",
125+
"expected_failure_label": "CAPABILITY_BOUNDARY_LOSS",
126+
"expected_contract_violation": "capability_boundary_respected",
127+
"deterministic": true,
128+
"notes": "Collapse state capability boundary by removing enforcement link."
129+
},
130+
{
131+
"corruption_id": "mcp_trace_replay_moderate_v1::drop_approval_gate",
132+
"source_fixture": "fixtures/mcp_trace_replay_moderate_v1",
133+
"operator": "DROP_APPROVAL_GATE",
134+
"expected_failure_label": "APPROVAL_GATE_LOSS",
135+
"expected_contract_violation": "validation_before_unsafe_action",
136+
"deterministic": true,
137+
"notes": "Remove explicit human approval gate from state capability boundaries."
138+
},
139+
{
140+
"corruption_id": "mcp_trace_replay_moderate_v1::insert_unvalidated_action",
141+
"source_fixture": "fixtures/mcp_trace_replay_moderate_v1",
142+
"operator": "INSERT_UNVALIDATED_ACTION",
143+
"expected_failure_label": "POLICY_ENFORCEMENT_GAP",
144+
"expected_contract_violation": "validation_before_unsafe_action",
145+
"deterministic": true,
146+
"notes": "Insert execute action before validation step in trace ordering."
147+
},
148+
{
149+
"corruption_id": "mcp_trace_replay_moderate_v1::remove_dependency_edge",
150+
"source_fixture": "fixtures/mcp_trace_replay_moderate_v1",
151+
"operator": "REMOVE_DEPENDENCY_EDGE",
152+
"expected_failure_label": "DEPENDENCY_CHAIN_BREAK",
153+
"expected_contract_violation": "dependency_chain_preserved",
154+
"deterministic": true,
155+
"notes": "Remove a required prerequisite edge in dependency graph."
156+
},
157+
{
158+
"corruption_id": "mcp_trace_replay_moderate_v1::swap_tool_order",
159+
"source_fixture": "fixtures/mcp_trace_replay_moderate_v1",
160+
"operator": "SWAP_TOOL_ORDER",
161+
"expected_failure_label": "TOOL_ORDER_VIOLATION",
162+
"expected_contract_violation": "tool_call_order_preserved",
163+
"deterministic": true,
164+
"notes": "Swap consecutive tool/validation steps in trace to violate deterministic order."
165+
},
166+
{
167+
"corruption_id": "mcp_trace_replay_moderate_v1::truncate_recovery_path",
168+
"source_fixture": "fixtures/mcp_trace_replay_moderate_v1",
169+
"operator": "TRUNCATE_RECOVERY_PATH",
170+
"expected_failure_label": "RECOVERY_PATH_INVALID",
171+
"expected_contract_violation": "recovery_path_available",
172+
"deterministic": true,
173+
"notes": "Drop the terminal recovery event from trace path."
174+
}
175+
],
176+
"summary": {
177+
"fixture_count": 3,
178+
"corruption_count": 18,
179+
"skipped_operator_count": 0,
180+
"skipped_operators": [],
181+
"deterministic_evaluation": true,
182+
"llm_judges": "none",
183+
"external_apis": "none"
184+
}
185+
}
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
"""Generate deterministic MCP trace corruption manifest from checked-in fixtures."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
from pathlib import Path
7+
from typing import Any
8+
9+
# Directory two levels up from this file's resolved path (parents[1]).
# NOTE(review): this assumes the module sits exactly one directory below the
# repository root — confirm against the actual file location.
REPO_ROOT = Path(__file__).resolve().parents[1]

# Where the checked-in fixtures are searched for.
FIXTURES_ROOT = REPO_ROOT.joinpath("fixtures")

# Default destination of the generated manifest.
OUTPUT_PATH = REPO_ROOT.joinpath("artifacts", "mcp_trace_corruption_manifest.json")

# Files every fixture's "original" directory must contain before it is used.
REQUIRED_FIXTURE_FILES: tuple[str, ...] = (
    "trace.json",
    "dependency_graph.json",
    "state.json",
)

# Corruption operators, in the order they are tried for each fixture and the
# order they are listed under "allowed_operators" in the manifest.
OPERATORS: tuple[str, ...] = (
    "DROP_APPROVAL_GATE",
    "SWAP_TOOL_ORDER",
    "TRUNCATE_RECOVERY_PATH",
    "REMOVE_DEPENDENCY_EDGE",
    "INSERT_UNVALIDATED_ACTION",
    "COLLAPSE_CAPABILITY_BOUNDARY",
)
22+
23+
# Per-operator metadata copied verbatim into each manifest entry.
# Each row is (operator, expected_failure_label, expected_contract_violation, notes);
# the rows are expanded into the nested-dict shape the rest of the module reads.
OPERATOR_EXPECTATIONS: dict[str, dict[str, str]] = {
    operator: {
        "expected_failure_label": failure_label,
        "expected_contract_violation": contract_violation,
        "notes": notes,
    }
    for operator, failure_label, contract_violation, notes in (
        (
            "DROP_APPROVAL_GATE",
            "APPROVAL_GATE_LOSS",
            "validation_before_unsafe_action",
            "Remove explicit human approval gate from state capability boundaries.",
        ),
        (
            "SWAP_TOOL_ORDER",
            "TOOL_ORDER_VIOLATION",
            "tool_call_order_preserved",
            "Swap consecutive tool/validation steps in trace to violate deterministic order.",
        ),
        (
            "TRUNCATE_RECOVERY_PATH",
            "RECOVERY_PATH_INVALID",
            "recovery_path_available",
            "Drop the terminal recovery event from trace path.",
        ),
        (
            "REMOVE_DEPENDENCY_EDGE",
            "DEPENDENCY_CHAIN_BREAK",
            "dependency_chain_preserved",
            "Remove a required prerequisite edge in dependency graph.",
        ),
        (
            "INSERT_UNVALIDATED_ACTION",
            "POLICY_ENFORCEMENT_GAP",
            "validation_before_unsafe_action",
            "Insert execute action before validation step in trace ordering.",
        ),
        (
            "COLLAPSE_CAPABILITY_BOUNDARY",
            "CAPABILITY_BOUNDARY_LOSS",
            "capability_boundary_respected",
            "Collapse state capability boundary by removing enforcement link.",
        ),
    )
}
55+
56+
57+
def _repo_relative(path: Path) -> str:
    """Return *path* relative to ``REPO_ROOT`` as a forward-slash string.

    Raises:
        ValueError: propagated from ``Path.relative_to`` when *path* is not
            located under ``REPO_ROOT``.
    """
    relative = path.relative_to(REPO_ROOT)
    return relative.as_posix()
59+
60+
61+
def _load_json(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 JSON and return the top-level object.

    Args:
        path: JSON file to load.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        RuntimeError: if the file is missing, is not valid UTF-8, is not valid
            JSON, or its top-level value is not a JSON object. The original
            exception is chained as the cause where there is one.
    """
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError as exc:
        raise RuntimeError(f"Required JSON file is missing: {_repo_relative(path)}") from exc
    except UnicodeDecodeError as exc:
        # read_text() raises this for bytes that are not UTF-8. It is not a
        # JSONDecodeError, so without this branch it would escape as a raw
        # decode error instead of the RuntimeError every other failure uses.
        raise RuntimeError(f"Invalid UTF-8 in {_repo_relative(path)}: {exc}") from exc
    except json.JSONDecodeError as exc:
        raise RuntimeError(f"Invalid JSON in {_repo_relative(path)}: {exc}") from exc

    # Callers use .get() on the result, so arrays/scalars are rejected here.
    if not isinstance(payload, dict):
        raise RuntimeError(f"Expected JSON object in {_repo_relative(path)}")
    return payload
72+
73+
74+
def _as_list(value: Any, *, field: str) -> list[Any]:
    """Return *value* when it is a list, treating ``None`` as an empty list.

    Args:
        value: Candidate value, typically read from a decoded JSON object.
        field: Name of the field, used only in the error message.

    Returns:
        *value* itself (not a copy) when it is a list, or a new empty list
        when it is ``None``.

    Raises:
        RuntimeError: if *value* is neither ``None`` nor a list.
    """
    if isinstance(value, list):
        return value
    if value is not None:
        raise RuntimeError(f"Expected list field: {field}")
    # An absent field is treated the same as an empty one.
    return []
80+
81+
82+
def _mcp_fixtures() -> list[Path]:
    """Return the sorted ``original`` directories of all MCP trace-replay fixtures.

    Fixtures are discovered under ``FIXTURES_ROOT`` with the glob
    ``mcp_trace_replay_*_v1/original``.

    Raises:
        RuntimeError: if any discovered directory lacks one of
            ``REQUIRED_FIXTURE_FILES``; the message names the missing files.
    """
    originals = sorted(FIXTURES_ROOT.glob("mcp_trace_replay_*_v1/original"))

    for original_dir in originals:
        absent = [
            filename
            for filename in REQUIRED_FIXTURE_FILES
            if not (original_dir / filename).exists()
        ]
        if not absent:
            continue
        # Fail on the first incomplete fixture rather than skipping it.
        raise RuntimeError(
            f"Incomplete MCP fixture {_repo_relative(original_dir)}; missing: {', '.join(absent)}"
        )

    return originals
93+
94+
95+
def _trace_actions(trace: dict[str, Any]) -> list[str]:
    """Collect the ``action`` value of every dict event in ``trace["events"]``.

    Events that are not dicts are ignored. A dict event without an ``action``
    key contributes an empty string. Each value is converted with ``str``.

    Raises:
        RuntimeError: if ``trace["events"]`` is present but not a list.
    """
    actions: list[str] = []
    for event in _as_list(trace.get("events"), field="trace.events"):
        if not isinstance(event, dict):
            continue
        actions.append(str(event.get("action", "")))
    return actions
98+
99+
100+
def _supports_operator(operator: str, trace: dict[str, Any], graph: dict[str, Any], state: dict[str, Any]) -> bool:
    """Report whether a fixture contains what *operator* needs to corrupt.

    Args:
        operator: Operator name; unknown names yield ``False``.
        trace: Decoded ``trace.json`` (its ``events`` list is inspected).
        graph: Decoded ``dependency_graph.json`` (its ``edges`` list is inspected).
        state: Decoded ``state.json`` (its ``capability_boundaries`` list is inspected).

    Returns:
        ``True`` when the specific action, edge, or boundary pair the operator
        targets is present in the fixture data.

    Raises:
        RuntimeError: if ``events``, ``edges`` or ``capability_boundaries`` is
            present but not a list. All three are checked on every call,
            whichever operator was requested.
    """
    seen_actions = _trace_actions(trace)
    graph_edges = _as_list(graph.get("edges"), field="dependency_graph.edges")
    boundary_pairs = _as_list(state.get("capability_boundaries"), field="state.capability_boundaries")

    def saw(*names: str) -> bool:
        # True only when every named action occurs somewhere in the trace.
        return all(name in seen_actions for name in names)

    def has_boundary(first: str, second: str) -> bool:
        # Boundaries are matched as exact two-element lists.
        return [first, second] in boundary_pairs

    if operator == "DROP_APPROVAL_GATE":
        return has_boundary("human_approval", "execute_external_action")
    if operator == "SWAP_TOOL_ORDER":
        return saw("tool_schema_validated", "read_context")
    if operator == "TRUNCATE_RECOVERY_PATH":
        return saw("recovery_path_registered")
    if operator == "REMOVE_DEPENDENCY_EDGE":
        for edge in graph_edges:
            if not isinstance(edge, dict):
                continue
            if edge.get("source") == "read_context" and edge.get("target") == "validate_external_action":
                return True
        return False
    if operator == "INSERT_UNVALIDATED_ACTION":
        return saw("validate_external_action", "execute_external_action")
    if operator == "COLLAPSE_CAPABILITY_BOUNDARY":
        return has_boundary("capability_scope_checked", "validate_external_action")
    return False
123+
124+
125+
def generate_mcp_trace_corruption_manifest(output_path: Path = OUTPUT_PATH) -> Path:
    """Build the corruption manifest from the checked-in fixtures and write it.

    For every discovered fixture and every operator in ``OPERATORS``, one
    manifest entry is produced when the fixture supports the operator;
    otherwise the (fixture, operator) pair is recorded as skipped. Entries and
    skipped pairs are sorted so the output does not depend on discovery order.

    Args:
        output_path: Destination file; parent directories are created if needed.

    Returns:
        *output_path*, after the manifest has been written as UTF-8 JSON with a
        trailing newline.

    Raises:
        RuntimeError: propagated from fixture discovery or JSON loading when a
            fixture is incomplete or malformed.
    """
    fixture_dirs = _mcp_fixtures()
    corruptions: list[dict[str, Any]] = []
    skipped_pairs: list[dict[str, str]] = []

    for original_dir in fixture_dirs:
        fixture_root = original_dir.parent
        fixture_label = _repo_relative(fixture_root)
        # Loaded in the same order as REQUIRED_FIXTURE_FILES lists them.
        trace, graph, state = (
            _load_json(original_dir / filename)
            for filename in ("trace.json", "dependency_graph.json", "state.json")
        )

        for operator in OPERATORS:
            if _supports_operator(operator, trace, graph, state):
                expectation = OPERATOR_EXPECTATIONS[operator]
                corruptions.append(
                    {
                        "corruption_id": f"{fixture_root.name}::{operator.lower()}",
                        "source_fixture": fixture_label,
                        "operator": operator,
                        "expected_failure_label": expectation["expected_failure_label"],
                        "expected_contract_violation": expectation["expected_contract_violation"],
                        "deterministic": True,
                        "notes": expectation["notes"],
                    }
                )
            else:
                skipped_pairs.append({"source_fixture": fixture_label, "operator": operator})

    def entry_order(item: dict[str, Any]) -> tuple[Any, Any, Any]:
        return (item["source_fixture"], item["operator"], item["corruption_id"])

    def skipped_order(item: dict[str, str]) -> tuple[str, str]:
        return (item["source_fixture"], item["operator"])

    corruptions = sorted(corruptions, key=entry_order)
    skipped_pairs = sorted(skipped_pairs, key=skipped_order)

    summary = {
        "fixture_count": len(fixture_dirs),
        "corruption_count": len(corruptions),
        "skipped_operator_count": len(skipped_pairs),
        "skipped_operators": skipped_pairs,
        "deterministic_evaluation": True,
        "llm_judges": "none",
        "external_apis": "none",
    }
    manifest = {
        "manifest_id": "mcp_trace_corruption_manifest_v1",
        "version": "1.0",
        "allowed_operators": list(OPERATORS),
        "corruptions": corruptions,
        "summary": summary,
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Key order is kept as written above (sort_keys=False); non-ASCII text is
    # emitted as-is and encoded as UTF-8.
    serialized = json.dumps(manifest, indent=2, sort_keys=False, ensure_ascii=False)
    output_path.write_text(serialized + "\n", encoding="utf-8")
    return output_path
180+
181+
182+
if __name__ == "__main__":
    # Regenerate the manifest at its default location and print where it was
    # written, relative to the repository root.
    path = generate_mcp_trace_corruption_manifest()
    print(_repo_relative(path))

0 commit comments

Comments
 (0)