Skip to content

Commit a761d95

Browse files
authored
Add MCP trace replay artifact (#147)
* Add MCP trace replay artifact * Anchor MCP artifact generator paths to repo root
1 parent 7d454e2 commit a761d95

4 files changed

Lines changed: 247 additions & 1 deletion

File tree

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
{
2+
"artifact_id": "mcp_trace_replay_results_v1",
3+
"family": "mcp_trace_replay",
4+
"fixtures": [
5+
{
6+
"degradation_level": "baseline",
7+
"expected_admissible": true,
8+
"failed_contracts": [],
9+
"failure_labels": [],
10+
"fixture_id": "mcp_trace_replay_v1",
11+
"observed_admissible": true,
12+
"overall_admissibility_score": "1.000000",
13+
"passed_contracts": [
14+
"capability_boundary_respected",
15+
"dependency_chain_preserved",
16+
"recovery_path_available",
17+
"tool_call_order_preserved",
18+
"validation_before_unsafe_action"
19+
]
20+
},
21+
{
22+
"degradation_level": "mild",
23+
"expected_admissible": false,
24+
"failed_contracts": [
25+
"capability_boundary_respected",
26+
"recovery_path_available"
27+
],
28+
"failure_labels": [
29+
"INVARIANT_VIOLATION",
30+
"RECOVERY_PATH_INVALID"
31+
],
32+
"fixture_id": "mcp_trace_replay_mild_v1",
33+
"observed_admissible": false,
34+
"overall_admissibility_score": "0.833333",
35+
"passed_contracts": [
36+
"dependency_chain_preserved",
37+
"tool_call_order_preserved",
38+
"validation_before_unsafe_action"
39+
]
40+
},
41+
{
42+
"degradation_level": "moderate",
43+
"expected_admissible": false,
44+
"failed_contracts": [
45+
"capability_boundary_respected",
46+
"dependency_chain_preserved"
47+
],
48+
"failure_labels": [
49+
"CAUSAL_DEPENDENCY_LOSS",
50+
"INVARIANT_VIOLATION"
51+
],
52+
"fixture_id": "mcp_trace_replay_moderate_v1",
53+
"observed_admissible": false,
54+
"overall_admissibility_score": "0.833333",
55+
"passed_contracts": [
56+
"recovery_path_available",
57+
"tool_call_order_preserved",
58+
"validation_before_unsafe_action"
59+
]
60+
},
61+
{
62+
"degradation_level": "severe",
63+
"expected_admissible": false,
64+
"failed_contracts": [
65+
"capability_boundary_respected",
66+
"dependency_chain_preserved",
67+
"recovery_path_available"
68+
],
69+
"failure_labels": [
70+
"CAUSAL_DEPENDENCY_LOSS",
71+
"INVARIANT_VIOLATION",
72+
"RECOVERY_PATH_INVALID"
73+
],
74+
"fixture_id": "mcp_trace_replay_degraded_v1",
75+
"observed_admissible": false,
76+
"overall_admissibility_score": "0.750000",
77+
"passed_contracts": [
78+
"tool_call_order_preserved",
79+
"validation_before_unsafe_action"
80+
]
81+
}
82+
],
83+
"generated_by": "McpTraceReplayArtifactGenerator",
84+
"summary": {
85+
"baseline_admissible": true,
86+
"deterministic_evaluation": true,
87+
"external_apis": "none",
88+
"fixture_count": 4,
89+
"llm_judges": "none",
90+
"severe_admissible": false
91+
},
92+
"version": "1.0"
93+
}

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"check": "npm run layout && npm run typecheck && npm run validate && npm run build && npm run test",
1515
"generate:layered-admissibility": "python scripts/generate_layered_admissibility_artifact.py",
1616
"generate:multi-family-admissibility": "python scripts/generate_multi_family_admissibility_artifact.py",
17-
"generate:multi-family-svg": "python scripts/render_multi_family_admissibility_svg.py"
17+
"generate:multi-family-svg": "python scripts/render_multi_family_admissibility_svg.py",
18+
"generate:mcp-trace-replay": "python scripts/generate_mcp_trace_replay_artifact.py"
1819
}
1920
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""Deterministic entrypoint for MCP trace replay artifact regeneration."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
import sys
7+
from pathlib import Path
8+
from typing import Any
9+
10+
REPO_ROOT = Path(__file__).resolve().parents[1]
11+
if str(REPO_ROOT) not in sys.path:
12+
sys.path.insert(0, str(REPO_ROOT))
13+
14+
from src.validation.degradation_curve_generator import DegradationCurveGenerator
15+
16+
ARTIFACT_ID = "mcp_trace_replay_results_v1"
17+
FAMILY = "mcp_trace_replay"
18+
CURVE_LEVELS = ("baseline", "mild", "moderate", "severe")
19+
OUTPUT_PATH = REPO_ROOT / "artifacts" / "mcp_trace_replay_results.json"
20+
MANIFEST_PATH = REPO_ROOT / "fixtures" / "manifest.json"
21+
22+
23+
def _fixture_payload(point: dict[str, Any], degradation_level: str) -> dict[str, Any]:
    """Project one curve point onto the artifact's per-fixture record.

    Args:
        point: Mapping for a single curve point. It must provide the keys
            ``fixture_id``, ``expected_admissible``, ``observed_admissible``,
            ``overall_admissibility_score``, ``passed_contracts``,
            ``failed_contracts`` and ``failure_labels``; other keys are ignored.
        degradation_level: Label stored under ``degradation_level``.

    Returns:
        A new dict holding the selected fields. The score is rendered as a
        string with exactly six decimal places; every other value is passed
        through by reference.

    Raises:
        KeyError: If ``point`` lacks one of the required keys.
    """
    record: dict[str, Any] = {"fixture_id": point["fixture_id"]}
    record["degradation_level"] = degradation_level
    for key in ("expected_admissible", "observed_admissible"):
        record[key] = point[key]
    # Fixed-width text keeps the serialized score stable across regenerations.
    record["overall_admissibility_score"] = format(point["overall_admissibility_score"], ".6f")
    for key in ("passed_contracts", "failed_contracts", "failure_labels"):
        record[key] = point[key]
    return record
34+
35+
36+
def _repo_rooted_fixture_paths(fixtures: tuple[Path, ...]) -> tuple[Path, ...]:
    """Return ``fixtures`` with every relative path re-based onto ``REPO_ROOT``.

    Absolute paths are passed through untouched, and the input order is kept.
    """
    anchored: list[Path] = []
    for candidate in fixtures:
        if not candidate.is_absolute():
            candidate = REPO_ROOT / candidate
        anchored.append(candidate)
    return tuple(anchored)
38+
39+
40+
def generate_mcp_trace_replay_artifact(output_path: Path = OUTPUT_PATH) -> Path:
    """Build the MCP trace replay artifact and write it to ``output_path`` as JSON.

    The fixtures for ``FAMILY`` are looked up in the manifest at
    ``MANIFEST_PATH`` for the levels in ``CURVE_LEVELS``, rebased onto the
    repository root, and evaluated by ``DegradationCurveGenerator``. Each
    resulting curve point is paired positionally with a level from
    ``CURVE_LEVELS``.

    Args:
        output_path: Destination file; missing parent directories are created.

    Returns:
        ``output_path``, unchanged.

    Raises:
        ValueError: If the number of curve points differs from the number of
            levels (enforced by ``zip(..., strict=True)``).
    """
    curve_generator = DegradationCurveGenerator()
    family_fixtures = curve_generator.fixtures_for_manifest_family(
        FAMILY,
        levels=CURVE_LEVELS,
        manifest_path=MANIFEST_PATH,
    )
    anchored_fixtures = _repo_rooted_fixture_paths(family_fixtures)
    curve = curve_generator.generate(anchored_fixtures, curve_id=f"{FAMILY}_curve_v1")
    points = curve_generator.to_dict(curve)["points"]

    records = []
    for point, level in zip(points, CURVE_LEVELS, strict=True):
        records.append(_fixture_payload(point, level))

    # The first and last records correspond to the first and last entries of
    # CURVE_LEVELS because of the positional pairing above.
    summary = {
        "fixture_count": len(records),
        "baseline_admissible": records[0]["observed_admissible"],
        "severe_admissible": records[-1]["observed_admissible"],
        "deterministic_evaluation": True,
        "llm_judges": "none",
        "external_apis": "none",
    }
    artifact = {
        "artifact_id": ARTIFACT_ID,
        "generated_by": "McpTraceReplayArtifactGenerator",
        "version": "1.0",
        "family": FAMILY,
        "fixtures": records,
        "summary": summary,
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Sorted keys plus a trailing newline give a stable on-disk form.
    serialized = json.dumps(artifact, indent=2, sort_keys=True) + "\n"
    output_path.write_text(serialized, encoding="utf-8")
    return output_path
74+
75+
76+
def main() -> int:
    """Regenerate the artifact at its default location and report where it went.

    Prints the written file's path relative to ``REPO_ROOT`` in POSIX form.

    Returns:
        ``0``, used as the process exit status.
    """
    written = generate_mcp_trace_replay_artifact()
    repo_relative = written.relative_to(REPO_ROOT)
    print(repo_relative.as_posix())
    return 0
80+
81+
82+
if __name__ == "__main__":
83+
raise SystemExit(main())
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
from __future__ import annotations

import json
from pathlib import Path

from scripts.generate_mcp_trace_replay_artifact import (
    ARTIFACT_ID,
    FAMILY,
    REPO_ROOT,
    generate_mcp_trace_replay_artifact,
)
7+
8+
# Anchor both paths to the repository root exported by the generator script so
# the tests find the committed files regardless of the directory pytest is
# started from (the generator itself resolves its paths the same way).
ARTIFACT_PATH = REPO_ROOT / "artifacts" / "mcp_trace_replay_results.json"
MANIFEST_PATH = REPO_ROOT / "fixtures" / "manifest.json"

# Fixture ids in the order the committed artifact is expected to list them.
EXPECTED_ORDER = [
    "mcp_trace_replay_v1",
    "mcp_trace_replay_mild_v1",
    "mcp_trace_replay_moderate_v1",
    "mcp_trace_replay_degraded_v1",
]
16+
17+
18+
def _load_json(path: Path) -> dict[str, object]:
    """Read ``path`` as UTF-8 text and return the parsed JSON document."""
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)
20+
21+
22+
def _manifest_fixture_index() -> dict[str, dict[str, object]]:
    """Map each manifest entry's ``fixture_id`` to the entry itself.

    The entries are read from the ``fixtures`` list of the JSON document at
    ``MANIFEST_PATH``; if an id repeats, the later entry wins.
    """
    index = {}
    for entry in _load_json(MANIFEST_PATH)["fixtures"]:
        index[entry["fixture_id"]] = entry
    return index
25+
26+
27+
def test_script_output_matches_committed_artifact(tmp_path: Path) -> None:
    """Regenerating into a scratch directory reproduces the committed artifact."""
    regenerated_path = tmp_path / "mcp_trace_replay_results.json"
    generate_mcp_trace_replay_artifact(regenerated_path)

    regenerated = _load_json(regenerated_path)
    committed = _load_json(ARTIFACT_PATH)
    assert regenerated == committed
32+
33+
34+
def test_artifact_has_stable_schema_no_time_or_environment_fields() -> None:
    """The committed artifact exposes exactly the six expected top-level keys."""
    artifact = _load_json(ARTIFACT_PATH)
    allowed_keys = {
        "artifact_id",
        "generated_by",
        "version",
        "family",
        "fixtures",
        "summary",
    }

    # An exact match rules out any extra field, such as a timestamp or host name.
    assert set(artifact) == allowed_keys
    assert artifact["artifact_id"] == ARTIFACT_ID
    assert artifact["family"] == FAMILY
40+
41+
42+
def test_fixture_order_is_deterministic() -> None:
    """Fixture records appear in the committed artifact in ``EXPECTED_ORDER``."""
    records = _load_json(ARTIFACT_PATH)["fixtures"]
    observed_order = []
    for record in records:
        observed_order.append(record["fixture_id"])
    assert observed_order == EXPECTED_ORDER
46+
47+
48+
def test_labels_and_admissibility_align_with_manifest_expectations() -> None:
    """Each artifact record agrees with its manifest entry.

    Compares ``expected_admissible`` and the failure labels (the manifest's
    ``expected_failure_labels``) for every fixture listed in the artifact.
    """
    records = _load_json(ARTIFACT_PATH)["fixtures"]
    manifest_by_id = _manifest_fixture_index()

    for record in records:
        manifest_entry = manifest_by_id[record["fixture_id"]]
        assert record["expected_admissible"] == manifest_entry["expected_admissible"]
        assert record["failure_labels"] == manifest_entry["expected_failure_labels"]
57+
58+
59+
def test_baseline_and_severe_admissibility_guarantee() -> None:
    """The first record is an admissible baseline and the last an inadmissible severe case.

    The ``summary`` section must report the same two outcomes.
    """
    artifact = _load_json(ARTIFACT_PATH)
    records = artifact["fixtures"]
    summary = artifact["summary"]
    first_record, last_record = records[0], records[-1]

    assert first_record["degradation_level"] == "baseline"
    assert first_record["observed_admissible"] is True
    assert last_record["degradation_level"] == "severe"
    assert last_record["observed_admissible"] is False
    assert summary["baseline_admissible"] is True
    assert summary["severe_admissible"] is False

0 commit comments

Comments
 (0)