From ead595183a1987da9dde60b226740c6364842eb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20K=C3=B6lnberger?= <159939812+ProfRandom92@users.noreply.github.com> Date: Wed, 20 May 2026 03:06:44 -0700 Subject: [PATCH 1/2] Add MCP trace replay artifact --- artifacts/mcp_trace_replay_results.json | 93 +++++++++++++++++++ package.json | 3 +- scripts/generate_mcp_trace_replay_artifact.py | 74 +++++++++++++++ tests/test_mcp_trace_replay_artifact.py | 69 ++++++++++++++ 4 files changed, 238 insertions(+), 1 deletion(-) create mode 100644 artifacts/mcp_trace_replay_results.json create mode 100644 scripts/generate_mcp_trace_replay_artifact.py create mode 100644 tests/test_mcp_trace_replay_artifact.py diff --git a/artifacts/mcp_trace_replay_results.json b/artifacts/mcp_trace_replay_results.json new file mode 100644 index 0000000..59afd49 --- /dev/null +++ b/artifacts/mcp_trace_replay_results.json @@ -0,0 +1,93 @@ +{ + "artifact_id": "mcp_trace_replay_results_v1", + "family": "mcp_trace_replay", + "fixtures": [ + { + "degradation_level": "baseline", + "expected_admissible": true, + "failed_contracts": [], + "failure_labels": [], + "fixture_id": "mcp_trace_replay_v1", + "observed_admissible": true, + "overall_admissibility_score": "1.000000", + "passed_contracts": [ + "capability_boundary_respected", + "dependency_chain_preserved", + "recovery_path_available", + "tool_call_order_preserved", + "validation_before_unsafe_action" + ] + }, + { + "degradation_level": "mild", + "expected_admissible": false, + "failed_contracts": [ + "capability_boundary_respected", + "recovery_path_available" + ], + "failure_labels": [ + "INVARIANT_VIOLATION", + "RECOVERY_PATH_INVALID" + ], + "fixture_id": "mcp_trace_replay_mild_v1", + "observed_admissible": false, + "overall_admissibility_score": "0.833333", + "passed_contracts": [ + "dependency_chain_preserved", + "tool_call_order_preserved", + "validation_before_unsafe_action" + ] + }, + { + "degradation_level": "moderate", + "expected_admissible": false, + "failed_contracts": [ + "capability_boundary_respected", + "dependency_chain_preserved" + ], + "failure_labels": [ + "CAUSAL_DEPENDENCY_LOSS", + "INVARIANT_VIOLATION" + ], + "fixture_id": "mcp_trace_replay_moderate_v1", + "observed_admissible": false, + "overall_admissibility_score": "0.833333", + "passed_contracts": [ + "recovery_path_available", + "tool_call_order_preserved", + "validation_before_unsafe_action" + ] + }, + { + "degradation_level": "severe", + "expected_admissible": false, + "failed_contracts": [ + "capability_boundary_respected", + "dependency_chain_preserved", + "recovery_path_available" + ], + "failure_labels": [ + "CAUSAL_DEPENDENCY_LOSS", + "INVARIANT_VIOLATION", + "RECOVERY_PATH_INVALID" + ], + "fixture_id": "mcp_trace_replay_degraded_v1", + "observed_admissible": false, + "overall_admissibility_score": "0.750000", + "passed_contracts": [ + "tool_call_order_preserved", + "validation_before_unsafe_action" + ] + } + ], + "generated_by": "McpTraceReplayArtifactGenerator", + "summary": { + "baseline_admissible": true, + "deterministic_evaluation": true, + "external_apis": "none", + "fixture_count": 4, + "llm_judges": "none", + "severe_admissible": false + }, + "version": "1.0" +} diff --git a/package.json b/package.json index 25894f4..0607cf9 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "check": "npm run layout && npm run typecheck && npm run validate && npm run build && npm run test", "generate:layered-admissibility": "python scripts/generate_layered_admissibility_artifact.py", "generate:multi-family-admissibility": "python scripts/generate_multi_family_admissibility_artifact.py", - "generate:multi-family-svg": "python scripts/render_multi_family_admissibility_svg.py" + "generate:multi-family-svg": "python scripts/render_multi_family_admissibility_svg.py", + "generate:mcp-trace-replay": "python scripts/generate_mcp_trace_replay_artifact.py" } } diff --git a/scripts/generate_mcp_trace_replay_artifact.py b/scripts/generate_mcp_trace_replay_artifact.py new file mode 100644 index 0000000..e9adf27 --- /dev/null +++ b/scripts/generate_mcp_trace_replay_artifact.py @@ -0,0 +1,74 @@ +"""Deterministic entrypoint for MCP trace replay artifact regeneration.""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path +from typing import Any + +REPO_ROOT = Path(__file__).resolve().parents[1] +if str(REPO_ROOT) not in sys.path: + sys.path.insert(0, str(REPO_ROOT)) + +from src.validation.degradation_curve_generator import DegradationCurveGenerator + +ARTIFACT_ID = "mcp_trace_replay_results_v1" +FAMILY = "mcp_trace_replay" +CURVE_LEVELS = ("baseline", "mild", "moderate", "severe") +OUTPUT_PATH = Path("artifacts/mcp_trace_replay_results.json") + + +def _fixture_payload(point: dict[str, Any], degradation_level: str) -> dict[str, Any]: + return { + "fixture_id": point["fixture_id"], + "degradation_level": degradation_level, + "expected_admissible": point["expected_admissible"], + "observed_admissible": point["observed_admissible"], + "overall_admissibility_score": f"{point['overall_admissibility_score']:.6f}", + "passed_contracts": point["passed_contracts"], + "failed_contracts": point["failed_contracts"], + "failure_labels": point["failure_labels"], + } + + +def generate_mcp_trace_replay_artifact(output_path: Path = OUTPUT_PATH) -> Path: + generator = DegradationCurveGenerator() + fixtures = generator.fixtures_for_manifest_family(FAMILY, levels=CURVE_LEVELS) + curve = generator.generate(fixtures, curve_id=f"{FAMILY}_curve_v1") + curve_dict = generator.to_dict(curve) + + fixture_payload = [ + _fixture_payload(point, level) + for point, level in zip(curve_dict["points"], CURVE_LEVELS, strict=True) + ] + + payload = { + "artifact_id": ARTIFACT_ID, + "generated_by": "McpTraceReplayArtifactGenerator", + "version": "1.0", + "family": FAMILY, + "fixtures": fixture_payload, + "summary": { + "fixture_count": len(fixture_payload), + "baseline_admissible": fixture_payload[0]["observed_admissible"], + "severe_admissible": fixture_payload[-1]["observed_admissible"], + "deterministic_evaluation": True, + "llm_judges": "none", + "external_apis": "none", + }, + } + + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") + return output_path + + +def main() -> int: + output_path = generate_mcp_trace_replay_artifact() + print(output_path.as_posix()) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/test_mcp_trace_replay_artifact.py b/tests/test_mcp_trace_replay_artifact.py new file mode 100644 index 0000000..5d99481 --- /dev/null +++ b/tests/test_mcp_trace_replay_artifact.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from scripts.generate_mcp_trace_replay_artifact import ARTIFACT_ID, FAMILY, generate_mcp_trace_replay_artifact + +ARTIFACT_PATH = Path("artifacts/mcp_trace_replay_results.json") +MANIFEST_PATH = Path("fixtures/manifest.json") +EXPECTED_ORDER = [ + "mcp_trace_replay_v1", + "mcp_trace_replay_mild_v1", + "mcp_trace_replay_moderate_v1", + "mcp_trace_replay_degraded_v1", +] + + +def _load_json(path: Path) -> dict[str, object]: + return json.loads(path.read_text(encoding="utf-8")) + + +def _manifest_fixture_index() -> dict[str, dict[str, object]]: + manifest = _load_json(MANIFEST_PATH) + return {entry["fixture_id"]: entry for entry in manifest["fixtures"]} + + +def test_script_output_matches_committed_artifact(tmp_path: Path) -> None: + output_path = tmp_path / "mcp_trace_replay_results.json" + generate_mcp_trace_replay_artifact(output_path) + + assert _load_json(output_path) == _load_json(ARTIFACT_PATH) + + +def test_artifact_has_stable_schema_no_time_or_environment_fields() -> None: + payload = _load_json(ARTIFACT_PATH) + + assert set(payload.keys()) == {"artifact_id", "generated_by", "version", "family", "fixtures", "summary"} + assert payload["artifact_id"] == ARTIFACT_ID + assert payload["family"] == FAMILY + + +def test_fixture_order_is_deterministic() -> None: + payload = _load_json(ARTIFACT_PATH) + fixture_ids = [entry["fixture_id"] for entry in payload["fixtures"]] + assert fixture_ids == EXPECTED_ORDER + + +def test_labels_and_admissibility_align_with_manifest_expectations() -> None: + payload = _load_json(ARTIFACT_PATH) + manifest_index = _manifest_fixture_index() + + for fixture in payload["fixtures"]: + fixture_id = fixture["fixture_id"] + expected = manifest_index[fixture_id] + assert fixture["expected_admissible"] == expected["expected_admissible"] + assert fixture["failure_labels"] == expected["expected_failure_labels"] + + +def test_baseline_and_severe_admissibility_guarantee() -> None: + payload = _load_json(ARTIFACT_PATH) + fixtures = payload["fixtures"] + summary = payload["summary"] + + assert fixtures[0]["degradation_level"] == "baseline" + assert fixtures[0]["observed_admissible"] is True + assert fixtures[-1]["degradation_level"] == "severe" + assert fixtures[-1]["observed_admissible"] is False + assert summary["baseline_admissible"] is True + assert summary["severe_admissible"] is False From 0dc692461c281a46dccb812e96f7a451f50c8913 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20K=C3=B6lnberger?= <159939812+ProfRandom92@users.noreply.github.com> Date: Wed, 20 May 2026 03:27:27 -0700 Subject: [PATCH 2/2] Anchor MCP artifact generator paths to repo root --- scripts/generate_mcp_trace_replay_artifact.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/scripts/generate_mcp_trace_replay_artifact.py b/scripts/generate_mcp_trace_replay_artifact.py index e9adf27..3cd5ec7 100644 --- a/scripts/generate_mcp_trace_replay_artifact.py +++ b/scripts/generate_mcp_trace_replay_artifact.py @@ -16,7 +16,8 @@ ARTIFACT_ID = "mcp_trace_replay_results_v1" FAMILY = "mcp_trace_replay" CURVE_LEVELS = ("baseline", "mild", "moderate", "severe") -OUTPUT_PATH = Path("artifacts/mcp_trace_replay_results.json") +OUTPUT_PATH = REPO_ROOT / "artifacts" / "mcp_trace_replay_results.json" +MANIFEST_PATH = REPO_ROOT / "fixtures" / "manifest.json" def _fixture_payload(point: dict[str, Any], degradation_level: str) -> dict[str, Any]: @@ -32,10 +33,18 @@ def _fixture_payload(point: dict[str, Any], degradation_level: str) -> dict[str, } +def _repo_rooted_fixture_paths(fixtures: tuple[Path, ...]) -> tuple[Path, ...]: + return tuple(path if path.is_absolute() else REPO_ROOT / path for path in fixtures) + + def generate_mcp_trace_replay_artifact(output_path: Path = OUTPUT_PATH) -> Path: generator = DegradationCurveGenerator() - fixtures = generator.fixtures_for_manifest_family(FAMILY, levels=CURVE_LEVELS) - curve = generator.generate(fixtures, curve_id=f"{FAMILY}_curve_v1") + fixtures = generator.fixtures_for_manifest_family( + FAMILY, + levels=CURVE_LEVELS, + manifest_path=MANIFEST_PATH, + ) + curve = generator.generate(_repo_rooted_fixture_paths(fixtures), curve_id=f"{FAMILY}_curve_v1") curve_dict = generator.to_dict(curve) fixture_payload = [ @@ -66,7 +75,7 @@ def generate_mcp_trace_replay_artifact(output_path: Path = OUTPUT_PATH) -> Path: def main() -> int: output_path = generate_mcp_trace_replay_artifact() - print(output_path.as_posix()) + print(output_path.relative_to(REPO_ROOT).as_posix()) return 0