From 03c18bd48280e3209c72284e2dd3dd4902c03a7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexander=20K=C3=B6lnberger?=
 <159939812+ProfRandom92@users.noreply.github.com>
Date: Wed, 20 May 2026 03:43:19 -0700
Subject: [PATCH 1/3] Add replay semantic integrity artifact

---
 .../replay_semantic_integrity_results.json    | 292 ++++++++++++++++++
 package.json                                  |   3 +-
 ...rate_replay_semantic_integrity_artifact.py | 154 +++++++++
 ...test_replay_semantic_integrity_artifact.py | 114 +++++++
 4 files changed, 562 insertions(+), 1 deletion(-)
 create mode 100644 artifacts/replay_semantic_integrity_results.json
 create mode 100644 scripts/generate_replay_semantic_integrity_artifact.py
 create mode 100644 tests/test_replay_semantic_integrity_artifact.py

diff --git a/artifacts/replay_semantic_integrity_results.json b/artifacts/replay_semantic_integrity_results.json
new file mode 100644
index 0000000..abeb339
--- /dev/null
+++ b/artifacts/replay_semantic_integrity_results.json
@@ -0,0 +1,292 @@
+{
+  "artifact_id": "replay_semantic_integrity_results_v1",
+  "generated_by": "ReplaySemanticIntegrityArtifactGenerator",
+  "version": "1.0",
+  "evaluation_mode": "deterministic",
+  "llm_judges": "none",
+  "external_apis": "none",
+  "families": [
+    {
+      "family": "coding_workflow_pr_review",
+      "fixture_count": 4,
+      "levels": [
+        "baseline",
+        "mild",
+        "moderate",
+        "severe"
+      ],
+      "commitment_classes": {
+        "evidence": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "constraints": {
+          "passed": 3,
+          "failed": 1,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "POLICY_ORDER_BROKEN",
+            "RECOVERY_PATH_INVALID"
+          ]
+        },
+        "dependencies": {
+          "passed": 2,
+          "failed": 2,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "POLICY_ORDER_BROKEN",
+            "RECOVERY_PATH_INVALID"
+          ]
+        },
+        "recovery_paths": {
+          "passed": 1,
+          "failed": 3,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "POLICY_ORDER_BROKEN",
+            "RECOVERY_PATH_INVALID"
+          ]
+        },
+        "tool_order": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "capability_boundaries": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "governance_or_policy": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "invariants": {
+          "passed": 3,
+          "failed": 1,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "POLICY_ORDER_BROKEN",
+            "RECOVERY_PATH_INVALID"
+          ]
+        }
+      }
+    },
+    {
+      "family": "incident_response_page_triage",
+      "fixture_count": 4,
+      "levels": [
+        "baseline",
+        "mild",
+        "moderate",
+        "severe"
+      ],
+      "commitment_classes": {
+        "evidence": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "constraints": {
+          "passed": 6,
+          "failed": 2,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "POLICY_ORDER_BROKEN",
+            "RECOVERY_PATH_INVALID"
+          ]
+        },
+        "dependencies": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "recovery_paths": {
+          "passed": 1,
+          "failed": 3,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "POLICY_ORDER_BROKEN",
+            "RECOVERY_PATH_INVALID"
+          ]
+        },
+        "tool_order": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "capability_boundaries": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "governance_or_policy": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "invariants": {
+          "passed": 2,
+          "failed": 2,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "POLICY_ORDER_BROKEN",
+            "RECOVERY_PATH_INVALID"
+          ]
+        }
+      }
+    },
+    {
+      "family": "cross_domain_operational_dependency_workflow",
+      "fixture_count": 4,
+      "levels": [
+        "baseline",
+        "mild",
+        "moderate",
+        "severe"
+      ],
+      "commitment_classes": {
+        "evidence": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "constraints": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "dependencies": {
+          "passed": 2,
+          "failed": 2,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "POLICY_ORDER_BROKEN",
+            "RECOVERY_PATH_INVALID"
+          ]
+        },
+        "recovery_paths": {
+          "passed": 1,
+          "failed": 3,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "POLICY_ORDER_BROKEN",
+            "RECOVERY_PATH_INVALID"
+          ]
+        },
+        "tool_order": {
+          "passed": 3,
+          "failed": 1,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "POLICY_ORDER_BROKEN",
+            "RECOVERY_PATH_INVALID"
+          ]
+        },
+        "capability_boundaries": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "governance_or_policy": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "invariants": {
+          "passed": 1,
+          "failed": 3,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "POLICY_ORDER_BROKEN",
+            "RECOVERY_PATH_INVALID"
+          ]
+        }
+      }
+    },
+    {
+      "family": "mcp_trace_replay",
+      "fixture_count": 4,
+      "levels": [
+        "baseline",
+        "mild",
+        "moderate",
+        "severe"
+      ],
+      "commitment_classes": {
+        "evidence": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "constraints": {
+          "passed": 4,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "dependencies": {
+          "passed": 2,
+          "failed": 2,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "RECOVERY_PATH_INVALID"
+          ]
+        },
+        "recovery_paths": {
+          "passed": 2,
+          "failed": 2,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "RECOVERY_PATH_INVALID"
+          ]
+        },
+        "tool_order": {
+          "passed": 4,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "capability_boundaries": {
+          "passed": 1,
+          "failed": 3,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS",
+            "INVARIANT_VIOLATION",
+            "RECOVERY_PATH_INVALID"
+          ]
+        },
+        "governance_or_policy": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        },
+        "invariants": {
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
+        }
+      }
+    }
+  ],
+  "global_summary": {
+    "family_count": 4,
+    "fixture_count": 16,
+    "deterministic_evaluation": true,
+    "llm_judges": "none",
+    "external_apis": "none"
+  }
+}
diff --git a/package.json b/package.json
index 0607cf9..1a12cb3 100644
--- a/package.json
+++ b/package.json
@@ -15,6 +15,7 @@
     "generate:layered-admissibility": "python scripts/generate_layered_admissibility_artifact.py",
     "generate:multi-family-admissibility": "python scripts/generate_multi_family_admissibility_artifact.py",
     "generate:multi-family-svg": "python scripts/render_multi_family_admissibility_svg.py",
-    "generate:mcp-trace-replay": "python scripts/generate_mcp_trace_replay_artifact.py"
+    "generate:mcp-trace-replay": "python scripts/generate_mcp_trace_replay_artifact.py",
+    "generate:replay-semantic-integrity": "python scripts/generate_replay_semantic_integrity_artifact.py"
   }
 }
diff --git a/scripts/generate_replay_semantic_integrity_artifact.py b/scripts/generate_replay_semantic_integrity_artifact.py
new file mode 100644
index 0000000..cf78362
--- /dev/null
+++ b/scripts/generate_replay_semantic_integrity_artifact.py
@@ -0,0 +1,154 @@
+"""Deterministic entrypoint for replay semantic integrity artifact regeneration."""
+
+from __future__ import annotations
+
+import json
+from collections import OrderedDict
+from pathlib import Path
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+MANIFEST_PATH = REPO_ROOT / "fixtures" / "manifest.json"
+MULTI_FAMILY_PATH = REPO_ROOT / "artifacts" / "multi_family_admissibility_results.json"
+OUTPUT_PATH = REPO_ROOT / "artifacts" / "replay_semantic_integrity_results.json"
+
+ARTIFACT_ID = "replay_semantic_integrity_results_v1"
+LEVELS = ("baseline", "mild", "moderate", "severe")
+COMMITMENT_CLASS_ORDER = (
+    "evidence",
+    "constraints",
+    "dependencies",
+    "recovery_paths",
+    "tool_order",
+    "capability_boundaries",
+    "governance_or_policy",
+    "invariants",
+)
+
+
+def _load_json(path: Path) -> dict[str, object]:
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def _family_order_and_counts() -> tuple[list[str], dict[str, int], dict[str, str]]:
+    manifest = _load_json(MANIFEST_PATH)
+    fixtures = manifest["fixtures"]
+    family_order: list[str] = []
+    fixture_counts: dict[str, int] = {}
+    fixture_levels: dict[str, str] = {}
+
+    for entry in fixtures:
+        family = entry["family"]
+        fixture_id = entry["fixture_id"]
+        level = entry["degradation_level"]
+        if family not in fixture_counts:
+            family_order.append(family)
+            fixture_counts[family] = 0
+        fixture_counts[family] += 1
+        fixture_levels[fixture_id] = level
+
+    return family_order, fixture_counts, fixture_levels
+
+
+def _class_for_contract(contract_id: str) -> str:
+    contract = contract_id.lower()
+
+    if any(token in contract for token in ("evidence",)):
+        return "evidence"
+    if any(token in contract for token in ("constraint", "validation")):
+        return "constraints"
+    if any(token in contract for token in ("dependency", "causal", "chain")):
+        return "dependencies"
+    if any(token in contract for token in ("recovery", "rollback", "escalation")):
+        return "recovery_paths"
+    if any(token in contract for token in ("order", "ordering", "sequence", "tool_call_order")):
+        return "tool_order"
+    if any(token in contract for token in ("capability", "boundary")):
+        return "capability_boundaries"
+    if any(token in contract for token in ("policy", "governance", "approval")):
+        return "governance_or_policy"
+    if any(token in contract for token in ("invariant", "orphan")):
+        return "invariants"
+
+    return "constraints"
+
+
+def generate_replay_semantic_integrity_artifact(output_path: Path = OUTPUT_PATH) -> Path:
+    multi_family = _load_json(MULTI_FAMILY_PATH)
+    family_order, fixture_counts, fixture_levels = _family_order_and_counts()
+    family_curves = {entry["family"]: entry["curve"] for entry in multi_family["families"]}
+
+    families_payload: list[dict[str, object]] = []
+    total_fixture_count = 0
+
+    for family in family_order:
+        curve = family_curves[family]
+        points = sorted(curve["points"], key=lambda point: LEVELS.index(fixture_levels[point["fixture_id"]]))
+
+        commitment_classes: OrderedDict[str, dict[str, object]] = OrderedDict()
+        for commitment_class in COMMITMENT_CLASS_ORDER:
+            commitment_classes[commitment_class] = {
+                "passed": 0,
+                "failed": 0,
+                "failure_labels": set(),
+            }
+
+        for point in points:
+            failed_contracts = set(point["failed_contracts"])
+            for contract_id in point["passed_contracts"] + point["failed_contracts"]:
+                commitment_class = _class_for_contract(contract_id)
+                if contract_id in failed_contracts:
+                    commitment_classes[commitment_class]["failed"] += 1
+                    for failure_label in point["failure_labels"]:
+                        commitment_classes[commitment_class]["failure_labels"].add(failure_label)
+                else:
+                    commitment_classes[commitment_class]["passed"] += 1
+
+        serializable_classes: OrderedDict[str, dict[str, object]] = OrderedDict()
+        for commitment_class in COMMITMENT_CLASS_ORDER:
+            values = commitment_classes[commitment_class]
+            serializable_classes[commitment_class] = {
+                "passed": values["passed"],
+                "failed": values["failed"],
+                "failure_labels": sorted(values["failure_labels"]),
+            }
+
+        families_payload.append(
+            {
+                "family": family,
+                "fixture_count": fixture_counts[family],
+                "levels": list(LEVELS),
+                "commitment_classes": serializable_classes,
+            }
+        )
+        total_fixture_count += fixture_counts[family]
+
+    payload = {
+        "artifact_id": ARTIFACT_ID,
+        "generated_by": "ReplaySemanticIntegrityArtifactGenerator",
+        "version": "1.0",
+        "evaluation_mode": "deterministic",
+        "llm_judges": "none",
+        "external_apis": "none",
+        "families": families_payload,
+        "global_summary": {
+            "family_count": len(families_payload),
+            "fixture_count": total_fixture_count,
+            "deterministic_evaluation": True,
+            "llm_judges": "none",
+            "external_apis": "none",
+        },
+    }
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
+    return output_path
+
+
+def main() -> int:
+    output_path = generate_replay_semantic_integrity_artifact()
+    print(output_path.relative_to(REPO_ROOT).as_posix())
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/test_replay_semantic_integrity_artifact.py b/tests/test_replay_semantic_integrity_artifact.py
new file mode 100644
index 0000000..719dcea
--- /dev/null
+++ b/tests/test_replay_semantic_integrity_artifact.py
@@ -0,0 +1,114 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from scripts.generate_replay_semantic_integrity_artifact import (
+    ARTIFACT_ID,
+    COMMITMENT_CLASS_ORDER,
+    LEVELS,
+    generate_replay_semantic_integrity_artifact,
+)
+from src.validation.failure_taxonomy import FAILURE_TAXONOMY
+
+ARTIFACT_PATH = Path("artifacts/replay_semantic_integrity_results.json")
+MANIFEST_PATH = Path("fixtures/manifest.json")
+EXPECTED_FAMILIES = [
+    "coding_workflow_pr_review",
+    "incident_response_page_triage",
+    "cross_domain_operational_dependency_workflow",
+    "mcp_trace_replay",
+]
+FORBIDDEN_FIELDS = {
+    "timestamp",
+    "generated_at",
+    "environment",
+    "hostname",
+    "cwd",
+    "machine",
+    "semantic_similarity",
+    "embedding",
+    "llm",
+    "judge",
+}
+
+
+def _load_json(path: Path) -> dict[str, object]:
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def _walk_keys(value: object) -> set[str]:
+    keys: set[str] = set()
+    if isinstance(value, dict):
+        keys.update(value.keys())
+        for nested in value.values():
+            keys.update(_walk_keys(nested))
+    elif isinstance(value, list):
+        for nested in value:
+            keys.update(_walk_keys(nested))
+    return keys
+
+
+def test_script_output_matches_committed_artifact(tmp_path: Path) -> None:
+    output_path = tmp_path / "replay_semantic_integrity_results.json"
+    generate_replay_semantic_integrity_artifact(output_path)
+
+    assert _load_json(output_path) == _load_json(ARTIFACT_PATH)
+
+
+def test_artifact_schema_has_no_time_or_environment_fields() -> None:
+    payload = _load_json(ARTIFACT_PATH)
+    assert payload["artifact_id"] == ARTIFACT_ID
+
+    all_keys = _walk_keys(payload)
+    for forbidden in FORBIDDEN_FIELDS:
+        assert forbidden not in all_keys
+
+
+def test_all_required_families_are_represented_in_manifest_order() -> None:
+    payload = _load_json(ARTIFACT_PATH)
+    families = [entry["family"] for entry in payload["families"]]
+    assert families == EXPECTED_FAMILIES
+
+
+def test_fixture_count_matches_manifest_and_levels_are_deterministic() -> None:
+    payload = _load_json(ARTIFACT_PATH)
+    manifest = _load_json(MANIFEST_PATH)
+
+    expected_fixture_count = len(manifest["fixtures"])
+    assert payload["global_summary"]["fixture_count"] == expected_fixture_count
+
+    for family_payload in payload["families"]:
+        assert family_payload["fixture_count"] == 4
+        assert family_payload["levels"] == list(LEVELS)
+
+
+def test_commitment_class_order_is_stable_and_complete() -> None:
+    payload = _load_json(ARTIFACT_PATH)
+
+    for family_payload in payload["families"]:
+        class_keys = list(family_payload["commitment_classes"].keys())
+        assert class_keys == list(COMMITMENT_CLASS_ORDER)
+
+
+def test_failure_labels_are_registered_and_sorted() -> None:
+    payload = _load_json(ARTIFACT_PATH)
+    registered_labels = set(FAILURE_TAXONOMY.keys())
+
+    for family_payload in payload["families"]:
+        for class_payload in family_payload["commitment_classes"].values():
+            labels = class_payload["failure_labels"]
+            assert labels == sorted(labels)
+            for label in labels:
+                assert label in registered_labels
+
+
+def test_artifact_declares_deterministic_mode_and_no_external_evaluators() -> None:
+    payload = _load_json(ARTIFACT_PATH)
+
+    assert payload["evaluation_mode"] == "deterministic"
+    assert payload["llm_judges"] == "none"
+    assert payload["external_apis"] == "none"
+    assert payload["global_summary"]["deterministic_evaluation"] is True
+    assert payload["global_summary"]["llm_judges"] == "none"
+    assert payload["global_summary"]["external_apis"] == "none"

From 568a85480ec45cc0c4da5df38563a03697b238e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexander=20K=C3=B6lnberger?=
 <159939812+ProfRandom92@users.noreply.github.com>
Date: Wed, 20 May 2026 04:01:40 -0700
Subject: [PATCH 2/3] Fix contract-linked replay semantic integrity labels

---
 .../replay_semantic_integrity_results.json    | 107 ++++++------------
 ...rate_replay_semantic_integrity_artifact.py |  86 +++++++++-----
 ...test_replay_semantic_integrity_artifact.py |  74 ++++++++++++
 3 files changed, 166 insertions(+), 101 deletions(-)

diff --git a/artifacts/replay_semantic_integrity_results.json b/artifacts/replay_semantic_integrity_results.json
index abeb339..567be58 100644
--- a/artifacts/replay_semantic_integrity_results.json
+++ b/artifacts/replay_semantic_integrity_results.json
@@ -22,39 +22,30 @@
           "failure_labels": []
         },
         "constraints": {
-          "passed": 3,
-          "failed": 1,
-          "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
-            "POLICY_ORDER_BROKEN",
-            "RECOVERY_PATH_INVALID"
-          ]
+          "passed": 0,
+          "failed": 0,
+          "failure_labels": []
         },
         "dependencies": {
           "passed": 2,
           "failed": 2,
           "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
-            "POLICY_ORDER_BROKEN",
-            "RECOVERY_PATH_INVALID"
+            "CAUSAL_DEPENDENCY_LOSS"
           ]
         },
         "recovery_paths": {
           "passed": 1,
           "failed": 3,
           "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
-            "POLICY_ORDER_BROKEN",
             "RECOVERY_PATH_INVALID"
           ]
         },
         "tool_order": {
-          "passed": 0,
-          "failed": 0,
-          "failure_labels": []
+          "passed": 3,
+          "failed": 1,
+          "failure_labels": [
+            "POLICY_ORDER_BROKEN"
+          ]
         },
         "capability_boundaries": {
           "passed": 0,
@@ -70,10 +61,7 @@
           "passed": 3,
           "failed": 1,
           "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
-            "POLICY_ORDER_BROKEN",
-            "RECOVERY_PATH_INVALID"
+            "INVARIANT_VIOLATION"
           ]
         }
       }
@@ -94,34 +82,30 @@
           "failure_labels": []
         },
         "constraints": {
-          "passed": 6,
-          "failed": 2,
-          "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
-            "POLICY_ORDER_BROKEN",
-            "RECOVERY_PATH_INVALID"
-          ]
-        },
-        "dependencies": {
           "passed": 0,
           "failed": 0,
           "failure_labels": []
         },
+        "dependencies": {
+          "passed": 3,
+          "failed": 1,
+          "failure_labels": [
+            "CAUSAL_DEPENDENCY_LOSS"
+          ]
+        },
         "recovery_paths": {
           "passed": 1,
           "failed": 3,
           "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
-            "POLICY_ORDER_BROKEN",
             "RECOVERY_PATH_INVALID"
           ]
         },
         "tool_order": {
-          "passed": 0,
-          "failed": 0,
-          "failure_labels": []
+          "passed": 3,
+          "failed": 1,
+          "failure_labels": [
+            "POLICY_ORDER_BROKEN"
+          ]
         },
         "capability_boundaries": {
           "passed": 0,
@@ -137,10 +121,7 @@
           "passed": 2,
           "failed": 2,
           "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
-            "POLICY_ORDER_BROKEN",
-            "RECOVERY_PATH_INVALID"
+            "INVARIANT_VIOLATION"
           ]
         }
       }
@@ -169,50 +150,38 @@
           "passed": 2,
           "failed": 2,
           "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
-            "POLICY_ORDER_BROKEN",
-            "RECOVERY_PATH_INVALID"
+            "CAUSAL_DEPENDENCY_LOSS"
           ]
         },
         "recovery_paths": {
           "passed": 1,
           "failed": 3,
           "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
-            "POLICY_ORDER_BROKEN",
             "RECOVERY_PATH_INVALID"
           ]
         },
         "tool_order": {
-          "passed": 3,
-          "failed": 1,
-          "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
-            "POLICY_ORDER_BROKEN",
-            "RECOVERY_PATH_INVALID"
-          ]
-        },
-        "capability_boundaries": {
           "passed": 0,
           "failed": 0,
           "failure_labels": []
         },
-        "governance_or_policy": {
+        "capability_boundaries": {
           "passed": 0,
           "failed": 0,
           "failure_labels": []
         },
+        "governance_or_policy": {
+          "passed": 3,
+          "failed": 1,
+          "failure_labels": [
+            "POLICY_ORDER_BROKEN"
+          ]
+        },
         "invariants": {
           "passed": 1,
           "failed": 3,
           "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
-            "POLICY_ORDER_BROKEN",
-            "RECOVERY_PATH_INVALID"
+            "INVARIANT_VIOLATION"
           ]
         }
       }
@@ -241,17 +210,13 @@
           "passed": 2,
           "failed": 2,
           "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
-            "RECOVERY_PATH_INVALID"
+            "CAUSAL_DEPENDENCY_LOSS"
           ]
         },
         "recovery_paths": {
           "passed": 2,
           "failed": 2,
           "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
             "RECOVERY_PATH_INVALID"
           ]
         },
@@ -264,9 +229,7 @@
           "passed": 1,
           "failed": 3,
           "failure_labels": [
-            "CAUSAL_DEPENDENCY_LOSS",
-            "INVARIANT_VIOLATION",
-            "RECOVERY_PATH_INVALID"
+            "INVARIANT_VIOLATION"
           ]
         },
         "governance_or_policy": {
diff --git a/scripts/generate_replay_semantic_integrity_artifact.py b/scripts/generate_replay_semantic_integrity_artifact.py
index cf78362..43e40bd 100644
--- a/scripts/generate_replay_semantic_integrity_artifact.py
+++ b/scripts/generate_replay_semantic_integrity_artifact.py
@@ -3,12 +3,18 @@
 from __future__ import annotations
 
 import json
+import sys
 from collections import OrderedDict
 from pathlib import Path
+from typing import Any
 
 REPO_ROOT = Path(__file__).resolve().parents[1]
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+
+from src.validation.contract_validator import ContractType, ContractValidator, Layer
+
 MANIFEST_PATH = REPO_ROOT / "fixtures" / "manifest.json"
-MULTI_FAMILY_PATH = REPO_ROOT / "artifacts" / "multi_family_admissibility_results.json"
 OUTPUT_PATH = REPO_ROOT / "artifacts" / "replay_semantic_integrity_results.json"
 
 ARTIFACT_ID = "replay_semantic_integrity_results_v1"
@@ -29,60 +35,65 @@ def _load_json(path: Path) -> dict[str, object]:
     return json.loads(path.read_text(encoding="utf-8"))
 
 
-def _family_order_and_counts() -> tuple[list[str], dict[str, int], dict[str, str]]:
+def _family_order_and_counts() -> tuple[list[str], dict[str, int]]:
     manifest = _load_json(MANIFEST_PATH)
     fixtures = manifest["fixtures"]
     family_order: list[str] = []
     fixture_counts: dict[str, int] = {}
-    fixture_levels: dict[str, str] = {}
 
     for entry in fixtures:
         family = entry["family"]
-        fixture_id = entry["fixture_id"]
-        level = entry["degradation_level"]
         if family not in fixture_counts:
             family_order.append(family)
             fixture_counts[family] = 0
         fixture_counts[family] += 1
-        fixture_levels[fixture_id] = level
 
-    return family_order, fixture_counts, fixture_levels
+    return family_order, fixture_counts
 
 
-def _class_for_contract(contract_id: str) -> str:
+def _class_for_contract(contract_id: str, contract_type: ContractType, layer: Layer) -> str:
     contract = contract_id.lower()
 
+    if any(token in contract for token in ("capability", "boundary")):
+        return "capability_boundaries"
+    if any(token in contract for token in ("policy", "governance", "approval")):
+        return "governance_or_policy"
+    if any(token in contract for token in ("recovery", "rollback", "escalation")):
+        return "recovery_paths"
+    if any(token in contract for token in ("dependency", "causal", "chain")):
+        return "dependencies"
+    if any(token in contract for token in ("order", "ordering", "sequence", "tool_call_order")):
+        return "tool_order"
+    if any(token in contract for token in ("invariant", "orphan")):
+        return "invariants"
     if any(token in contract for token in ("evidence",)):
         return "evidence"
     if any(token in contract for token in ("constraint", "validation")):
         return "constraints"
-    if any(token in contract for token in ("dependency", "causal", "chain")):
+
+    if contract_type == ContractType.CAUSALITY:
         return "dependencies"
-    if any(token in contract for token in ("recovery", "rollback", "escalation")):
+    if contract_type == ContractType.REACHABILITY:
         return "recovery_paths"
-    if any(token in contract for token in ("order", "ordering", "sequence", "tool_call_order")):
-        return "tool_order"
-    if any(token in contract for token in ("capability", "boundary")):
-        return "capability_boundaries"
-    if any(token in contract for token in ("policy", "governance", "approval")):
-        return "governance_or_policy"
-    if any(token in contract for token in ("invariant", "orphan")):
+    if contract_type == ContractType.ORDERING:
+        return "governance_or_policy" if layer == Layer.GOVERNANCE else "tool_order"
+    if contract_type == ContractType.INVARIANT:
         return "invariants"
 
     return "constraints"
 
 
 def generate_replay_semantic_integrity_artifact(output_path: Path = OUTPUT_PATH) -> Path:
-    multi_family = _load_json(MULTI_FAMILY_PATH)
-    family_order, fixture_counts, fixture_levels = _family_order_and_counts()
-    family_curves = {entry["family"]: entry["curve"] for entry in multi_family["families"]}
+    manifest = _load_json(MANIFEST_PATH)
+    fixtures = manifest["fixtures"]
+    family_order, fixture_counts = _family_order_and_counts()
 
     families_payload: list[dict[str, object]] = []
     total_fixture_count = 0
 
     for family in family_order:
-        curve = family_curves[family]
-        points = sorted(curve["points"], key=lambda point: LEVELS.index(fixture_levels[point["fixture_id"]]))
+        family_fixtures = [entry for entry in fixtures if entry["family"] == family]
+        points = sorted(family_fixtures, key=lambda entry: LEVELS.index(entry["degradation_level"]))
 
         commitment_classes: OrderedDict[str, dict[str, object]] = OrderedDict()
         for commitment_class in COMMITMENT_CLASS_ORDER:
@@ -92,14 +103,31 @@ def generate_replay_semantic_integrity_artifact(output_path: Path = OUTPUT_PATH)
                 "failure_labels": set(),
             }
 
-        for point in points:
-            failed_contracts = set(point["failed_contracts"])
-            for contract_id in point["passed_contracts"] + point["failed_contracts"]:
-                commitment_class = _class_for_contract(contract_id)
-                if contract_id in failed_contracts:
+        for fixture_entry in points:
+            fixture_path = REPO_ROOT / str(fixture_entry["path"])
+            original: dict[str, Any] = {
+                **_load_json(fixture_path / "original/trace.json"),
+                **_load_json(fixture_path / "original/state.json"),
+                "dependency_graph": _load_json(fixture_path / "original/dependency_graph.json"),
+            }
+            reconstructed: dict[str, Any] = {
+                **_load_json(fixture_path / "reconstructed/trace.json"),
+                **_load_json(fixture_path / "reconstructed/state.json"),
+                "dependency_graph": _load_json(fixture_path / "reconstructed/dependency_graph.json"),
+            }
+            contracts_dir = fixture_path / "original/contracts"
+            contracts_by_id = {
+                contract["contract_id"]: contract for contract in (_load_json(path) for path in sorted(contracts_dir.glob("*.json")))
+            }
+            contracts = [contracts_by_id[contract_id] for contract_id in fixture_entry["contracts"]]
+            results = ContractValidator().validate_contracts(original=original, reconstructed=reconstructed, contracts=contracts)
+
+            for result in results:
+                commitment_class = _class_for_contract(result.contract_id, result.contract_type, result.layer)
+                if not result.passed:
                     commitment_classes[commitment_class]["failed"] += 1
-                    for failure_label in point["failure_labels"]:
-                        commitment_classes[commitment_class]["failure_labels"].add(failure_label)
+                    if result.failure_label is not None:
+                        commitment_classes[commitment_class]["failure_labels"].add(result.failure_label)
                 else:
                     commitment_classes[commitment_class]["passed"] += 1
 
diff --git a/tests/test_replay_semantic_integrity_artifact.py b/tests/test_replay_semantic_integrity_artifact.py
index 719dcea..4320044 100644
--- a/tests/test_replay_semantic_integrity_artifact.py
+++ b/tests/test_replay_semantic_integrity_artifact.py
@@ -1,14 +1,18 @@
 from __future__ import annotations
 
 import json
+from collections import defaultdict
 from pathlib import Path
+from typing import Any
 
 from scripts.generate_replay_semantic_integrity_artifact import (
     ARTIFACT_ID,
     COMMITMENT_CLASS_ORDER,
     LEVELS,
+    _class_for_contract,
     generate_replay_semantic_integrity_artifact,
 )
+from src.validation.contract_validator import ContractValidator
 from src.validation.failure_taxonomy import FAILURE_TAXONOMY
 
 ARTIFACT_PATH = Path("artifacts/replay_semantic_integrity_results.json")
@@ -49,6 +53,39 @@ def _walk_keys(value: object) -> set[str]:
     return keys
 
 
+def _validation_label_union_by_family_and_class() -> dict[str, dict[str, set[str]]]:
+    manifest = _load_json(MANIFEST_PATH)
+    validator = ContractValidator()
+    output: dict[str, dict[str, set[str]]] = defaultdict(lambda: defaultdict(set))
+
+    for fixture_entry in manifest["fixtures"]:
+        fixture_path = Path(fixture_entry["path"])
+        original: dict[str, Any] = {
+            **_load_json(fixture_path / "original/trace.json"),
+            **_load_json(fixture_path / "original/state.json"),
+            "dependency_graph": _load_json(fixture_path / "original/dependency_graph.json"),
+        }
+        reconstructed: dict[str, Any] = {
+            **_load_json(fixture_path / "reconstructed/trace.json"),
+            **_load_json(fixture_path / "reconstructed/state.json"),
+            "dependency_graph": _load_json(fixture_path / "reconstructed/dependency_graph.json"),
+        }
+        contracts_dir = fixture_path / "original/contracts"
+        contracts_by_id = {
+            contract["contract_id"]: contract for contract in (_load_json(path) for path in sorted(contracts_dir.glob("*.json")))
+        }
+        contracts = [contracts_by_id[contract_id] for contract_id in fixture_entry["contracts"]]
+        results = validator.validate_contracts(original=original, reconstructed=reconstructed, contracts=contracts)
+
+        family = fixture_entry["family"]
+        for result in results:
+            commitment_class = _class_for_contract(result.contract_id, result.contract_type, result.layer)
+            if not result.passed and result.failure_label is not None:
+                output[family][commitment_class].add(result.failure_label)
+
+    return output
+
+
 def test_script_output_matches_committed_artifact(tmp_path: Path) -> None:
     output_path = tmp_path / "replay_semantic_integrity_results.json"
     generate_replay_semantic_integrity_artifact(output_path)
@@ -112,3 +149,40 @@ def test_artifact_declares_deterministic_mode_and_no_external_evaluators() -> No
     assert payload["global_summary"]["deterministic_evaluation"] is True
     assert payload["global_summary"]["llm_judges"] == "none"
     assert payload["global_summary"]["external_apis"] == "none"
+
+
+def test_contract_linked_label_behavior_recovery_and_ordering() -> None:
+    payload = _load_json(ARTIFACT_PATH)
+    families = {entry["family"]: entry for entry in payload["families"]}
+
+    coding_recovery_labels = set(families["coding_workflow_pr_review"]["commitment_classes"]["recovery_paths"]["failure_labels"])
+    assert "POLICY_ORDER_BROKEN" not in coding_recovery_labels
+    assert "CAUSAL_DEPENDENCY_LOSS" not in coding_recovery_labels
+    assert coding_recovery_labels == {"RECOVERY_PATH_INVALID"}
+
+    cross_domain_order_labels = set(
+        families["cross_domain_operational_dependency_workflow"]["commitment_classes"]["governance_or_policy"]["failure_labels"]
+    )
+    assert cross_domain_order_labels == {"POLICY_ORDER_BROKEN"}
+
+
+def test_no_class_gets_full_fixture_label_set_without_contract_support() -> None:
+    payload = _load_json(ARTIFACT_PATH)
+    validation_union = _validation_label_union_by_family_and_class()
+
+    for family_payload in payload["families"]:
+        family = family_payload["family"]
+        for commitment_class, class_payload in family_payload["commitment_classes"].items():
+            artifact_labels = set(class_payload["failure_labels"])
+            expected_labels = validation_union.get(family, {}).get(commitment_class, set())
+            assert artifact_labels == expected_labels
+
+
+def test_direct_validation_consistency_for_labels() -> None:
+    payload = _load_json(ARTIFACT_PATH)
+    validation_union = _validation_label_union_by_family_and_class()
+
+    for family_payload in payload["families"]:
+        family = family_payload["family"]
+        for commitment_class, class_payload in family_payload["commitment_classes"].items():
+            assert set(class_payload["failure_labels"]) == validation_union.get(family, {}).get(commitment_class, set())

From 28f80841e3cc249c244d863a34e23f47cb137b9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alexander=20K=C3=B6lnberger?=
 <159939812+ProfRandom92@users.noreply.github.com>
Date: Wed, 20 May 2026 04:07:19 -0700
Subject: [PATCH 3/3] Simplify replay semantic integrity dictionaries

---
 .../generate_replay_semantic_integrity_artifact.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/scripts/generate_replay_semantic_integrity_artifact.py b/scripts/generate_replay_semantic_integrity_artifact.py
index 43e40bd..cb746a7 100644
--- a/scripts/generate_replay_semantic_integrity_artifact.py
+++ b/scripts/generate_replay_semantic_integrity_artifact.py
@@ -4,7 +4,6 @@
 
 import json
 import sys
-from collections import OrderedDict
 from pathlib import Path
 from typing import Any
 
@@ -95,13 +94,10 @@ def generate_replay_semantic_integrity_artifact(output_path: Path = OUTPUT_PATH)
         family_fixtures = [entry for entry in fixtures if entry["family"] == family]
         points = sorted(family_fixtures, key=lambda entry: LEVELS.index(entry["degradation_level"]))
 
-        commitment_classes: OrderedDict[str, dict[str, object]] = OrderedDict()
-        for commitment_class in COMMITMENT_CLASS_ORDER:
-            commitment_classes[commitment_class] = {
-                "passed": 0,
-                "failed": 0,
-                "failure_labels": set(),
-            }
+        commitment_classes: dict[str, dict[str, object]] = {
+            commitment_class: {"passed": 0, "failed": 0, "failure_labels": set()}
+            for commitment_class in COMMITMENT_CLASS_ORDER
+        }
 
         for fixture_entry in points:
             fixture_path = REPO_ROOT / str(fixture_entry["path"])
@@ -131,7 +127,7 @@ def generate_replay_semantic_integrity_artifact(output_path: Path = OUTPUT_PATH)
                 else:
                     commitment_classes[commitment_class]["passed"] += 1
 
-        serializable_classes: OrderedDict[str, dict[str, object]] = OrderedDict()
+        serializable_classes: dict[str, dict[str, object]] = {}
         for commitment_class in COMMITMENT_CLASS_ORDER:
             values = commitment_classes[commitment_class]
             serializable_classes[commitment_class] = {