scoropeza
diff --git a/agent/pyproject.toml b/agent/pyproject.toml
Lines changed: 1 addition & 1 deletion
diff --git a/agent/tests/test_cedar_parity.py b/agent/tests/test_cedar_parity.py
Lines changed: 181 additions & 0 deletions
diff --git a/agent/tests/test_cedarpy_annotations_contract.py b/agent/tests/test_cedarpy_annotations_contract.py
Lines changed: 157 additions & 0 deletions
diff --git a/agent/uv.lock b/agent/uv.lock
Lines changed: 1 addition & 1 deletion
diff --git a/cdk/package.json b/cdk/package.json
Lines changed: 1 addition & 0 deletions
@@ -11,7 +11,7 @@ dependencies = [
     "uvicorn==0.42.0",
     "aws-opentelemetry-distro~=0.15.0",
     "mcp==1.23.0",
-    "cedarpy>=4.8.0",
+    "cedarpy==4.8.0",
 ]
 
 [tool.bandit]
 
@@ -0,0 +1,181 @@
+"""Cedar cross-engine parity — agent side (cedarpy).
+
+Loads every ``contracts/cedar-parity/*.json`` fixture, runs each
+``(policies, input)`` through ``cedarpy.is_authorized``, and asserts the
+observed ``(decision, matching_rule_ids)`` equals the fixture's
+``expected`` payload.
+
+The companion test ``cdk/test/handlers/shared/cedar-parity.test.ts`` runs
+the same fixtures through ``@cedar-policy/cedar-wasm``. If either side
+disagrees with the fixture, CI fails BEFORE deploy — satisfying the
+cross-engine parity contract (decision #23, finding #1, §15.6 of
+``docs/design/CEDAR_HITL_GATES.md``).
+
+Fixture path resolution mirrors the pattern in
+``test_prompts.py::TestCrossLanguageHashParity`` for
+``contracts/memory-hash-vectors.json``.
+"""
+
+import json
+import os
+from pathlib import Path
+
+# Hard import (not importorskip): the parity contract REQUIRES cedarpy.
+# A dependency regression that drops cedarpy must fail loudly, not be
+# silently skipped — skipping would let divergence reach production.
+# See silent-failure audit finding #8 (Chunk 1 review, 2026-05-07).
+import cedarpy
+import pytest
+
+# Shared fixture directory: two levels above this file's directory
+# (agent/tests/ -> repository root), then contracts/cedar-parity.
+_FIXTURE_DIR = Path(os.path.dirname(__file__)) / ".." / ".." / "contracts" / "cedar-parity"
+# Collapse the ".." segments so error messages show a clean absolute path.
+_FIXTURE_DIR = _FIXTURE_DIR.resolve()
+
+# The only values a fixture's ``expected.decision`` may take (lowercase).
+_VALID_DECISIONS = frozenset({"allow", "deny"})
+
+
+def _validate_fixture(fixture: dict, path: Path) -> None:
+    """Reject malformed fixtures at load time so bad data fails loud.
+
+    Verifies the top-level keys, that each of ``input.principal`` /
+    ``input.action`` / ``input.resource`` is an object carrying ``type`` and
+    ``id`` (the keys ``_entity_uid`` reads), and that ``expected`` holds a
+    canonical lowercase ``decision`` plus a list-of-strings
+    ``matching_rule_ids``.
+
+    Args:
+        fixture: Parsed JSON content of one fixture file.
+        path: Location of the fixture; only ``path.name`` is used, to label errors.
+
+    Raises:
+        AssertionError: On the first structural problem found.
+    """
+    if not isinstance(fixture, dict):
+        raise AssertionError(f"{path.name}: fixture root must be a JSON object")
+    for required in ("name", "policies", "input", "expected"):
+        if required not in fixture:
+            raise AssertionError(f"{path.name}: missing required field {required!r}")
+
+    fixture_input = fixture["input"]
+    if not isinstance(fixture_input, dict):
+        raise AssertionError(f"{path.name}: input must be a JSON object")
+    for required in ("principal", "action", "resource"):
+        if required not in fixture_input:
+            raise AssertionError(f"{path.name}: input missing {required!r}")
+        # Catch a malformed entity reference here with a labelled message
+        # instead of a bare KeyError/TypeError when the request is built.
+        ref = fixture_input[required]
+        if not (isinstance(ref, dict) and "type" in ref and "id" in ref):
+            raise AssertionError(
+                f"{path.name}: input {required!r} must be an object with 'type' and 'id'"
+            )
+
+    expected = fixture["expected"]
+    if (
+        not isinstance(expected, dict)
+        or "decision" not in expected
+        or "matching_rule_ids" not in expected
+    ):
+        raise AssertionError(f"{path.name}: expected missing decision/matching_rule_ids")
+    # Enforce lowercase canonical form — both engines report lowercase
+    # natively (cedar-wasm) or are normalized on read (cedarpy via .value.lower()).
+    # Rejecting case drift at load prevents a fixture author from writing
+    # "Deny" and having only one engine's comparator hit the case-mismatch.
+    # The isinstance guard keeps an unhashable value (e.g. a list) from
+    # raising TypeError inside the frozenset membership test.
+    decision = expected["decision"]
+    if not isinstance(decision, str) or decision not in _VALID_DECISIONS:
+        raise AssertionError(
+            f"{path.name}: decision must be lowercase in {_VALID_DECISIONS}, "
+            f"got {expected['decision']!r}"
+        )
+    # The test sorts this value before comparing; a bare string would be
+    # split into single characters by sorted() and compared as such.
+    rule_ids = expected["matching_rule_ids"]
+    if not isinstance(rule_ids, list) or not all(isinstance(r, str) for r in rule_ids):
+        raise AssertionError(
+            f"{path.name}: matching_rule_ids must be a list of strings, got {rule_ids!r}"
+        )
+
+
+def _load_fixtures() -> list[dict]:
+    """Load and validate every ``*.json`` parity fixture, in sorted filename order.
+
+    Only ``*.json`` files are globbed, so the README.md that lives in the
+    same directory is never read.
+
+    Returns:
+        The parsed fixtures, each already passed through ``_validate_fixture``.
+
+    Raises:
+        AssertionError: If the fixture directory is missing, holds no JSON
+            fixtures, a file is not valid UTF-8 JSON, or a fixture fails
+            validation.
+    """
+    assert _FIXTURE_DIR.is_dir(), (
+        f"expected fixture dir at {_FIXTURE_DIR}; see contracts/cedar-parity/README.md"
+    )
+    fixtures = []
+    for path in sorted(_FIXTURE_DIR.glob("*.json")):
+        try:
+            # Decode explicitly as UTF-8 rather than relying on the platform's
+            # locale encoding, so fixtures read identically on every machine.
+            with path.open(encoding="utf-8") as f:
+                fixture = json.load(f)
+        except (UnicodeDecodeError, json.JSONDecodeError) as exc:
+            # The bare decoder error does not say which fixture is broken.
+            raise AssertionError(f"{path.name}: not valid UTF-8 JSON: {exc}") from exc
+        _validate_fixture(fixture, path)
+        fixtures.append(fixture)
+    assert fixtures, f"no fixtures found under {_FIXTURE_DIR}; at least one golden file is required"
+    return fixtures
+
+
+def _entity_uid(entity_ref: dict) -> str:
+    """Format an entity reference dict as a Cedar UID string literal.
+
+    Args:
+        entity_ref: Mapping with ``type`` (e.g. ``Agent::Action``) and ``id``.
+
+    Returns:
+        ``<type>::"<id>"`` with backslashes and double quotes in the id
+        backslash-escaped, so an id containing either character still
+        yields a well-formed quoted literal.
+
+    Raises:
+        KeyError: If ``type`` or ``id`` is absent.
+    """
+    # Escape the backslash first so the escapes added for quotes are not doubled.
+    escaped_id = str(entity_ref["id"]).replace("\\", "\\\\").replace('"', '\\"')
+    return f'{entity_ref["type"]}::"{escaped_id}"'
+
+
+def _build_entities(fixture_input: dict) -> list[dict]:
+    """Build cedarpy's entities list from principal/action/resource references.
+
+    Includes ``action`` so the two engines receive equivalent entity sets;
+    cedarpy tolerates undeclared actions today but the TS side passes an
+    empty entities list — keeping both sides symmetric prevents silent
+    asymmetric failures if a future fixture attaches attributes to the
+    action entity. See silent-failure audit finding #3.
+
+    Each distinct ``(type, id)`` pair is emitted once, in
+    principal/action/resource order, so a fixture whose principal and
+    resource name the same entity does not produce two records for one UID.
+    NOTE(review): this assumes the engine rejects duplicate entity entries —
+    confirm against the Cedar entities documentation.
+
+    Args:
+        fixture_input: The fixture's ``input`` object.
+
+    Returns:
+        One ``{"uid", "attrs", "parents"}`` record per distinct reference;
+        references that are missing or lack ``type``/``id`` are left out.
+    """
+    entities = []
+    # A list (not a set) so unhashable fixture values cannot raise here;
+    # it holds at most three entries.
+    seen_uids = []
+    for key in ("principal", "action", "resource"):
+        ref = fixture_input.get(key)
+        if not (isinstance(ref, dict) and "type" in ref and "id" in ref):
+            continue
+        uid = (ref["type"], ref["id"])
+        if uid in seen_uids:
+            continue
+        seen_uids.append(uid)
+        entities.append(
+            {
+                "uid": {"type": ref["type"], "id": ref["id"]},
+                "attrs": {},
+                "parents": [],
+            }
+        )
+    return entities
+
+
+def _build_request(fixture_input: dict) -> dict:
+    """Convert a fixture ``input`` object into a cedarpy ``is_authorized`` request.
+
+    The three entity references become Cedar UID strings via ``_entity_uid``;
+    ``context`` is passed through unchanged and defaults to an empty dict.
+    """
+    request = {
+        role: _entity_uid(fixture_input[role])
+        for role in ("principal", "action", "resource")
+    }
+    request["context"] = fixture_input.get("context", {})
+    return request
+
+
+def _recover_rule_ids(policies: str, matching_policy_ids: list[str]) -> list[str]:
+    """Translate the engine's policy IDs into their ``@rule_id`` annotation values.
+
+    The policy text is serialized with ``cedarpy.policies_to_json_str`` and
+    each ``staticPolicies`` entry's ``annotations.rule_id`` is indexed by
+    policy ID. Every ID in ``matching_policy_ids`` must resolve to a
+    non-empty rule ID: an unannotated match is treated as a fixture error
+    rather than dropped, because dropping it could hide a real cross-engine
+    disagreement (silent-failure audit finding #1, Chunk 1 review,
+    2026-05-07). Fixtures are therefore expected to annotate every policy,
+    the base permit included.
+
+    Returns:
+        The recovered rule IDs, sorted.
+
+    Raises:
+        AssertionError: If serializing or parsing the policies fails, or a
+            matching policy has no ``@rule_id``.
+    """
+    try:
+        policy_set = json.loads(cedarpy.policies_to_json_str(policies))
+    except Exception as exc:
+        raise AssertionError(
+            f"cedarpy.policies_to_json_str returned an unparseable result: "
+            f"{type(exc).__name__}: {exc}"
+        ) from exc
+
+    rule_id_by_policy = {}
+    for policy_id, policy_body in policy_set.get("staticPolicies", {}).items():
+        rule_id_by_policy[policy_id] = policy_body.get("annotations", {}).get("rule_id")
+
+    rule_ids = []
+    for pid in matching_policy_ids:
+        annotated = rule_id_by_policy.get(pid)
+        if annotated:
+            rule_ids.append(annotated)
+            continue
+        raise AssertionError(
+            f"cedarpy matched policy {pid!r} but the fixture's policies define "
+            f"no @rule_id annotation for it; every fixture policy (including the "
+            f"base permit) must carry a rule_id so cross-engine disagreement "
+            f"surfaces rather than being silently dropped"
+        )
+    rule_ids.sort()
+    return rule_ids
+
+
+# Loaded once at import so a broken fixture set fails collection outright.
+_FIXTURES = _load_fixtures()
+
+
+@pytest.mark.parametrize("fixture", _FIXTURES, ids=[f["name"] for f in _FIXTURES])
+def test_cedarpy_matches_fixture_decision(fixture: dict) -> None:
+    """Check cedarpy's decision and recovered rule IDs against one fixture's ``expected``."""
+    policies = fixture["policies"]
+    fixture_input = fixture["input"]
+    expected = fixture["expected"]
+
+    result = cedarpy.is_authorized(
+        _build_request(fixture_input),
+        policies,
+        _build_entities(fixture_input),
+    )
+
+    # cedarpy reports Decision.Allow / Decision.Deny; fixtures hold the
+    # lowercase spelling cedar-wasm uses, so lowercase the enum value first.
+    # The fixture side was already checked for case in _validate_fixture.
+    observed_decision = result.decision.value.lower()
+    expected_decision = expected["decision"]
+    assert observed_decision == expected_decision, (
+        f"fixture {fixture['name']!r}: decision drift — "
+        f"cedarpy returned {observed_decision!r}, fixture expects {expected_decision!r}"
+    )
+
+    # Both sides are sorted, so the order of matches is not part of the contract.
+    expected_rule_ids = sorted(expected["matching_rule_ids"])
+    observed_rule_ids = _recover_rule_ids(policies, result.diagnostics.reasons)
+    assert observed_rule_ids == expected_rule_ids, (
+        f"fixture {fixture['name']!r}: matching_rule_ids drift — "
+        f"cedarpy returned {observed_rule_ids!r}, fixture expects {expected_rule_ids!r}"
+    )
+
+
+def test_fixture_dir_exists() -> None:
+    """Fail if the fixture directory is missing or the loaded fixture list is empty."""
+    dir_present = _FIXTURE_DIR.is_dir()
+    assert dir_present
+    fixture_count = len(_FIXTURES)
+    assert fixture_count >= 1
@@ -0,0 +1,157 @@
+"""Cedar-HITL Day-1 spike: cedarpy annotation round-trip contract.
+
+Locks the assumption (decision #22 / §15.6 of docs/design/CEDAR_HITL_GATES.md)
+that ``cedarpy.policies_to_json_str()`` preserves all five annotations the
+engine relies on — ``@rule_id``, ``@tier``, ``@approval_timeout_s``,
+``@severity``, ``@category`` — verbatim as string-valued entries under
+``staticPolicies.<policy_id>.annotations``.
+
+If cedarpy's annotation surface ever changes shape (renamed key, dropped
+values, typed coercion), this test flips red BEFORE the engine's
+annotation-merging logic starts returning subtly-wrong answers.
+
+Parity with the TypeScript side is tested separately in
+``test_cedar_parity.py`` against the shared ``contracts/cedar-parity/``
+fixtures; this module validates only the agent-side API shape.
+"""
+
+import json
+
+import pytest
+
+# NOTE(review): importorskip makes pytest SKIP this whole module (not fail)
+# when cedarpy cannot be imported. The sibling test_cedar_parity.py
+# deliberately uses a hard ``import cedarpy`` so that a dropped dependency
+# fails loudly — confirm that a silent skip is really intended here.
+cedarpy = pytest.importorskip("cedarpy")
+
+
+# Reference policy carrying all five annotations checked below. Adjacent
+# string literals are concatenated into a single one-line Cedar policy.
+_ANNOTATED_POLICY = (
+    '@tier("soft") '
+    '@rule_id("force_push_any") '
+    '@approval_timeout_s("300") '
+    '@severity("medium") '
+    '@category("destructive") '
+    'forbid (principal, action == Agent::Action::"execute_bash", resource) '
+    'when { context.command like "*git push --force*" };'
+)
+
+
+def _first_static_policy(policies_text: str) -> dict:
+    """Serialize ``policies_text`` via cedarpy and return the first ``staticPolicies`` body.
+
+    "First" means first in the parsed JSON object's iteration order; callers
+    in this module only pass a single-policy text.
+    """
+    policy_set = json.loads(cedarpy.policies_to_json_str(policies_text))
+    static_policies = policy_set.get("staticPolicies", {})
+    assert static_policies, f"expected at least one static policy, got keys={list(policy_set)}"
+    bodies = iter(static_policies.values())
+    return next(bodies)
+
+
+class TestAnnotationsRoundTrip:
+    """Each of the five annotations comes back from cedarpy as the exact string written."""
+
+    @staticmethod
+    def _annotations() -> dict:
+        """Return the ``annotations`` mapping of the parsed reference policy."""
+        return _first_static_policy(_ANNOTATED_POLICY)["annotations"]
+
+    def test_policies_to_json_str_returns_static_policies_wrapper(self):
+        # The design's annotation-merging code builds its lookup table from
+        # the ``staticPolicies`` wrapper; if cedarpy flattened it, that
+        # construction would break silently.
+        serialized = cedarpy.policies_to_json_str(_ANNOTATED_POLICY)
+        assert "staticPolicies" in json.loads(serialized)
+
+    def test_annotations_key_present_on_parsed_policy(self):
+        body = _first_static_policy(_ANNOTATED_POLICY)
+        assert "annotations" in body, (
+            f"cedarpy dropped the annotations key from parsed policy; body keys were {list(body)}"
+        )
+
+    def test_rule_id_annotation_preserved(self):
+        observed = self._annotations().get("rule_id")
+        assert observed == "force_push_any"
+
+    def test_tier_annotation_preserved(self):
+        observed = self._annotations().get("tier")
+        assert observed == "soft"
+
+    def test_approval_timeout_s_annotation_preserved_as_string(self):
+        annotations = self._annotations()
+        # Cedar annotation values are strings; per §6.3 the engine converts
+        # to int inside ``_merge_annotations``. An int from cedarpy would
+        # still pass that ``try: int(...)``, but the documented contract
+        # (§5.2) says "string", so the type is pinned as well as the value.
+        assert annotations.get("approval_timeout_s") == "300"
+        assert isinstance(annotations.get("approval_timeout_s"), str)
+
+    def test_severity_annotation_preserved(self):
+        observed = self._annotations().get("severity")
+        assert observed == "medium"
+
+    def test_category_annotation_preserved(self):
+        observed = self._annotations().get("category")
+        assert observed == "destructive"
+
+    def test_all_five_annotations_present_exactly(self):
+        # Exact equality: an extra or missing key fails, not just a wrong value.
+        wanted = dict(
+            tier="soft",
+            rule_id="force_push_any",
+            approval_timeout_s="300",
+            severity="medium",
+            category="destructive",
+        )
+        observed = self._annotations()
+        assert observed == wanted, f"annotations drift: expected {wanted}, got {observed}"
+
+
+class TestDiagnosticsShape:
+    """``is_authorized`` exposes the matching policy IDs as ``diagnostics.reasons``."""
+
+    def test_diagnostics_reasons_is_a_list(self):
+        # The engine's three-outcome branching reads ``diagnostics.reasons``
+        # to get the matching policy IDs, which the annotation lookup table
+        # maps back to ``@rule_id`` values. A rename on cedarpy's side
+        # (singular ``.reason``, a nested object, ...) would silently cost
+        # the engine its ability to surface rule IDs to users.
+        request = dict(
+            principal='Agent::TaskAgent::"new_task"',
+            action='Agent::Action::"execute_bash"',
+            resource='Agent::BashCommand::"command"',
+            context={"command": "git push --force origin main"},
+        )
+        entities = [
+            {"uid": {"type": entity_type, "id": entity_id}, "attrs": {}, "parents": []}
+            for entity_type, entity_id in (
+                ("Agent::TaskAgent", "new_task"),
+                ("Agent::BashCommand", "command"),
+            )
+        ]
+        result = cedarpy.is_authorized(request, _ANNOTATED_POLICY, entities)
+        assert hasattr(result.diagnostics, "reasons"), (
+            "cedarpy.Diagnostics no longer exposes .reasons — engine rule-ID "
+            "recovery will break. Update §15.6 IMPL-29 before proceeding."
+        )
+        reasons = result.diagnostics.reasons
+        assert isinstance(reasons, list)
+        assert len(reasons) >= 1
+
+
+class TestMultiMatchDiagnostics:
+    """When two policies match, ``diagnostics.reasons`` lists both policy IDs."""
+
+    def test_two_matching_policies_produce_two_reasons(self):
+        # A second forbid whose pattern also matches the command used below.
+        second_policy = (
+            '@tier("soft") '
+            '@rule_id("force_push_main") '
+            '@approval_timeout_s("600") '
+            '@severity("high") '
+            'forbid (principal, action == Agent::Action::"execute_bash", resource) '
+            'when { context.command like "*git push --force origin main*" };'
+        )
+        policies = "\n".join((_ANNOTATED_POLICY, second_policy))
+        request = dict(
+            principal='Agent::TaskAgent::"new_task"',
+            action='Agent::Action::"execute_bash"',
+            resource='Agent::BashCommand::"command"',
+            context={"command": "git push --force origin main"},
+        )
+        entities = [
+            {"uid": {"type": entity_type, "id": entity_id}, "attrs": {}, "parents": []}
+            for entity_type, entity_id in (
+                ("Agent::TaskAgent", "new_task"),
+                ("Agent::BashCommand", "command"),
+            )
+        ]
+        result = cedarpy.is_authorized(request, policies, entities)
+        # §6.3 annotation-merging needs both policy IDs; if cedarpy stopped
+        # at the first match, the "max severity" and "min timeout" merge
+        # rules would never fire.
+        assert len(result.diagnostics.reasons) == 2, (
+            f"expected 2 matching policies, got {result.diagnostics.reasons}"
+        )
@@ -26,6 +26,7 @@
     "@aws-sdk/lib-dynamodb": "^3.1021.0",
     "@aws-sdk/s3-request-presigner": "^3.1021.0",
     "@aws/durable-execution-sdk-js": "^1.1.0",
+    "@cedar-policy/cedar-wasm": "4.10.0",
     "aws-cdk-lib": "^2.238.0",
     "cdk-nag": "^2.37.55",
     "constructs": "^10.3.0",
Original file line number	Diff line number	Diff line change
`@@ -11,7 +11,7 @@ dependencies = [`
`11`	`11`	`"uvicorn==0.42.0",`
`12`	`12`	`"aws-opentelemetry-distro~=0.15.0",`
`13`	`13`	`"mcp==1.23.0",`
`14`		`- "cedarpy>=4.8.0",`
	`14`	`+ "cedarpy==4.8.0",`
`15`	`15`	`]`
`16`	`16`
`17`	`17`	`[tool.bandit]`