Flagsmith
diff --git a/‎tests/conftest.py‎
Lines changed: 192 additions & 0 deletions b/‎tests/conftest.py‎
Lines changed: 192 additions & 0 deletions
diff --git a/‎tests/harnesses/__init__.py‎
Lines changed: 21 additions & 0 deletions b/‎tests/harnesses/__init__.py‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎tests/harnesses/_base.py‎
Lines changed: 71 additions & 0 deletions b/‎tests/harnesses/_base.py‎
Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,192 @@
+"""Pytest fixtures.
+
+Unit tests run anywhere. The engine-parity suite drives a
+`DialectTestHarness` (see `tests/harnesses`) — one harness per SQL
+engine, registered in `HARNESSES`. Each harness owns its own session,
+scratch-table DDL, and batched INSERT / SELECT shapes.
+
+Per harness session, the fixtures:
+
+  - load every case-with-identity into the harness's scratch table in
+    one batched INSERT (`harness_identity_table`);
+  - translate each (case, segment) pair against the harness's dialect
+    and ask the harness to evaluate them all in one batched SELECT
+    (`harness_results`).
+
+Parametrised tests do an in-memory dict lookup against that result.
+"""
+
+import copy
+import json
+from collections.abc import Iterator
+from pathlib import Path
+from typing import Any, TypedDict, cast
+
+import json5
+import pytest
+from flag_engine.context.types import EvaluationContext, IdentityContext, SegmentContext
+from flag_engine.result.types import EvaluationResult
+
+from flagsmith_sql_flag_engine import TranslateContext, translate_segment
+from tests.harnesses import (
+    HARNESSES,
+    DialectTestHarness,
+    EvaluationCase,
+    IdentityRow,
+)
+
+
+class EngineTestCase(TypedDict):
+    """An engine-test-data fixture file. The `result` field (engine-evaluated
+    flag values) is carried through but unused by the engine-parity suite."""
+
+    # Stem of the fixture file the case was loaded from.
+    name: str
+    # The fixture's `context` entry; its optional `segments` mapping is
+    # what the parity fixtures iterate over and translate.
+    context: EvaluationContext
+    # The fixture's `result` entry, kept verbatim.
+    result: EvaluationResult
+
+
+class SegmentEngineTestCase(EngineTestCase):
+    """An `EngineTestCase` paired with one of the segments in its context.
+
+    `SEGMENT_TEST_CASES` holds one entry per (case, segment) pair."""
+
+    # Key of the segment within the case's `context["segments"]` mapping.
+    segment_key: str
+    # The segment definition stored under that key.
+    segment_context: SegmentContext
+
+
+class SegmentTestResult(TypedDict):
+    """A match result for a given segment."""
+
+    # `name` of the test case the segment belongs to.
+    test_case_name: str
+    # Key of the evaluated segment within that case's context.
+    segment_key: str
+    # Match flag reported by the harness's `evaluate` for this pair.
+    is_match: bool
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+ENGINE_TEST_DATA = REPO_ROOT / "engine-test-data" / "test_cases"
+TEST_CASE_PATHS: list[Path] = sorted(
+    [*ENGINE_TEST_DATA.glob("*.json"), *ENGINE_TEST_DATA.glob("*.jsonc")]
+)
+TEST_CASES: list[EngineTestCase] = [
+    {
+        "name": p.stem,
+        "context": (raw := json5.loads(p.read_text()))["context"],
+        "result": raw["result"],
+    }
+    for p in TEST_CASE_PATHS
+]
+SEGMENT_TEST_CASES: list[SegmentEngineTestCase] = [
+    {
+        **test_case,
+        "segment_key": segment_key,
+        "segment_context": segment_context,
+    }
+    for test_case in TEST_CASES
+    for segment_key, segment_context in (test_case["context"].get("segments") or {}).items()
+]
+
+
+# ASCII Unit Separator. Used to pack `(case_name, segment_key)` into a
+# single string column on the engine-parity SELECT and split it back at
+# row-iteration time. Picked over `:` (or any printable character) so a
+# future case_name or segment_key containing punctuation can't collide.
+# The split is on the first occurrence only, so the case name is the
+# part that must never contain this character.
+_PAIR_SEP = "\x1f"
+
+
+@pytest.fixture(scope="session", params=HARNESSES, ids=lambda h: h.name)
+def harness(request: pytest.FixtureRequest) -> DialectTestHarness:
+    return cast(DialectTestHarness, request.param)
+
+
+@pytest.fixture(scope="session")
+def harness_session(harness: DialectTestHarness) -> Iterator[Any]:
+    with harness.session() as sess:
+        yield sess
+
+
+@pytest.fixture(scope="session")
+def harness_identities() -> list[EngineTestCase]:
+    """Deep-copied cases with `environment.key` suffixed for cross-case
+    uniqueness. Same shape across harnesses (the suffixing is dialect-
+    agnostic), so the fixture is computed once per pytest session."""
+    overridden: list[EngineTestCase] = []
+    for identity_id, case in enumerate(TEST_CASES, start=1):
+        case = copy.deepcopy(case)
+        case["context"]["environment"]["key"] += str(identity_id)
+        overridden.append(case)
+    return overridden
+
+
+@pytest.fixture(scope="session")
+def harness_identity_table(
+    harness: DialectTestHarness,
+    harness_session: Any,
+    harness_identities: list[EngineTestCase],
+) -> str:
+    """Per-harness scratch IDENTITIES table loaded with one row per case-
+    with-identity. Cases without an identity get no row — their segments
+    compile to row-independent SQL, so the empty result still gives the
+    right answer."""
+    rows: list[IdentityRow] = []
+    for identity_id, case in enumerate(harness_identities, start=1):
+        ctx = case["context"]
+        identity: IdentityContext | None = ctx.get("identity")
+        if not identity:
+            continue
+        traits = identity.get("traits")
+        rows.append(
+            IdentityRow(
+                environment_id=ctx["environment"]["key"],
+                id=identity_id,
+                identifier=identity.get("identifier") or "",
+                identity_key=identity.get("key") or "",
+                traits_json=json.dumps(traits) if traits else None,
+            )
+        )
+    return harness.setup_identities(harness_session, rows)
+
+
+@pytest.fixture(scope="session")
+def harness_results(
+    harness: DialectTestHarness,
+    harness_session: Any,
+    harness_identity_table: str,
+    harness_identities: list[EngineTestCase],
+) -> dict[tuple[str, str], SegmentTestResult]:
+    """Run every (case, segment) pair's translated SQL through the harness
+    in one batched query.
+
+    Returns a `(case_name, segment_key) -> SegmentTestResult` dict. Every
+    case in the dataset compiles today; cases that need to fall back to
+    the engine are listed in the harness's `xfail_case_names` rather than
+    carrying a third state on the way through.
+    """
+    cases: list[EvaluationCase] = []
+    for case in harness_identities:
+        ctx = case["context"]
+        env_key = ctx["environment"]["key"]
+        for segment_key, segment in (ctx.get("segments") or {}).items():
+            translate_ctx = TranslateContext(
+                evaluation_context=ctx,
+                dialect=harness.dialect,
+            )
+            sql = translate_segment(segment, translate_ctx)
+            assert sql is not None, (
+                f"case {case['name']} seg {segment_key} unsupported on {harness.name} — "
+                "either fix the translator or add the case name to the harness's "
+                "xfail_case_names"
+            )
+            cases.append(
+                EvaluationCase(
+                    pair_id=case["name"] + _PAIR_SEP + segment_key,
+                    environment_key=env_key,
+                    predicate_sql=sql,
+                )
+            )
+
+    raw = harness.evaluate(harness_session, harness_identity_table, cases)
+    results: dict[tuple[str, str], SegmentTestResult] = {}
+    for pair_id, is_match in raw.items():
+        case_name, segment_key = pair_id.split(_PAIR_SEP, 1)
+        results[(case_name, segment_key)] = SegmentTestResult(
+            test_case_name=case_name,
+            segment_key=segment_key,
+            is_match=is_match,
+        )
+    return results
@@ -0,0 +1,21 @@
+"""Engine-parity test harnesses — one per SQL engine.
+
+The conftest fixtures are parametrised over `HARNESSES`; adding a new
+dialect means writing one harness module and appending it here.
+"""
+
+from tests.harnesses._base import (
+    DialectTestHarness,
+    EvaluationCase,
+    IdentityRow,
+)
+from tests.harnesses.clickhouse import ClickHouseHarness
+
+# Registry of harness instances, created once at import time. The
+# conftest `harness` fixture is parametrised over this list, so each
+# entry becomes one fixture parametrisation.
+HARNESSES: list[DialectTestHarness] = [ClickHouseHarness()]
+
+# Public names of this package; the concrete harness classes are reached
+# through `HARNESSES` rather than exported individually.
+__all__ = [
+    "DialectTestHarness",
+    "EvaluationCase",
+    "HARNESSES",
+    "IdentityRow",
+]
@@ -0,0 +1,71 @@
+"""Engine-parity test harness protocol.
+
+Each `DialectTestHarness` adapts the engine-parity suite to one SQL
+engine. The conftest fixtures are parametrised over the registered
+harnesses; a test does an in-memory dict lookup against the harness's
+results.
+"""
+
+from contextlib import AbstractContextManager
+from dataclasses import dataclass
+from typing import Any, Protocol
+
+from flagsmith_sql_flag_engine.dialect import Dialect
+
+
+@dataclass(frozen=True)
+class IdentityRow:
+    """One row to seed into the harness's scratch IDENTITIES table.
+
+    Mirrors the canonical IDENTITIES schema: 4 typed columns plus a
+    `traits` payload pre-serialised to JSON (None when the source case
+    has no traits). Instances are immutable (`frozen=True`)."""
+
+    # Environment key the row belongs to; `evaluate` filters rows on it.
+    environment_id: str
+    # Numeric row id, assigned by whoever builds the rows.
+    id: int
+    # Identity identifier; the conftest passes "" when the case has none.
+    identifier: str
+    # Identity key; the conftest passes "" when the case has none.
+    identity_key: str
+    # Traits serialised as a JSON string, or None when there are none.
+    traits_json: str | None
+
+
+@dataclass(frozen=True)
+class EvaluationCase:
+    """One (case, segment) predicate to run against the scratch table.
+    `pair_id` is an opaque round-trip key the harness echoes back in its
+    result dict — the conftest uses it to recover the (case_name,
+    segment_key) tuple."""
+
+    # Opaque key; becomes the key of this case's entry in the result dict.
+    pair_id: str
+    # Compared against the scratch table's `environment_id` column.
+    environment_key: str
+    # Translated segment predicate, placed inside the query's WHERE clause.
+    predicate_sql: str
+
+
+class DialectTestHarness(Protocol):
+    """Adapter for running the engine-parity suite against one SQL engine.
+
+    Concrete harnesses own session/connection setup, scratch-table DDL,
+    INSERT batching, and the (case, segment) mega-SELECT shape — all the
+    bits that vary by SQL engine.
+
+    This is a structural protocol: any object with these attributes and
+    methods qualifies, no inheritance required.
+    """
+
+    # Short label for the harness; the conftest uses it as the pytest
+    # parameter id and in assertion messages.
+    name: str
+    # Dialect handed to the translator when compiling segments.
+    dialect: Dialect
+    # Names of cases expected to fail on this engine.
+    # NOTE(review): the consumer of this set is not in this module — confirm
+    # where the xfail marking is applied.
+    xfail_case_names: set[str]
+
+    def session(self) -> AbstractContextManager[Any]:
+        """Open a session/connection. Caller manages lifecycle via ctx-mgr.
+
+        The object yielded on entering the context manager is what gets
+        passed as `session` to `setup_identities` and `evaluate`."""
+
+    def setup_identities(self, session: Any, rows: list[IdentityRow]) -> str:
+        """Create scratch IDENTITIES table on `session`, batch-INSERT all
+        `rows`, return the fully-qualified table name.
+
+        The returned name is what callers pass back as `identity_table`
+        to `evaluate`."""
+
+    def evaluate(
+        self,
+        session: Any,
+        identity_table: str,
+        cases: list[EvaluationCase],
+    ) -> dict[str, bool]:
+        """Run all `cases` as one batched query. Each case translates to
+        `EXISTS (SELECT 1 FROM identity_table i WHERE
+        i.environment_id = case.environment_key AND (case.predicate_sql))`
+        (or the dialect's equivalent). Returns `pair_id -> is_match`.
+
+        Each returned key is the `pair_id` of one of the given cases,
+        echoed back unchanged."""