Skip to content

Commit 4a33ce3

Browse files
committed
test(ClickHouse): Engine-parity harness on docker-compose ClickHouse
beep boop
1 parent fd2c9c7 commit 4a33ce3

5 files changed

Lines changed: 482 additions & 0 deletions

File tree

tests/conftest.py

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
"""Pytest fixtures.
2+
3+
Unit tests run anywhere. The engine-parity suite drives a
4+
`DialectTestHarness` (see `tests/harnesses`) — one harness per SQL
5+
engine, registered in `HARNESSES`. Each harness owns its own session,
6+
scratch-table DDL, and batched INSERT / SELECT shapes.
7+
8+
Per harness session, the fixtures:
9+
10+
- load every case-with-identity into the harness's scratch table in
11+
one batched INSERT (`harness_identity_table`);
12+
- translate each (case, segment) pair against the harness's dialect
13+
and ask the harness to evaluate them all in one batched SELECT
14+
(`harness_results`).
15+
16+
Parametrised tests do an in-memory dict lookup against that result.
17+
"""
18+
19+
import copy
20+
import json
21+
from collections.abc import Iterator
22+
from pathlib import Path
23+
from typing import Any, TypedDict, cast
24+
25+
import json5
26+
import pytest
27+
from flag_engine.context.types import EvaluationContext, IdentityContext, SegmentContext
28+
from flag_engine.result.types import EvaluationResult
29+
30+
from flagsmith_sql_flag_engine import TranslateContext, translate_segment
31+
from tests.harnesses import (
32+
HARNESSES,
33+
DialectTestHarness,
34+
EvaluationCase,
35+
IdentityRow,
36+
)
37+
38+
39+
class EngineTestCase(TypedDict):
    """One engine-test-data fixture file in the shape this module loads it.

    `result` holds the engine-evaluated flag values shipped with the fixture;
    it is carried through but unused by the engine-parity suite.
    """

    # Identifier of the case (this module's loader uses the file stem).
    name: str
    # Evaluation context the case's segments are translated against.
    context: EvaluationContext
    # Engine-evaluated expectation; not read by the engine-parity suite.
    result: EvaluationResult
46+
47+
48+
class SegmentEngineTestCase(EngineTestCase):
    """An `EngineTestCase` paired with exactly one of its segments."""

    # Key of the segment inside the case context's `segments` mapping.
    segment_key: str
    # The segment definition stored under `segment_key`.
    segment_context: SegmentContext
51+
52+
53+
class SegmentTestResult(TypedDict):
    """Outcome of evaluating one segment for one test case."""

    # Name of the test case the segment belongs to.
    test_case_name: str
    # Key of the evaluated segment.
    segment_key: str
    # Whether the segment matched.
    is_match: bool
59+
60+
61+
# This file lives one directory below the repository root.
REPO_ROOT = Path(__file__).resolve().parents[1]
ENGINE_TEST_DATA = REPO_ROOT / "engine-test-data" / "test_cases"
# Every `.json` and `.jsonc` fixture, sorted so the case order (and anything
# derived from a case's position) is stable from run to run.
TEST_CASE_PATHS: list[Path] = sorted(
    fixture_path
    for pattern in ("*.json", "*.jsonc")
    for fixture_path in ENGINE_TEST_DATA.glob(pattern)
)
66+
def _load_engine_test_case(path: Path) -> EngineTestCase:
    """Parse one fixture file into an `EngineTestCase`.

    The file is decoded as UTF-8 explicitly rather than with the platform's
    default encoding, so fixtures containing non-ASCII text load the same
    way on every machine. Parsing goes through `json5` because the fixture
    set includes `.jsonc` files alongside plain `.json`.

    Raises:
        KeyError: if the parsed document lacks `context` or `result`.
    """
    raw = json5.loads(path.read_text(encoding="utf-8"))
    return {
        "name": path.stem,
        "context": raw["context"],
        "result": raw["result"],
    }


# One entry per fixture file, in `TEST_CASE_PATHS` order.
TEST_CASES: list[EngineTestCase] = [
    _load_engine_test_case(path) for path in TEST_CASE_PATHS
]
74+
# Flattened view: one entry per (case, segment) pair. A case whose context has
# no `segments` (missing, None or empty) contributes nothing.
SEGMENT_TEST_CASES: list[SegmentEngineTestCase] = [
    {
        **engine_case,
        "segment_key": key,
        "segment_context": segment,
    }
    for engine_case in TEST_CASES
    for key, segment in (engine_case["context"].get("segments") or {}).items()
]
83+
84+
85+
# ASCII Unit Separator (0x1F). `(case_name, segment_key)` is packed into one
# string column on the engine-parity SELECT by joining the two parts with this
# character, and split back apart while iterating the result rows. A control
# character was chosen over `:` (or any printable character) so that a future
# case_name or segment_key containing punctuation can't collide with it.
_PAIR_SEP: str = "\x1f"
90+
91+
92+
@pytest.fixture(scope="session", params=HARNESSES, ids=lambda registered: registered.name)
def harness(request: pytest.FixtureRequest) -> DialectTestHarness:
    """Provide each registered harness in turn, one pytest param per entry
    in `HARNESSES`, with the harness's `name` as the param id."""
    selected = request.param
    return cast(DialectTestHarness, selected)
95+
96+
97+
@pytest.fixture(scope="session")
def harness_session(harness: DialectTestHarness) -> Iterator[Any]:
    """Open the harness's session for the duration of the pytest session.

    The session object is whatever the harness's context manager yields; it
    is closed by leaving the `with` block once the fixture is torn down.
    """
    session_manager = harness.session()
    with session_manager as opened:
        yield opened
101+
102+
103+
@pytest.fixture(scope="session")
def harness_identities() -> list[EngineTestCase]:
    """Return deep copies of `TEST_CASES` with `environment.key` suffixed by
    the case's 1-based position, for cross-case uniqueness.

    The suffixing is dialect-agnostic, so the result has the same shape for
    every harness and is computed once per pytest session. The module-level
    `TEST_CASES` entries are left untouched.

    NOTE(review): the position is appended with no separator, so keys such as
    "e1" at position 1 and "e" at position 11 would both become "e11" —
    confirm the fixture data cannot produce that.
    """
    suffixed: list[EngineTestCase] = []
    for position, original in enumerate(TEST_CASES, start=1):
        clone = copy.deepcopy(original)
        environment = clone["context"]["environment"]
        environment["key"] = environment["key"] + str(position)
        suffixed.append(clone)
    return suffixed
114+
115+
116+
@pytest.fixture(scope="session")
def harness_identity_table(
    harness: DialectTestHarness,
    harness_session: Any,
    harness_identities: list[EngineTestCase],
) -> str:
    """Seed the harness's scratch IDENTITIES table and return what
    `setup_identities` returns for it.

    One row is built per case whose context carries a truthy identity; the
    row id is the case's 1-based position in `harness_identities`. Cases
    without an identity get no row — their segments compile to
    row-independent SQL, so the empty result still gives the right answer.
    """
    seed_rows: list[IdentityRow] = []
    for position, engine_case in enumerate(harness_identities, start=1):
        context = engine_case["context"]
        identity: IdentityContext | None = context.get("identity")
        if identity:
            traits = identity.get("traits")
            seed_rows.append(
                IdentityRow(
                    environment_id=context["environment"]["key"],
                    id=position,
                    # Missing identifier / key fall back to the empty string.
                    identifier=identity.get("identifier") or "",
                    identity_key=identity.get("key") or "",
                    # Missing or empty traits are passed as None, not serialised.
                    traits_json=json.dumps(traits) if traits else None,
                )
            )
    return harness.setup_identities(harness_session, seed_rows)
143+
144+
145+
@pytest.fixture(scope="session")
def harness_results(
    harness: DialectTestHarness,
    harness_session: Any,
    harness_identity_table: str,
    harness_identities: list[EngineTestCase],
) -> dict[tuple[str, str], SegmentTestResult]:
    """Run every (case, segment) pair's translated SQL through the harness
    in one batched query.

    Each segment is translated against the harness's dialect, packed into an
    `EvaluationCase` whose `pair_id` is `case_name + _PAIR_SEP + segment_key`,
    and handed to `harness.evaluate` together with all the others.

    A segment the translator cannot handle (`translate_segment` returns
    None) is skipped when its case is listed in the harness's
    `xfail_case_names`, which is what the failure message below tells the
    developer to do. Previously the set was never consulted here, so listing
    a case could not stop the fixture from failing for the whole session.

    Returns:
        A `(case_name, segment_key) -> SegmentTestResult` dict built from
        whatever pairs the harness reports back. Skipped pairs are absent.

    Raises:
        AssertionError: if a segment does not translate and its case is not
            listed in `xfail_case_names`.
    """
    cases: list[EvaluationCase] = []
    for case in harness_identities:
        ctx = case["context"]
        env_key = ctx["environment"]["key"]
        for segment_key, segment in (ctx.get("segments") or {}).items():
            translate_ctx = TranslateContext(
                evaluation_context=ctx,
                dialect=harness.dialect,
            )
            sql = translate_segment(segment, translate_ctx)
            if sql is None:
                if case["name"] in harness.xfail_case_names:
                    # Known-unsupported on this engine: leave the pair out of
                    # the batch instead of failing every test in the session.
                    continue
                raise AssertionError(
                    f"case {case['name']} seg {segment_key} unsupported on {harness.name} — "
                    "either fix the translator or add the case name to the harness's "
                    "xfail_case_names"
                )
            cases.append(
                EvaluationCase(
                    pair_id=case["name"] + _PAIR_SEP + segment_key,
                    environment_key=env_key,
                    predicate_sql=sql,
                )
            )

    raw = harness.evaluate(harness_session, harness_identity_table, cases)
    results: dict[tuple[str, str], SegmentTestResult] = {}
    for pair_id, is_match in raw.items():
        # maxsplit=1: only the first separator divides name from segment key.
        case_name, segment_key = pair_id.split(_PAIR_SEP, 1)
        results[(case_name, segment_key)] = SegmentTestResult(
            test_case_name=case_name,
            segment_key=segment_key,
            is_match=is_match,
        )
    return results

tests/harnesses/__init__.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"""Engine-parity test harnesses — one per SQL engine.
2+
3+
The conftest fixtures are parametrised over `HARNESSES`; adding a new
4+
dialect means writing one harness module and appending it here.
5+
"""
6+
7+
from tests.harnesses._base import (
8+
DialectTestHarness,
9+
EvaluationCase,
10+
IdentityRow,
11+
)
12+
from tests.harnesses.clickhouse import ClickHouseHarness
13+
14+
# Registry of harness instances; a new dialect is added by appending its
# harness here.
HARNESSES: list[DialectTestHarness] = [
    ClickHouseHarness(),
]

# Names re-exported as this package's public surface.
__all__ = [
    "DialectTestHarness",
    "EvaluationCase",
    "HARNESSES",
    "IdentityRow",
]

tests/harnesses/_base.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""Engine-parity test harness protocol.
2+
3+
Each `DialectTestHarness` adapts the engine-parity suite to one SQL
4+
engine. The conftest fixtures are parametrised over the registered
5+
harnesses; a test does an in-memory dict lookup against the harness's
6+
results.
7+
"""
8+
9+
from contextlib import AbstractContextManager
10+
from dataclasses import dataclass
11+
from typing import Any, Protocol
12+
13+
from flagsmith_sql_flag_engine.dialect import Dialect
14+
15+
16+
@dataclass(frozen=True)
17+
class IdentityRow:
18+
"""One row to seed into the harness's scratch IDENTITIES table.
19+
20+
Mirrors the canonical IDENTITIES schema: 4 typed columns plus a
21+
`traits` payload pre-serialised to JSON (None when the source case
22+
has no traits)."""
23+
24+
environment_id: str
25+
id: int
26+
identifier: str
27+
identity_key: str
28+
traits_json: str | None
29+
30+
31+
@dataclass(frozen=True)
class EvaluationCase:
    """A single (case, segment) predicate to run against the scratch table."""

    # Opaque round-trip key: the harness echoes it back in its result dict and
    # the conftest uses it to recover the (case_name, segment_key) tuple.
    pair_id: str
    # Environment key the predicate is evaluated for.
    environment_key: str
    # SQL predicate text for the segment.
    predicate_sql: str
41+
42+
43+
class DialectTestHarness(Protocol):
    """Structural interface for running the engine-parity suite on one SQL
    engine.

    Everything that differs between engines lives in the concrete harness:
    opening the session/connection, the scratch-table DDL, how INSERTs are
    batched, and the shape of the (case, segment) mega-SELECT.
    """

    # Label for the harness.
    name: str
    # Dialect used when translating segments for this engine.
    dialect: Dialect
    # Case names expected to fail on this engine — presumably consumed by the
    # test module to apply xfail marks; confirm against the tests.
    xfail_case_names: set[str]

    def session(self) -> AbstractContextManager[Any]:
        """Return a context manager that opens a session/connection; the
        caller owns its lifecycle through the `with` statement."""
        ...

    def setup_identities(self, session: Any, rows: list[IdentityRow]) -> str:
        """Create the scratch IDENTITIES table on `session`, batch-INSERT
        every entry of `rows`, and return the fully-qualified table name."""
        ...

    def evaluate(
        self,
        session: Any,
        identity_table: str,
        cases: list[EvaluationCase],
    ) -> dict[str, bool]:
        """Evaluate all `cases` in a single batched query.

        Each case translates to
        `EXISTS (SELECT 1 FROM identity_table i WHERE
        i.environment_id = case.environment_key AND (case.predicate_sql))`
        (or the dialect's equivalent).

        Returns a `pair_id -> is_match` mapping.
        """
        ...

0 commit comments

Comments
 (0)