Skip to content

Commit dc5ae0d

Browse files
authored
feat: Bind segment values as query parameters (#20)
* feat: Bind segment values as query parameters

  Add an optional Binder on TranslateContext that promotes value-bearing literals to bound query parameters instead of inlining them, so no user value (notably a regex `%`) reaches a driver that substitutes parameters by %-formatting the query text. With no Binder the output is unchanged.

  beep boop

* test: Add missing When marker to regex binder test

  beep boop
1 parent fd677d2 commit dc5ae0d

10 files changed

Lines changed: 565 additions & 27 deletions

File tree

README.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,36 @@ translator can't handle — typically a REGEX pattern the active dialect's
5050
regex flavour can't compile. Callers should fall back to
5151
`flag_engine.is_context_in_segment` for those segments.
5252

53+
## Bound parameters
54+
55+
By default the translator inlines each segment value as an escaped SQL string literal. Pass a `Binder` on the `TranslateContext` to bind value-bearing literals as query parameters instead.
56+
57+
```python
58+
from flagsmith_sql_flag_engine import (
59+
Binder,
60+
PyformatParamStyle,
61+
TranslateContext,
62+
translate_segment,
63+
)
64+
from flagsmith_sql_flag_engine.dialects import ClickHouseDialect
65+
66+
binder = Binder(PyformatParamStyle())
67+
ctx = TranslateContext(
68+
evaluation_context=eval_context,
69+
dialect=ClickHouseDialect(),
70+
binder=binder,
71+
)
72+
where_expr = translate_segment(segment, ctx)
73+
```
74+
75+
Hand both the translated expression and the collected `binder.params` to the driver:
76+
77+
```python
78+
cursor.execute(f"... WHERE ({where_expr})", binder.params)
79+
```
80+
81+
Two parameter styles are currently supported: `PyformatParamStyle`, which emits `%(name)s` placeholders, and the ClickHouse-specific `ClickHouseServerParamStyle`, which emits `{name:String}` placeholders.
82+
5383
## Schema
5484

5585
Each dialect publishes the table layout it expects via a `schema_ddl`

src/flagsmith_sql_flag_engine/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,22 @@
44
translate_segment(segment, ctx) -> str | None
55
TranslateContext
66
7+
By default the translator inlines each segment value as an escaped SQL
8+
string literal. Pass a `Binder` on the `TranslateContext` to bind
9+
value-bearing literals as query parameters instead — read its params off
10+
`Binder.params` after translation. See `flagsmith_sql_flag_engine.binder`.
11+
712
See README.md for usage. The translator is dialect-aware via the `Dialect`
813
protocol; `flagsmith_sql_flag_engine.dialects.clickhouse.ClickHouseDialect`
914
is the only implementation today.
1015
"""
1116

17+
from flagsmith_sql_flag_engine.binder import (
18+
Binder,
19+
ClickHouseServerParamStyle,
20+
ParamStyle,
21+
PyformatParamStyle,
22+
)
1223
from flagsmith_sql_flag_engine.dialect import Dialect
1324
from flagsmith_sql_flag_engine.translator import (
1425
TRANSLATABLE_OPERATORS,
@@ -20,7 +31,11 @@
2031

2132
__all__ = [
2233
"TRANSLATABLE_OPERATORS",
34+
"Binder",
35+
"ClickHouseServerParamStyle",
2336
"Dialect",
37+
"ParamStyle",
38+
"PyformatParamStyle",
2439
"TranslateContext",
2540
"translate_condition",
2641
"translate_rule",
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from typing import Protocol
2+
3+
4+
class ParamStyle(Protocol):
5+
"""A driver's placeholder syntax for a named bound parameter."""
6+
7+
def placeholder(self, name: str) -> str:
8+
"""The placeholder token referencing bound parameter `name`."""
9+
...
10+
11+
12+
class PyformatParamStyle:
13+
"""`%(name)s`
14+
15+
Used by `clickhouse-driver` which substitutes parameters
16+
client-side via `query % params`."""
17+
18+
def placeholder(self, name: str) -> str:
19+
return f"%({name})s"
20+
21+
22+
class ClickHouseServerParamStyle:
23+
"""`{name:String}`
24+
25+
ClickHouse's native server-side parameter syntax,
26+
used by `clickhouse-connect`."""
27+
28+
def placeholder(self, name: str) -> str:
29+
return "{" + name + ":String}"
30+
31+
32+
class Binder:
33+
"""Collects bound parameter values and mints their placeholders.
34+
35+
Not thread-safe; use one `Binder` per predicate translation.
36+
"""
37+
38+
def __init__(self, style: ParamStyle, prefix: str = "") -> None:
39+
self.params: dict[str, str] = {}
40+
self._style = style
41+
self._prefix = prefix
42+
self._count = 0
43+
44+
def add(self, value: str) -> str:
45+
"""Record `value` under a fresh namespaced name and return its
46+
placeholder token for the active paramstyle."""
47+
name = f"{self._prefix}p{self._count}"
48+
self._count += 1
49+
self.params[name] = value
50+
return self._style.placeholder(name)

src/flagsmith_sql_flag_engine/dialect.py

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,19 @@
33

44
from typing import Protocol
55

6+
from flagsmith_sql_flag_engine.binder import Binder
7+
68

79
class Dialect(Protocol):
810
"""Per-dialect SQL fragments.
911
1012
Methods return SQL string fragments. Inputs are already-formatted SQL
1113
strings (column refs, string literals); the dialect only chooses the
1214
right syntax for the operation.
15+
16+
Methods that embed a segment- or context-derived value take an
17+
optional `binder`: when provided, the value is emitted as a bound
18+
query parameter rather than an inline literal.
1319
"""
1420

1521
name: str # human-readable, used in test ids and error messages
@@ -35,7 +41,14 @@ def trait_path(self, alias: str, trait_key: str) -> str:
3541
"""
3642
...
3743

38-
def trait_eq(self, alias: str, trait_key: str, value: object, negate: bool) -> str:
44+
def trait_eq(
45+
self,
46+
alias: str,
47+
trait_key: str,
48+
value: object,
49+
negate: bool,
50+
binder: Binder | None = None,
51+
) -> str:
3952
"""Type-aware EQUAL / NOT_EQUAL predicate on a trait, mirroring
4053
`flag_engine`'s per-type coercion: the segment value is cast to
4154
the trait's runtime type before compare, and a cast failure
@@ -45,7 +58,13 @@ def trait_eq(self, alias: str, trait_key: str, value: object, negate: bool) -> s
4558
"""
4659
...
4760

48-
def trait_in(self, alias: str, trait_key: str, items: list[str]) -> str:
61+
def trait_in(
62+
self,
63+
alias: str,
64+
trait_key: str,
65+
items: list[str],
66+
binder: Binder | None = None,
67+
) -> str:
4968
"""Type-aware IN predicate on a trait, mirroring engine semantics:
5069
string trait does direct lookup; integer trait stringifies and
5170
looks up; other trait types never match. `items` is the parsed
@@ -77,14 +96,19 @@ def regex_supports(self, pattern: str) -> bool:
7796
to `flag_engine`."""
7897
...
7998

80-
def regexp_anchored_match(self, value_expr: str, pattern: str) -> str:
99+
def regexp_anchored_match(
100+
self,
101+
value_expr: str,
102+
pattern: str,
103+
binder: Binder | None = None,
104+
) -> str:
81105
"""Boolean: equivalent to Python `re.match(pattern, value)` —
82106
anchored at position 0, may be a prefix of the value, not a
83107
full-match.
84108
85-
`pattern` is the raw Python regex string; the dialect handles
86-
its own escaping into a SQL literal, since regex flavours
87-
differ in how backslashes are treated."""
109+
`pattern` is the raw Python regex string. With no `binder`, the
110+
dialect handles its own escaping into a SQL literal, since regex
111+
flavours differ in how backslashes are treated."""
88112
...
89113

90114
def regexp_nth_digit_run(self, value_expr: str, n: int) -> str:

src/flagsmith_sql_flag_engine/dialects/clickhouse.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@
7676
ClickHouse Cloud as of 25.12 (no longer experimental on OSS 25.x).
7777
Callers should apply this setting at session creation."""
7878

79-
from flagsmith_sql_flag_engine.utils import re2_safe, string_literal
79+
from flagsmith_sql_flag_engine.binder import Binder
80+
from flagsmith_sql_flag_engine.utils import bind_or_inline, re2_safe
8081

8182
SCHEMA_DDL = """\
8283
CREATE TABLE IF NOT EXISTS IDENTITIES (
@@ -154,10 +155,17 @@ def trait_path(self, alias: str, trait_key: str) -> str:
154155
sub = self._sub(alias, trait_key)
155156
return f"if({sub} IS NULL, NULL, toString({sub}))"
156157

157-
def trait_eq(self, alias: str, trait_key: str, value: object, negate: bool) -> str:
158+
def trait_eq(
159+
self,
160+
alias: str,
161+
trait_key: str,
162+
value: object,
163+
negate: bool,
164+
binder: Binder | None = None,
165+
) -> str:
158166
sub = self._sub(alias, trait_key)
159167
str_value = str(value)
160-
str_lit = string_literal(str_value)
168+
str_lit = bind_or_inline(binder, str_value)
161169
# Engine bool cast: `v not in ("False", "false")`. A JSON true matches
162170
# every segment value except literal "False" / "false"; those two coerce
163171
# to False and match a JSON false.
@@ -223,7 +231,13 @@ def trait_eq(self, alias: str, trait_key: str, value: object, negate: bool) -> s
223231
f"(({str_sub} IS NOT NULL AND {str_sub} <> {str_lit}) OR {bool_branch} OR {num_branch})"
224232
)
225233

226-
def trait_in(self, alias: str, trait_key: str, items: list[str]) -> str:
234+
def trait_in(
235+
self,
236+
alias: str,
237+
trait_key: str,
238+
items: list[str],
239+
binder: Binder | None = None,
240+
) -> str:
227241
# `toString(<sub>)` returns the canonical string form for any JSON
228242
# value type in a single subcolumn read. Engine semantics only
229243
# match String and integer trait types — bool / float / array
@@ -235,7 +249,7 @@ def trait_in(self, alias: str, trait_key: str, items: list[str]) -> str:
235249
bool_sub = f"{sub}.:Bool"
236250
float_sub = f"{sub}.:Float64"
237251
str_path = f"toString({sub})"
238-
item_lits = ",".join(string_literal(v) for v in items)
252+
item_lits = ",".join(bind_or_inline(binder, v) for v in items)
239253
return f"({bool_sub} IS NULL AND {float_sub} IS NULL AND {str_path} IN ({item_lits}))"
240254

241255
# ----- string operations -----
@@ -267,13 +281,21 @@ def _regex_literal(pattern: str) -> str:
267281
doubled = pattern.replace("\\", "\\\\").replace("'", "''")
268282
return f"'{doubled}'"
269283

270-
def regexp_anchored_match(self, value_expr: str, pattern: str) -> str:
284+
def regexp_anchored_match(
285+
self, value_expr: str, pattern: str, binder: Binder | None = None
286+
) -> str:
271287
# `match` is RE2 but unanchored — equivalent to `re.search`. Prepend
272288
# `^` to get `re.match` semantics (start-anchored, prefix-allowed).
273289
# Wrapping in `(...)` keeps the user's top-level alternation from
274290
# binding tighter than the anchor.
275291
anchored = "^(" + pattern + ")"
276-
return f"match({_non_null(value_expr)}, {self._regex_literal(anchored)})"
292+
# Bind the raw pattern when a binder is active: the driver escapes
293+
# it, and — crucially — no `%` from a character class like
294+
# `[a-z%]` lands in the query text to trip a `%`-substituting
295+
# driver. Inline, `_regex_literal` doubles backslashes so RE2 sees
296+
# the pattern the segment author wrote.
297+
pattern_lit = binder.add(anchored) if binder is not None else self._regex_literal(anchored)
298+
return f"match({_non_null(value_expr)}, {pattern_lit})"
277299

278300
def regexp_nth_digit_run(self, value_expr: str, n: int) -> str:
279301
# `extractAll` returns the matches array; subscript is 1-indexed

0 commit comments

Comments
 (0)