Skip to content

Commit 6de406d

Browse files
committed
fix(translator): Hash identity-key percentage splits without a context identity
A PERCENTAGE_SPLIT keyed on the identity (implicit `$.identity.key`, `$.identity.key`, or `$.identity.identifier`) compiled to FALSE when the evaluation context carried no identity. The translated predicate runs per-row over IDENTITIES, where the identity-key and identifier columns are always present, so it should hash the row column regardless of whether the row-oriented context supplies an identity. The guard made row-oriented callers (segment-membership counts/members) see every percentage-split segment as empty. Drop the eval-context identity guard for those three cases and emit the per-row hash. Trait-keyed splits are unchanged. This intentionally diverges from flag_engine's single-eval "no identity -> False" for the identity-less context, which is correct for the row-oriented SQL engine where every row is an identity; the one engine-test-data case that exercises it (test_percentage_split__no_identity_key__should_match) is added to the ClickHouse harness's _XFAIL_CASE_NAMES. The translator unit tests assert the full emitted SQL. beep boop Claude-Session: https://claude.ai/code/session_01EgZ5iHpDASZzCapiHRxHLB
1 parent cfc3d4e commit 6de406d

3 files changed

Lines changed: 57 additions & 24 deletions

File tree

src/flagsmith_sql_flag_engine/translator.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -435,19 +435,14 @@ def translate_condition(cond: SegmentCondition, ctx: TranslateContext) -> str |
435435
identity: dict[str, object] = ctx.evaluation_context.get("identity") or {} # type: ignore[assignment]
436436
kind = classification.kind
437437
if not prop:
438-
# Implicit `$.identity.key` — engine returns False when no
439-
# identity, or when the identity lacks `key`. The engine
440-
# never synthesises one from env+identifier.
441-
if not identity.get("key"):
442-
return "FALSE"
438+
# Implicit `$.identity.key`. The key is always present in the store,
439+
# so the split is translatable whether or not the evaluation context
440+
# carries an identity. This intentionally diverges from the engine's
441+
# "no identity → False" verdict.
443442
value_expr = ctx.dialect.cast_string(ctx.identity_key_expr)
444443
elif kind == "key":
445-
if not identity.get("key"):
446-
return "FALSE"
447444
value_expr = ctx.dialect.cast_string(ctx.jsonpath_expr("$.identity.key"))
448445
elif kind == "identifier":
449-
if not identity.get("identifier"):
450-
return "FALSE"
451446
value_expr = ctx.dialect.cast_string(ctx.jsonpath_expr("$.identity.identifier"))
452447
elif kind == "identity_object":
453448
# PERCENTAGE_SPLIT on `$.identity` — the whole dict. Engine

tests/harnesses/clickhouse.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,12 @@
3939
# condition, so we accept the divergence on this niche shape
4040
# (`$.`-prefixed trait names) and let callers fall back to the engine.
4141
"test_jsonpath_like_trait__existing_jsonpath__should_match_trait",
42+
# Engine returns False for a PERCENTAGE_SPLIT when the eval context has
43+
# no identity key. The SQL engine intentionally diverges: it runs over
44+
# IDENTITIES rows where the identity key always exists, so it hashes the
45+
# row's key rather than folding to FALSE — the behaviour row-oriented
46+
# callers (segment-membership counts/members) need.
47+
"test_percentage_split__no_identity_key__should_match",
4248
}
4349

4450

tests/test_translator_unit.py

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -542,9 +542,10 @@ def _ctx_identity_without(field: str) -> TranslateContext:
542542
)
543543

544544

545-
def test_translate_segment__percentage_split_implicit_key_no_identity__compiles_to_false() -> None:
546-
# Given a PERCENTAGE_SPLIT with no property (implicit `$.identity.key`) and an eval
547-
# context with no identity
545+
def test_translate_segment__percentage_split_implicit_key_no_identity__inlines_md5() -> None:
546+
# Given a PERCENTAGE_SPLIT with no property (implicit `$.identity.key`) and an
547+
# eval context with no identity — the row-oriented case (e.g. segment-membership
548+
# counting over the whole IDENTITIES table)
548549
seg: SegmentContext = {
549550
"key": "ps2",
550551
"name": "s",
@@ -556,12 +557,25 @@ def test_translate_segment__percentage_split_implicit_key_no_identity__compiles_
556557
],
557558
}
558559

559-
# When / Then the predicate collapses to FALSE — engine returns False without identity
560-
assert translate_segment(seg, _ctx_no_identity()) == "((FALSE))"
560+
# When translated
561+
sql = translate_segment(seg, _ctx_no_identity())
562+
563+
# Then it hashes the per-row identity-key column rather than folding to
564+
# FALSE — the row supplies the subject, so a context identity is not needed
565+
assert sql == (
566+
"(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||"
567+
" (toString(i.identity_key))), 1, 4))) * 7291 +"
568+
" reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||"
569+
" (toString(i.identity_key))), 5, 4))) * 1897 +"
570+
" reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||"
571+
" (toString(i.identity_key))), 9, 4))) * 6835 +"
572+
" reinterpretAsUInt32(reverse(substring(MD5('ps2' || ',' ||"
573+
" (toString(i.identity_key))), 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))"
574+
)
561575

562576

563-
def test_translate_segment__percentage_split_identity_key_missing__compiles_to_false() -> None:
564-
# Given the eval context's identity has no `key`
577+
def test_translate_segment__percentage_split_key_no_context_identity__inlines_md5() -> None:
578+
# Given a PERCENTAGE_SPLIT on `$.identity.key` and a context identity without `key`
565579
seg: SegmentContext = {
566580
"key": "ps3",
567581
"name": "s",
@@ -575,14 +589,23 @@ def test_translate_segment__percentage_split_identity_key_missing__compiles_to_f
575589
],
576590
}
577591

578-
# When / Then the predicate collapses to FALSE
579-
assert translate_segment(seg, _ctx_identity_without("key")) == "((FALSE))"
592+
# When / Then it still hashes the per-row identity-key column
593+
sql = translate_segment(seg, _ctx_identity_without("key"))
594+
assert sql == (
595+
"(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||"
596+
" (toString(i.identity_key))), 1, 4))) * 7291 +"
597+
" reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||"
598+
" (toString(i.identity_key))), 5, 4))) * 1897 +"
599+
" reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||"
600+
" (toString(i.identity_key))), 9, 4))) * 6835 +"
601+
" reinterpretAsUInt32(reverse(substring(MD5('ps3' || ',' ||"
602+
" (toString(i.identity_key))), 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))"
603+
)
580604

581605

582-
def test_translate_segment__percentage_split_identity_identifier_missing__compiles_to_false() -> (
583-
None
584-
):
585-
# Given the eval context's identity has no `identifier`
606+
def test_translate_segment__percentage_split_identifier_no_context_identity__inlines_md5() -> None:
607+
# Given a PERCENTAGE_SPLIT on `$.identity.identifier` and a context identity
608+
# without `identifier`
586609
seg: SegmentContext = {
587610
"key": "ps4",
588611
"name": "s",
@@ -600,8 +623,17 @@ def test_translate_segment__percentage_split_identity_identifier_missing__compil
600623
],
601624
}
602625

603-
# When / Then the predicate collapses to FALSE
604-
assert translate_segment(seg, _ctx_identity_without("identifier")) == "((FALSE))"
626+
# When / Then it still hashes the per-row identifier column
627+
sql = translate_segment(seg, _ctx_identity_without("identifier"))
628+
assert sql == (
629+
"(((modulo(reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' ||"
630+
" (toString(i.identifier))), 1, 4))) * 7291 +"
631+
" reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' || (toString(i.identifier))),"
632+
" 5, 4))) * 1897 + reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' ||"
633+
" (toString(i.identifier))), 9, 4))) * 6835 +"
634+
" reinterpretAsUInt32(reverse(substring(MD5('ps4' || ',' || (toString(i.identifier))),"
635+
" 13, 4))), 9999) / 9998.0 * 100.0 <= 50.0)))"
636+
)
605637

606638

607639
def test_translate_segment__percentage_split_unknown_jsonpath__returns_none() -> None:

0 commit comments

Comments
 (0)