perf: Speed up local-evaluation hot path for large environments

khvn26 · khvn26 · commit 7b59ab37df5a · 2026-04-24T12:56:25.000+01:00
Addresses the regression reported in Flagsmith/flagsmith-python-client#198: for a 262-feature environment with ~10% multivariate features, local evaluation is ~32% faster (~115 us -> ~78 us per call on an M-series Mac).

Changes:

- Hoist `_get_identity_key` out of the per-feature loop in `evaluate_features`. The identity key is invariant across features in a single evaluation, so we now resolve it once per `get_evaluation_result` call instead of once per feature.
- Inline the per-feature flag-result construction formerly done by `get_flag_result_from_context`. The public helper is retained, with its own copy of the same logic, so existing callers / mocks still work.
- Localise hot-loop references (`hash_fn`, `segment_overrides.get`, variant priority key) to avoid chasing module globals per iteration.
- Add a two-key fast path `get_hashed_percentage_for_object_id_pair` for variant selection and `PERCENTAGE_SPLIT` conditions, skipping the iterable / list wrapping the generic helper performs on every call.

Also adds a 262-feature synthetic benchmark alongside the existing 5-feature ones so CodSpeed can catch regressions that only appear at scale.

beep boop
diff --git a/flag_engine/segments/evaluator.py b/flag_engine/segments/evaluator.py
@@ -31,7 +31,7 @@
     is_context_value,
 )
 from flag_engine.segments.utils import get_matching_function
-from flag_engine.utils.hashing import get_hashed_percentage_for_object_ids
+from flag_engine.utils.hashing import get_hashed_percentage_for_object_id_pair
 from flag_engine.utils.semver import is_semver
 from flag_engine.utils.types import SupportsStr, get_casting_function
 
@@ -58,8 +58,9 @@ def get_evaluation_result(
     :return: EvaluationResult containing the context, flags, and segments
     """
     context = get_enriched_context(context)
+    identity_key = _get_identity_key(context)
     segments, segment_overrides = evaluate_segments(context)
-    flags = evaluate_features(context, segment_overrides)
+    flags = evaluate_features(context, segment_overrides, identity_key=identity_key)
 
     return {
         "flags": flags,
@@ -138,26 +139,57 @@ def evaluate_segments(
 def evaluate_features(
     context: EvaluationContext[typing.Any, FeatureMetadataT],
     segment_overrides: SegmentOverrides[FeatureMetadataT],
+    *,
+    identity_key: typing.Optional[str] = None,
 ) -> dict[str, FlagResult[FeatureMetadataT]]:
     if not (features := context.get("features")):
         return {}
 
+    # ``identity_key`` is invariant across all features in a single evaluation.
+    # Resolving it here (or accepting it from the caller) means the per-feature
+    # hot loop below doesn't have to re-walk ``context["identity"]`` N times.
+    if identity_key is None:
+        identity_key = _get_identity_key(context)
+
+    # Localise loop dependencies once so the tight per-feature loop doesn't
+    # chase module globals on every iteration. The flag-result logic of
+    # ``get_flag_result_from_context`` is inlined below: with many features
+    # (e.g. 250+) the function-call overhead is otherwise ~15% of per-call time.
+    hash_fn = get_hashed_percentage_for_object_id_pair
+    overrides_get = segment_overrides.get
+
     flags: dict[str, FlagResult[FeatureMetadataT]] = {}
+    for feature_name, feature_context in features.items():
+        if segment_override := overrides_get(feature_name):
+            effective_feature_context = segment_override["feature_context"]
+            reason = f"TARGETING_MATCH; segment={segment_override['segment_name']}"
+        else:
+            effective_feature_context = feature_context
+            reason = "DEFAULT"
 
-    for feature_context in features.values():
-        feature_name = feature_context["name"]
-        if segment_override := segment_overrides.get(feature_name):
-            flags[feature_name] = get_flag_result_from_context(
-                context=context,
-                feature_context=segment_override["feature_context"],
-                reason=f"TARGETING_MATCH; segment={segment_override['segment_name']}",
-            )
-            continue
-        flags[feature_name] = get_flag_result_from_context(
-            context=context,
-            feature_context=context["features"][feature_name],
-            reason="DEFAULT",
-        )
+        value: typing.Any = effective_feature_context["value"]
+        if identity_key is not None and (
+            variants := effective_feature_context.get("variants")
+        ):
+            percentage_value = hash_fn(effective_feature_context["key"], identity_key)
+            start_percentage = 0.0
+            for variant in sorted(variants, key=_variant_priority):
+                limit = (weight := variant["weight"]) + start_percentage
+                if start_percentage <= percentage_value < limit:
+                    value = variant["value"]
+                    reason = f"SPLIT; weight={weight}"
+                    break
+                start_percentage = limit
+
+        flag_result: FlagResult[FeatureMetadataT] = {
+            "enabled": effective_feature_context["enabled"],
+            "name": effective_feature_context["name"],
+            "reason": reason,
+            "value": value,
+        }
+        if metadata := effective_feature_context.get("metadata"):
+            flag_result["metadata"] = metadata
+        flags[feature_name] = flag_result
 
     return flags
 
@@ -176,47 +208,38 @@ def get_flag_result_from_context(
     :param reason: reason to use when no variant selected
     :return: the value for the feature name in the evaluation context
     """
-    key = _get_identity_key(context)
+    identity_key = _get_identity_key(context)
+    value: typing.Any = feature_context["value"]
 
-    flag_result: typing.Optional[FlagResult[FeatureMetadataT]] = None
-
-    if key is not None and (variants := feature_context.get("variants")):
-        percentage_value = get_hashed_percentage_for_object_ids(
-            [feature_context["key"], key]
+    if identity_key is not None and (variants := feature_context.get("variants")):
+        percentage_value = get_hashed_percentage_for_object_id_pair(
+            feature_context["key"], identity_key
         )
-
         start_percentage = 0.0
-
-        for variant in sorted(
-            variants,
-            key=operator.itemgetter("priority"),
-        ):
+        for variant in sorted(variants, key=_variant_priority):
             limit = (weight := variant["weight"]) + start_percentage
             if start_percentage <= percentage_value < limit:
-                flag_result = {
-                    "enabled": feature_context["enabled"],
-                    "name": feature_context["name"],
-                    "reason": f"SPLIT; weight={weight}",
-                    "value": variant["value"],
-                }
+                value = variant["value"]
+                reason = f"SPLIT; weight={weight}"
                 break
-
             start_percentage = limit
 
-    if flag_result is None:
-        flag_result = {
-            "enabled": feature_context["enabled"],
-            "name": feature_context["name"],
-            "reason": reason,
-            "value": feature_context["value"],
-        }
-
+    flag_result: FlagResult[FeatureMetadataT] = {
+        "enabled": feature_context["enabled"],
+        "name": feature_context["name"],
+        "reason": reason,
+        "value": value,
+    }
     if metadata := feature_context.get("metadata"):
         flag_result["metadata"] = metadata
-
     return flag_result
 
 
+def _variant_priority(variant: typing.Mapping[str, typing.Any]) -> int:
+    priority: int = variant["priority"]
+    return priority
+
+
 def is_context_in_segment(
     context: _EvaluationContextAnyMeta,
     segment_context: SegmentContext[typing.Any, typing.Any],
@@ -304,14 +327,14 @@ def context_matches_condition(
     if condition_operator == constants.PERCENTAGE_SPLIT:
         if context_value is None:
             return False
-
-        object_ids = [segment_key, context_value]
-
         try:
             float_value = float(condition["value"])
         except ValueError:
             return False
-        return get_hashed_percentage_for_object_ids(object_ids) <= float_value
+        return (
+            get_hashed_percentage_for_object_id_pair(segment_key, context_value)
+            <= float_value
+        )
 
     if condition_operator == constants.IS_NOT_SET:
         return context_value is None
diff --git a/flag_engine/utils/hashing.py b/flag_engine/utils/hashing.py
@@ -31,3 +31,24 @@ def get_hashed_percentage_for_object_ids(
         )
 
     return value
+
+
+def get_hashed_percentage_for_object_id_pair(
+    first: SupportsStr,
+    second: SupportsStr,
+) -> float:
+    """Fast path for the hot two-key case used by variant selection and
+    ``PERCENTAGE_SPLIT`` conditions. Skips the iterator / list wrapping that
+    the generic helper performs on every call.
+
+    Returns the same value as
+    ``get_hashed_percentage_for_object_ids([first, second])``.
+    """
+    to_hash = f"{first},{second}"
+    hashed_value = hashlib.md5(to_hash.encode("utf-8"))
+    hashed_value_as_int = int(hashed_value.hexdigest(), base=16)
+    value = ((hashed_value_as_int % 9999) / 9998) * 100
+    if value == 100:
+        # Extremely unlikely; fall back to the generic recursion-capable path.
+        return get_hashed_percentage_for_object_ids([first, second], iterations=2)
+    return value
diff --git a/tests/engine_tests/test_engine.py b/tests/engine_tests/test_engine.py
@@ -6,7 +6,7 @@
 import pytest
 from _pytest.mark import ParameterSet
 
-from flag_engine.context.types import EvaluationContext
+from flag_engine.context.types import EvaluationContext, FeatureContext
 from flag_engine.engine import get_evaluation_result
 from flag_engine.result.types import EvaluationResult
 
@@ -40,11 +40,66 @@ def _extract_benchmark_contexts(
         yield pyjson5.loads((test_cases_dir_path / file_path).read_text())["context"]
 
 
+def _build_large_benchmark_context(
+    n_features: int = 262,
+    multivariate_features: int = 26,
+) -> EvaluationContext:
+    """Mirror the scenario from flagsmith-python-client issue #198: a real-world
+    local-evaluation environment with ~260 features, a handful of which use
+    multivariate splits, evaluated for a single identity. Small enough to
+    keep the benchmark fast but large enough to surface per-feature overhead.
+    """
+    features: dict[str, FeatureContext[typing.Any]] = {}
+    for i in range(n_features):
+        name = f"feature_{i:04d}"
+        fc: FeatureContext[typing.Any] = {
+            "key": str(i + 1),
+            "name": name,
+            "enabled": bool(i % 2),
+            "value": f"value-{i}",
+            "metadata": {"id": i + 1},
+        }
+        if i < multivariate_features:
+            # Intentionally reverse-ordered so ``sorted()`` has work to do.
+            fc["variants"] = [
+                {"value": f"mv-{i}-b", "weight": 40.0, "priority": 2},
+                {"value": f"mv-{i}-a", "weight": 60.0, "priority": 1},
+            ]
+        features[name] = fc
+    return {
+        "environment": {"key": "bench-env", "name": "bench"},
+        "features": features,
+        "segments": {
+            "1": {
+                "key": "1",
+                "name": "bench-segment",
+                "rules": [
+                    {
+                        "type": "ALL",
+                        "conditions": [
+                            {
+                                "property": "venue_id",
+                                "operator": "EQUAL",
+                                "value": "no-match",
+                            }
+                        ],
+                    }
+                ],
+            }
+        },
+        "identity": {
+            "identifier": "anonymous",
+            "traits": {"venue_id": "12345"},
+        },
+    }
+
+
 TEST_CASES = sorted(
     _extract_test_cases(TEST_CASES_PATH),
     key=lambda param: str(param.id),
 )
 BENCHMARK_CONTEXTS = list(_extract_benchmark_contexts(TEST_CASES_PATH))
+LARGE_BENCHMARK_CONTEXT = _build_large_benchmark_context()
 
 
 @pytest.mark.parametrize(
@@ -66,3 +121,8 @@ def test_engine(
 def test_engine_benchmark() -> None:
     for context in BENCHMARK_CONTEXTS:
         get_evaluation_result(context)
+
+
+@pytest.mark.benchmark
+def test_engine_benchmark_large_context() -> None:
+    get_evaluation_result(LARGE_BENCHMARK_CONTEXT)
diff --git a/tests/unit/segments/test_segments_evaluator.py b/tests/unit/segments/test_segments_evaluator.py
@@ -265,7 +265,7 @@ def test_context_in_segment_percentage_split(
     }
 
     mock_get_hashed_percentage = mocker.patch(
-        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids"
+        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair"
     )
     mock_get_hashed_percentage.return_value = identity_hashed_percentage
 
@@ -308,7 +308,7 @@ def test_context_in_segment_percentage_split__no_identity__returns_expected(
     }
 
     mock_get_hashed_percentage = mocker.patch(
-        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids"
+        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair"
     )
 
     # When
@@ -352,7 +352,7 @@ def test_context_in_segment_percentage_split__trait_value__calls_expected(
     }
 
     mock_get_hashed_percentage = mocker.patch(
-        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids"
+        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair"
     )
     mock_get_hashed_percentage.return_value = 1
 
@@ -361,7 +361,7 @@ def test_context_in_segment_percentage_split__trait_value__calls_expected(
 
     # Then
     mock_get_hashed_percentage.assert_called_once_with(
-        [segment_context["key"], "custom_value"]
+        segment_context["key"], "custom_value"
     )
     assert result
 
@@ -841,7 +841,7 @@ def test_get_flag_result_from_context__calls_returns_expected(
     # we mock the function which gets the percentage value for an identity to
     # return a deterministic value so we know which value to expect
     get_hashed_percentage_for_object_ids_mock = mocker.patch(
-        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids",
+        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair",
     )
     get_hashed_percentage_for_object_ids_mock.return_value = percentage_value
 
@@ -870,10 +870,8 @@ def test_get_flag_result_from_context__calls_returns_expected(
 
     # the function is called with the expected key
     get_hashed_percentage_for_object_ids_mock.assert_called_once_with(
-        [
-            expected_feature_context_key,
-            expected_key,
-        ]
+        expected_feature_context_key,
+        expected_key,
     )
 
 
@@ -885,7 +883,7 @@ def test_get_flag_result_from_feature_context__null_key__calls_returns_expected(
     expected_feature_context_key = "2"
 
     get_hashed_percentage_for_object_ids_mock = mocker.patch(
-        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids",
+        "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair",
     )
 
     feature_context: FeatureContext = {

Original file line number	Diff line number	Diff line change
`@@ -265,7 +265,7 @@ def test_context_in_segment_percentage_split(`
`265`	`265`	`}`
`266`	`266`
`267`	`267`	`mock_get_hashed_percentage = mocker.patch(`
`268`		`- "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids"`
	`268`	`+ "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair"`
`269`	`269`	`)`
`270`	`270`	`mock_get_hashed_percentage.return_value = identity_hashed_percentage`
`271`	`271`
`@@ -308,7 +308,7 @@ def test_context_in_segment_percentage_split__no_identity__returns_expected(`
`308`	`308`	`}`
`309`	`309`
`310`	`310`	`mock_get_hashed_percentage = mocker.patch(`
`311`		`- "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids"`
	`311`	`+ "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair"`
`312`	`312`	`)`
`313`	`313`
`314`	`314`	`# When`
`@@ -352,7 +352,7 @@ def test_context_in_segment_percentage_split__trait_value__calls_expected(`
`352`	`352`	`}`
`353`	`353`
`354`	`354`	`mock_get_hashed_percentage = mocker.patch(`
`355`		`- "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids"`
	`355`	`+ "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair"`
`356`	`356`	`)`
`357`	`357`	`mock_get_hashed_percentage.return_value = 1`
`358`	`358`
`@@ -361,7 +361,7 @@ def test_context_in_segment_percentage_split__trait_value__calls_expected(`
`361`	`361`
`362`	`362`	`# Then`
`363`	`363`	`mock_get_hashed_percentage.assert_called_once_with(`
`364`		`- [segment_context["key"], "custom_value"]`
	`364`	`+ segment_context["key"], "custom_value"`
`365`	`365`	`)`
`366`	`366`	`assert result`
`367`	`367`
`@@ -841,7 +841,7 @@ def test_get_flag_result_from_context__calls_returns_expected(`
`841`	`841`	`# we mock the function which gets the percentage value for an identity to`
`842`	`842`	`# return a deterministic value so we know which value to expect`
`843`	`843`	`get_hashed_percentage_for_object_ids_mock = mocker.patch(`
`844`		`- "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids",`
	`844`	`+ "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair",`
`845`	`845`	`)`
`846`	`846`	`get_hashed_percentage_for_object_ids_mock.return_value = percentage_value`
`847`	`847`
`@@ -870,10 +870,8 @@ def test_get_flag_result_from_context__calls_returns_expected(`
`870`	`870`
`871`	`871`	`# the function is called with the expected key`
`872`	`872`	`get_hashed_percentage_for_object_ids_mock.assert_called_once_with(`
`873`		`- [`
`874`		`- expected_feature_context_key,`
`875`		`- expected_key,`
`876`		`- ]`
	`873`	`+ expected_feature_context_key,`
	`874`	`+ expected_key,`
`877`	`875`	`)`
`878`	`876`
`879`	`877`
`@@ -885,7 +883,7 @@ def test_get_flag_result_from_feature_context__null_key__calls_returns_expected(`
`885`	`883`	`expected_feature_context_key = "2"`
`886`	`884`
`887`	`885`	`get_hashed_percentage_for_object_ids_mock = mocker.patch(`
`888`		`- "flag_engine.segments.evaluator.get_hashed_percentage_for_object_ids",`
	`886`	`+ "flag_engine.segments.evaluator.get_hashed_percentage_for_object_id_pair",`
`889`	`887`	`)`
`890`	`888`
`891`	`889`	`feature_context: FeatureContext = {`