Skip to content

Commit 3e059c9

Browse files
aepfli and claude authored
bench(python): add pytest-benchmark suite with evaluation and operator benchmarks (#65)
## Summary

- Replace manual `time.time()` benchmark with proper pytest-benchmark suite
- Add 19 benchmark scenarios covering evaluation, custom operators, state management, and concurrency
- Add optional comparison against pure-Python JSON Logic library
- Add `pytest-benchmark>=4.0` to dev dependencies

## Benchmark Scenarios

| Category | Benchmarks | What they measure |
|----------|-----------|-----------------|
| **Evaluation** | bool simple, targeting match/no-match, string, int, float, object, large context | Flag evaluation across types and context sizes |
| **Custom Operators** | fractional, semver, starts_with, ends_with | Individual operator performance |
| **State Management** | 5/50/200 flags, no-change re-apply | `update_state()` scaling |
| **Concurrent** | 4-thread evaluation | Thread safety and contention |
| **Comparison** | vs pure-Python json-logic (optional) | Native PyO3 vs alternative |

## How to run

```bash
cd python
uv sync --group dev
maturin develop
pytest benchmarks/ --benchmark-only --benchmark-disable-gc -v
```

## Initial Results

- Boolean simple: ~714ns/call (~1.4M ops/sec)
- Targeting match: ~1.7us/call (~594K ops/sec)
- Custom operators: ~1.3-4.6us/call
- State updates: ~24us (5 flags) to ~1ms (200 flags)

## Test plan

- [x] All 18 benchmarks pass (1 skipped - optional comparison library)
- [x] `maturin develop` + `pytest benchmarks/` succeeds
- [ ] Verify results are stable across runs

Closes #62

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 8a44c79 commit 3e059c9

6 files changed

Lines changed: 676 additions & 87 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
/target/
33
/dist/
44
/python/tests/__pycache__
5+
/python/benchmarks/__pycache__
56

67
# Cargo.lock is tracked to ensure reproducible WASM builds.
78
# The WASM binary's import names include hashes that must match Java host functions.

python/benchmarks/bench_vs_wasm.py

Lines changed: 0 additions & 87 deletions
This file was deleted.

python/benchmarks/conftest.py

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
"""Shared fixtures for flagd-evaluator benchmarks."""
2+
3+
import pytest
4+
from flagd_evaluator import FlagEvaluator
5+
6+
7+
def _build_flag_config():
    """Assemble the flag configuration shared by the benchmark fixtures.

    Every call returns a brand-new ``{"flags": {...}}`` dict holding twelve
    flag definitions: untargeted boolean/int/float/object flags, a disabled
    flag, and flags whose targeting uses ``if``/``==``, ``fractional``,
    ``sem_ver``, ``starts_with``, ``ends_with`` and nested ``and``/``or``
    rules.
    """

    def var(name):
        """Build a reference to the context attribute *name*."""
        return {"var": name}

    def equals(name, expected):
        """Build an ``==`` comparison of a context attribute to a constant."""
        return {"==": [var(name), expected]}

    def above(name, threshold):
        """Build a ``>`` comparison of a context attribute to a constant."""
        return {">": [var(name), threshold]}

    def choose(condition, if_true, if_false):
        """Build a three-argument ``if`` rule."""
        return {"if": [condition, if_true, if_false]}

    def on_off():
        """Build boolean variants; a new dict per call so flags share nothing."""
        return {"on": True, "off": False}

    flags = {}

    # Simple boolean flag (no targeting)
    flags["simple-bool"] = {
        "state": "ENABLED",
        "variants": on_off(),
        "defaultVariant": "on",
    }

    # Boolean flag whose variant depends on the "tier" attribute
    flags["targeted-bool"] = {
        "state": "ENABLED",
        "variants": on_off(),
        "defaultVariant": "off",
        "targeting": choose(equals("tier", "premium"), "on", "off"),
    }

    # String flag with a two-level "segment" check
    flags["string-flag"] = {
        "state": "ENABLED",
        "variants": {
            "banner-a": "Welcome to our new experience!",
            "banner-b": "Check out our latest features!",
            "default-banner": "Welcome!",
        },
        "defaultVariant": "default-banner",
        "targeting": choose(
            equals("segment", "beta"),
            "banner-a",
            choose(equals("segment", "internal"), "banner-b", "default-banner"),
        ),
    }

    # Integer flag
    flags["int-flag"] = {
        "state": "ENABLED",
        "variants": {"low": 10, "medium": 50, "high": 100},
        "defaultVariant": "medium",
    }

    # Float flag
    flags["float-flag"] = {
        "state": "ENABLED",
        "variants": {"conservative": 0.1, "moderate": 0.5, "aggressive": 0.9},
        "defaultVariant": "moderate",
    }

    # Object flag
    flags["object-flag"] = {
        "state": "ENABLED",
        "variants": {
            "config-a": {
                "color": "blue",
                "size": "large",
                "features": ["search", "export"],
            },
            "config-b": {
                "color": "green",
                "size": "medium",
                "features": ["search"],
            },
        },
        "defaultVariant": "config-a",
    }

    # Disabled flag
    flags["disabled-flag"] = {
        "state": "DISABLED",
        "variants": on_off(),
        "defaultVariant": "on",
    }

    # Fractional bucketing flag (50/25/25 split keyed on targetingKey)
    flags["fractional-flag"] = {
        "state": "ENABLED",
        "variants": {
            "control": "control-experience",
            "treatment-a": "treatment-a-experience",
            "treatment-b": "treatment-b-experience",
        },
        "defaultVariant": "control",
        "targeting": {
            "fractional": [
                var("targetingKey"),
                ["control", 50],
                ["treatment-a", 25],
                ["treatment-b", 25],
            ]
        },
    }

    # Semver flag
    flags["semver-flag"] = {
        "state": "ENABLED",
        "variants": {"new-ui": True, "old-ui": False},
        "defaultVariant": "old-ui",
        "targeting": choose(
            {"sem_ver": [var("appVersion"), ">=", "2.0.0"]},
            "new-ui",
            "old-ui",
        ),
    }

    # starts_with flag
    flags["starts-with-flag"] = {
        "state": "ENABLED",
        "variants": {"internal": "internal-access", "external": "external-access"},
        "defaultVariant": "external",
        "targeting": choose(
            {"starts_with": [var("email"), "admin@"]},
            "internal",
            "external",
        ),
    }

    # ends_with flag
    flags["ends-with-flag"] = {
        "state": "ENABLED",
        "variants": {"corp": "corporate-plan", "personal": "personal-plan"},
        "defaultVariant": "personal",
        "targeting": choose(
            {"ends_with": [var("email"), "@corp.example.com"]},
            "corp",
            "personal",
        ),
    }

    # Complex targeting flag (nested if/and/or); note that defaultVariant
    # precedes variants in this entry.
    premium_rule = {"and": [equals("tier", "premium"), above("score", 90)]}
    standard_rule = {"or": [equals("tier", "standard"), above("score", 50)]}
    flags["complex-targeting"] = {
        "state": "ENABLED",
        "defaultVariant": "basic",
        "variants": {
            "premium": "premium-tier",
            "standard": "standard-tier",
            "basic": "basic-tier",
        },
        "targeting": choose(
            premium_rule,
            "premium",
            choose(standard_rule, "standard", "basic"),
        ),
    }

    return {"flags": flags}
172+
173+
174+
@pytest.fixture
def flag_config():
    """Provide the raw flag configuration dict for state-update benchmarks.

    The value is whatever ``_build_flag_config()`` returns for this request.
    """
    config = _build_flag_config()
    return config
178+
179+
180+
@pytest.fixture
def evaluator():
    """Provide a ``FlagEvaluator`` already loaded with the benchmark flags.

    A new evaluator is constructed and handed the configuration from
    ``_build_flag_config()`` via ``update_state`` before being returned.
    """
    instance = FlagEvaluator()
    config = _build_flag_config()
    instance.update_state(config)
    return instance
186+
187+
188+
@pytest.fixture
def small_context():
    """Provide a compact evaluation context holding five attributes."""
    return dict(
        targetingKey="user-123",
        tier="premium",
        role="admin",
        region="us-east",
        score=85,
    )
198+
199+
200+
@pytest.fixture
def large_context():
    """Provide a wide evaluation context with more than 100 attributes.

    Ten named attributes come first, followed by 100 generated
    ``attr_<i>`` / ``value_<i>`` string pairs for ``i`` in ``0..99``.
    """
    named = {
        "targetingKey": "user-bench-12345",
        "tier": "premium",
        "segment": "beta",
        "email": "admin@corp.example.com",
        "appVersion": "2.5.1",
        "role": "admin",
        "country": "US",
        "locale": "en-US",
        "platform": "linux",
        "deviceType": "desktop",
    }
    # Generated filler keys are appended after the named ones, in index order.
    filler = {f"attr_{index}": f"value_{index}" for index in range(100)}
    return {**named, **filler}

0 commit comments

Comments
 (0)