Skip to content

Commit a433395

Browse files
feat(rate-limiter): add Rust-backed engine, check() API, benchmarks, and validation
- Rust-backed sliding window engine with pyo3-log integration
- check() API with tenant propagation, sweep/retry-after support
- Eliminate redundant ZRANGE in sliding window Lua script
- Fix detect-secrets baseline for rate limiter load tests
- Clarify memory backend is single-instance only in docs

Signed-off-by: Pratik Gandhi <gandhipratik203@gmail.com>
1 parent 9d074d5 commit a433395

29 files changed

Lines changed: 8750 additions & 123 deletions

.dockerignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,7 @@ docs/build/
303303
# PyBuilder
304304
target/
305305
**/target/
306+
**/target/**
306307

307308
# Jupyter Notebook
308309
.ipynb_checkpoints

.secrets.baseline

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"files": "package-lock.json|Cargo.lock|^.secrets.baseline$|scripts/sign_image.sh|scripts/zap|sonar-project.properties|^/Users/brian/dev/github.ibm.com/contextforge-org/sps-pipeline-config/.secrets.baseline$|^./.secrets.baseline$",
44
"lines": null
55
},
6-
"generated_at": "2026-03-27T22:09:20Z",
6+
"generated_at": "2026-03-28T07:44:11Z",
77
"plugins_used": [
88
{
99
"name": "AWSKeyDetector"
@@ -376,39 +376,39 @@
376376
"hashed_secret": "d3ac7a4ef1a838b4134f2f6e7f3c0d249d74b674",
377377
"is_secret": false,
378378
"is_verified": false,
379-
"line_number": 5781,
379+
"line_number": 5864,
380380
"type": "Secret Keyword",
381381
"verified_result": null
382382
},
383383
{
384384
"hashed_secret": "5932862bcd24dd27d0dc0407ec94fe9d6ea24aeb",
385385
"is_secret": false,
386386
"is_verified": false,
387-
"line_number": 6278,
387+
"line_number": 6361,
388388
"type": "Secret Keyword",
389389
"verified_result": null
390390
},
391391
{
392392
"hashed_secret": "c77c805e32f173e4321ee9187de9c29cb3804513",
393393
"is_secret": false,
394394
"is_verified": false,
395-
"line_number": 6290,
395+
"line_number": 6373,
396396
"type": "Secret Keyword",
397397
"verified_result": null
398398
},
399399
{
400400
"hashed_secret": "8fe3df8a68ddd0d4ab2214186cbb8e38ccd0e06a",
401401
"is_secret": false,
402402
"is_verified": false,
403-
"line_number": 6362,
403+
"line_number": 6445,
404404
"type": "Secret Keyword",
405405
"verified_result": null
406406
},
407407
{
408408
"hashed_secret": "93ac8946882128457cd9e283b30ca851945e6690",
409409
"is_secret": false,
410410
"is_verified": false,
411-
"line_number": 7464,
411+
"line_number": 7547,
412412
"type": "Secret Keyword",
413413
"verified_result": null
414414
}
@@ -10567,26 +10567,26 @@
1056710567
"verified_result": null
1056810568
},
1056910569
{
10570-
"hashed_secret": "79bead8e6d65862a00cffaa12ccde1189ec34d29",
10570+
"hashed_secret": "dfd99b5f25f839608a3c275c0f8ceb363f8f0bc0",
1057110571
"is_secret": false,
1057210572
"is_verified": false,
10573-
"line_number": 2953,
10573+
"line_number": 3514,
1057410574
"type": "Secret Keyword",
1057510575
"verified_result": null
1057610576
},
1057710577
{
10578-
"hashed_secret": "dfd99b5f25f839608a3c275c0f8ceb363f8f0bc0",
10578+
"hashed_secret": "5038e18712161fca54e52805726d3c70b296eff6",
1057910579
"is_secret": false,
1058010580
"is_verified": false,
10581-
"line_number": 3514,
10581+
"line_number": 3623,
1058210582
"type": "Secret Keyword",
1058310583
"verified_result": null
1058410584
},
1058510585
{
10586-
"hashed_secret": "5038e18712161fca54e52805726d3c70b296eff6",
10586+
"hashed_secret": "79bead8e6d65862a00cffaa12ccde1189ec34d29",
1058710587
"is_secret": false,
1058810588
"is_verified": false,
10589-
"line_number": 3623,
10589+
"line_number": 3822,
1059010590
"type": "Secret Keyword",
1059110591
"verified_result": null
1059210592
}

Makefile

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2324,9 +2324,13 @@ load-test-agentgateway-mcp-server-time: ## Load test external MCP server (loc
23242324
MCP_PROTOCOL_LOCUSTFILE ?= tests/loadtest/locustfile_mcp_protocol.py
23252325
MCP_RATE_LIMITER_LOCUSTFILE ?= tests/loadtest/locustfile_rate_limiter_backend_correctness.py
23262326
MCP_RATE_LIMITER_SCALE_LOCUSTFILE ?= tests/loadtest/locustfile_rate_limiter_scale.py
2327+
MCP_RATE_LIMITER_REDIS_CAPACITY_LOCUSTFILE ?= tests/loadtest/locustfile_rate_limiter_redis_capacity.py
23272328
RL_ALGORITHM ?= fixed_window
23282329
RL_USERS ?= 100
23292330
RL_SPAWN_RATE ?= 10
2331+
RL_REQS_PER_SECOND ?= 0.25
2332+
RL_PROMPT_ID ?=
2333+
RATE_LIMITER_FORCE_PYTHON ?=
23302334
MCP_PROTOCOL_HOST ?= http://localhost:4444
23312335
MCP_BENCHMARK_HOST ?= http://localhost:8080
23322336
MCP_BENCHMARK_SERVER_ID ?= 9779b6698cbd4b4995ee04a4fab38737
@@ -2447,7 +2451,7 @@ benchmark-rate-limiter: ## Rate limiter correctness test (1
24472451
# help: benchmark-rate-limiter-scale - Multi-user scale test showing Redis memory divergence across algorithms
24482452
.PHONY: benchmark-rate-limiter-scale
24492453
RL_RUN_TIME ?= 300s
2450-
benchmark-rate-limiter-scale: ## Scale test: 500 unique users, Redis memory timeline per algorithm
2454+
benchmark-rate-limiter-scale: ## Scale test: RL_USERS unique users (default 100), Redis memory timeline per algorithm
24512455
@echo "📈 Running rate limiter scale test (resource divergence)..."
24522456
@echo " Algorithm: $(RL_ALGORITHM) (must match plugins/config.yaml)"
24532457
@echo " Users: $(RL_USERS) unique identities (each creates own Redis key)"
@@ -2477,6 +2481,47 @@ benchmark-rate-limiter-scale: ## Scale test: 500 unique users, Red
24772481
--only-summary \
24782482
ScaleComparisonUser || true'
24792483

2484+
2485+
# help: benchmark-rate-limiter-redis-capacity - Multi-instance prompt-path concurrency benchmark for Redis rate limiting
2486+
.PHONY: benchmark-rate-limiter-redis-capacity
2487+
benchmark-rate-limiter-redis-capacity: ## Capacity test: 3 gateways + Redis on prompt_pre_fetch path
2488+
@echo "🚀 Running rate limiter Redis capacity test..."
2489+
@echo " Host: $(MCP_BENCHMARK_HOST)"
2490+
@echo " Topology: nginx -> 3 gateways -> shared Redis"
2491+
@echo " Path: REST /prompts/{id} (prompt_pre_fetch)"
2492+
@echo " Users: $(RL_USERS)"
2493+
@echo " Spawn rate: $(RL_SPAWN_RATE)/s"
2494+
@echo " Pace: $(RL_REQS_PER_SECOND) req/s per user"
2495+
@echo " Duration: $(RL_RUN_TIME)"
2496+
@test -d "$(VENV_DIR)" || $(MAKE) venv
2497+
@/bin/bash -eu -o pipefail -c 'source $(VENV_DIR)/bin/activate && \
2498+
LOCUST_LOG_LEVEL=ERROR \
2499+
RATE_LIMITER_FORCE_PYTHON=$(RATE_LIMITER_FORCE_PYTHON) \
2500+
RL_USERS=$(RL_USERS) \
2501+
RL_SPAWN_RATE=$(RL_SPAWN_RATE) \
2502+
RL_RUN_TIME=$(RL_RUN_TIME) \
2503+
RL_REQS_PER_SECOND=$(RL_REQS_PER_SECOND) \
2504+
RL_LIMIT_PER_MIN=$(RL_LIMIT_PER_MIN) \
2505+
RL_PROMPT_ID=$(RL_PROMPT_ID) \
2506+
locust -f $(MCP_RATE_LIMITER_REDIS_CAPACITY_LOCUSTFILE) \
2507+
--host=$(MCP_BENCHMARK_HOST) \
2508+
--users=$(RL_USERS) \
2509+
--spawn-rate=$(RL_SPAWN_RATE) \
2510+
--run-time=$(RL_RUN_TIME) \
2511+
--headless \
2512+
--only-summary \
2513+
CapacityPromptUser || true'
2514+
2515+
# help: benchmark-rate-limiter-capacity-rust - Capacity test with Rust engine enabled (default)
2516+
.PHONY: benchmark-rate-limiter-capacity-rust
2517+
benchmark-rate-limiter-capacity-rust: ## Capacity test with Rust engine
2518+
RATE_LIMITER_FORCE_PYTHON=0 $(MAKE) benchmark-rate-limiter-redis-capacity
2519+
2520+
# help: benchmark-rate-limiter-capacity-python - Capacity test with Python fallback (forced)
2521+
.PHONY: benchmark-rate-limiter-capacity-python
2522+
benchmark-rate-limiter-capacity-python: ## Capacity test with Python fallback
2523+
RATE_LIMITER_FORCE_PYTHON=1 $(MAKE) benchmark-rate-limiter-redis-capacity
2524+
24802525
.PHONY: benchmark-mcp-mixed-300
24812526
benchmark-mcp-mixed-300: ## Distributed 300-user mixed MCP benchmark
24822527
@echo "📊 Running distributed mixed MCP benchmark..."

mcpgateway/auth.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,7 @@ async def _set_auth_method_from_payload(payload: dict) -> None:
10581058
if request and global_context:
10591059
request.state.plugin_global_context = global_context
10601060

1061+
_propagate_tenant_id(request)
10611062
if plugin_manager and plugin_manager.config.plugin_settings.include_user_info:
10621063
_inject_userinfo_instate(request, user)
10631064

@@ -1184,6 +1185,7 @@ async def _set_auth_method_from_payload(payload: dict) -> None:
11841185
headers={"WWW-Authenticate": "Bearer"},
11851186
)
11861187

1188+
_propagate_tenant_id(request)
11871189
if plugin_manager and plugin_manager.config.plugin_settings.include_user_info:
11881190
_inject_userinfo_instate(request, _user_from_cached_dict(cached_ctx.user))
11891191

@@ -1315,6 +1317,7 @@ async def _set_auth_method_from_payload(payload: dict) -> None:
13151317
headers={"WWW-Authenticate": "Bearer"},
13161318
)
13171319

1320+
_propagate_tenant_id(request)
13181321
if plugin_manager and plugin_manager.config.plugin_settings.include_user_info:
13191322
_inject_userinfo_instate(request, _batched_user)
13201323

@@ -1490,12 +1493,32 @@ async def _set_auth_method_from_payload(payload: dict) -> None:
14901493
headers={"WWW-Authenticate": "Bearer"},
14911494
)
14921495

1496+
_propagate_tenant_id(request)
14931497
if plugin_manager and plugin_manager.config.plugin_settings.include_user_info:
14941498
_inject_userinfo_instate(request, user)
14951499

14961500
return user
14971501

14981502

1503+
def _propagate_tenant_id(request: Optional[object] = None) -> None:
1504+
"""Propagate request.state.team_id into GlobalContext.tenant_id for rate limiting.
1505+
1506+
Called unconditionally at every return path in get_current_user() — unlike
1507+
_inject_userinfo_instate() which is gated by include_user_info. This
1508+
ensures by_tenant rate limiting works even when include_user_info is False
1509+
(the default) and the middleware has already created plugin_global_context.
1510+
1511+
Only writes when tenant_id is still None (no overwrite of plugin-set values).
1512+
"""
1513+
if not request:
1514+
return
1515+
global_context = getattr(getattr(request, "state", None), "plugin_global_context", None)
1516+
if global_context and global_context.tenant_id is None:
1517+
team_id = getattr(getattr(request, "state", None), "team_id", None)
1518+
if team_id:
1519+
global_context.tenant_id = team_id
1520+
1521+
14991522
def _inject_userinfo_instate(request: Optional[object] = None, user: Optional[EmailUser] = None) -> None:
15001523
"""This function injects user related information into the plugin_global_context, if the config has
15011524
include_user_info key set as true.

plugins/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ plugins:
214214
author: "Mihai Criveti"
215215
hooks: ["prompt_pre_fetch", "tool_pre_invoke"]
216216
tags: ["limits", "throttle"]
217-
mode: "permissive"
217+
mode: "enforce"
218218
priority: 20
219219
conditions: []
220220
config:

plugins/rate_limiter/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ Each identity (user, tenant, tool) has a bucket that holds up to `count` tokens.
9999

100100
## Backends
101101

102-
### Memory backend (default)
102+
### Memory backend (default, single-instance only)
103103

104104
- Counters are stored in a process-local dict (`_store`)
105105
- An `asyncio.Lock` serialises all counter reads and writes — safe under concurrent asyncio tasks
@@ -116,7 +116,7 @@ Each identity (user, tenant, tool) has a bucket that holds up to `count` tokens.
116116
- If `redis_fallback: true` (default) and Redis is unavailable, the plugin falls back to the in-process `MemoryBackend` automatically — requests are never blocked due to Redis downtime
117117
- If `redis_fallback: false` and Redis is unavailable, the exception is caught and the request is allowed through (fail-open)
118118

119-
**Multi-instance deployment:** use `backend: redis`. The Redis service is already included in the default Docker Compose stack at `redis://redis:6379/0`.
119+
**Multi-instance deployment (important):** The `memory` backend is local to a single gateway instance — rate limit counters are not shared across replicas, so each replica enforces the limit independently and a client whose requests are spread over N replicas can be allowed up to N times the configured limit. For multi-instance deployments (e.g., behind nginx or on OpenShift with multiple gateway pods), always use `backend: redis` to ensure rate limits are enforced correctly across all instances. The default production configuration (`plugins/config.yaml`) already sets `backend: redis`.
120120

121121
## Examples
122122

0 commit comments

Comments
 (0)