IBM
diff --git a/.dockerignore b/.dockerignore
Lines changed: 1 addition & 0 deletions
diff --git a/.secrets.baseline b/.secrets.baseline
Lines changed: 12 additions & 12 deletions
diff --git a/Makefile b/Makefile
Lines changed: 46 additions & 1 deletion
diff --git a/mcpgateway/auth.py b/mcpgateway/auth.py
Lines changed: 23 additions & 0 deletions
diff --git a/plugins/config.yaml b/plugins/config.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/plugins/rate_limiter/README.md b/plugins/rate_limiter/README.md
Lines changed: 2 additions & 2 deletions
@@ -303,6 +303,7 @@ docs/build/
 # PyBuilder
 target/
 **/target/
+**/target/**
 
 # Jupyter Notebook
 .ipynb_checkpoints
 
@@ -3,7 +3,7 @@
     "files": "package-lock.json|Cargo.lock|^.secrets.baseline$|scripts/sign_image.sh|scripts/zap|sonar-project.properties|^/Users/brian/dev/github.ibm.com/contextforge-org/sps-pipeline-config/.secrets.baseline$|^./.secrets.baseline$",
     "lines": null
   },
-  "generated_at": "2026-03-27T22:09:20Z",
+  "generated_at": "2026-03-28T07:44:11Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
@@ -376,39 +376,39 @@
         "hashed_secret": "d3ac7a4ef1a838b4134f2f6e7f3c0d249d74b674",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 5781,
+        "line_number": 5864,
         "type": "Secret Keyword",
         "verified_result": null
       },
       {
         "hashed_secret": "5932862bcd24dd27d0dc0407ec94fe9d6ea24aeb",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 6278,
+        "line_number": 6361,
         "type": "Secret Keyword",
         "verified_result": null
       },
       {
         "hashed_secret": "c77c805e32f173e4321ee9187de9c29cb3804513",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 6290,
+        "line_number": 6373,
         "type": "Secret Keyword",
         "verified_result": null
       },
       {
         "hashed_secret": "8fe3df8a68ddd0d4ab2214186cbb8e38ccd0e06a",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 6362,
+        "line_number": 6445,
         "type": "Secret Keyword",
         "verified_result": null
       },
       {
         "hashed_secret": "93ac8946882128457cd9e283b30ca851945e6690",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 7464,
+        "line_number": 7547,
         "type": "Secret Keyword",
         "verified_result": null
       }
@@ -10567,26 +10567,26 @@
         "verified_result": null
       },
       {
-        "hashed_secret": "79bead8e6d65862a00cffaa12ccde1189ec34d29",
+        "hashed_secret": "dfd99b5f25f839608a3c275c0f8ceb363f8f0bc0",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 2953,
+        "line_number": 3514,
         "type": "Secret Keyword",
         "verified_result": null
       },
       {
-        "hashed_secret": "dfd99b5f25f839608a3c275c0f8ceb363f8f0bc0",
+        "hashed_secret": "5038e18712161fca54e52805726d3c70b296eff6",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 3514,
+        "line_number": 3623,
         "type": "Secret Keyword",
         "verified_result": null
       },
       {
-        "hashed_secret": "5038e18712161fca54e52805726d3c70b296eff6",
+        "hashed_secret": "79bead8e6d65862a00cffaa12ccde1189ec34d29",
         "is_secret": false,
         "is_verified": false,
-        "line_number": 3623,
+        "line_number": 3822,
         "type": "Secret Keyword",
         "verified_result": null
       }
 
@@ -2324,9 +2324,13 @@ load-test-agentgateway-mcp-server-time:    ## Load test external MCP server (loc
 MCP_PROTOCOL_LOCUSTFILE ?= tests/loadtest/locustfile_mcp_protocol.py
 MCP_RATE_LIMITER_LOCUSTFILE ?= tests/loadtest/locustfile_rate_limiter_backend_correctness.py
 MCP_RATE_LIMITER_SCALE_LOCUSTFILE ?= tests/loadtest/locustfile_rate_limiter_scale.py
+MCP_RATE_LIMITER_REDIS_CAPACITY_LOCUSTFILE ?= tests/loadtest/locustfile_rate_limiter_redis_capacity.py
 RL_ALGORITHM ?= fixed_window
 RL_USERS ?= 100
 RL_SPAWN_RATE ?= 10
+RL_REQS_PER_SECOND ?= 0.25
+RL_PROMPT_ID ?=
+RATE_LIMITER_FORCE_PYTHON ?=
 MCP_PROTOCOL_HOST ?= http://localhost:4444
 MCP_BENCHMARK_HOST ?= http://localhost:8080
 MCP_BENCHMARK_SERVER_ID ?= 9779b6698cbd4b4995ee04a4fab38737
@@ -2447,7 +2451,7 @@ benchmark-rate-limiter:                     ## Rate limiter correctness test (1
 # help: benchmark-rate-limiter-scale  - Multi-user scale test showing Redis memory divergence across algorithms
 .PHONY: benchmark-rate-limiter-scale
 RL_RUN_TIME ?= 300s
-benchmark-rate-limiter-scale:               ## Scale test: 500 unique users, Redis memory timeline per algorithm
+benchmark-rate-limiter-scale:               ## Scale test: RL_USERS unique users (default 100), Redis memory timeline per algorithm
 	@echo "📈 Running rate limiter scale test (resource divergence)..."
 	@echo "   Algorithm: $(RL_ALGORITHM)  (must match plugins/config.yaml)"
 	@echo "   Users:     $(RL_USERS) unique identities  (each creates own Redis key)"
@@ -2477,6 +2481,47 @@ benchmark-rate-limiter-scale:               ## Scale test: 500 unique users, Red
 			--only-summary \
 			ScaleComparisonUser || true'
 
+
+# help: benchmark-rate-limiter-redis-capacity  - Multi-instance prompt-path concurrency benchmark for Redis rate limiting
+# Runs the Locust file named by MCP_RATE_LIMITER_REDIS_CAPACITY_LOCUSTFILE headless
+# against MCP_BENCHMARK_HOST using the CapacityPromptUser class, creating the
+# virtualenv first if VENV_DIR does not exist.  The RL_* values and
+# RATE_LIMITER_FORCE_PYTHON are set in the environment of the locust command.
+# The trailing `|| true` keeps a non-zero locust exit from failing the target.
+# NOTE(review): RL_LIMIT_PER_MIN is given no default in this change; presumably it
+# is defined elsewhere in the Makefile - confirm, otherwise it is passed as empty.
+.PHONY: benchmark-rate-limiter-redis-capacity
+benchmark-rate-limiter-redis-capacity:      ## Capacity test: 3 gateways + Redis on prompt_pre_fetch path
+	@echo "🚀 Running rate limiter Redis capacity test..."
+	@echo "   Host:        $(MCP_BENCHMARK_HOST)"
+	@echo "   Topology:    nginx -> 3 gateways -> shared Redis"
+	@echo "   Path:        REST /prompts/{id} (prompt_pre_fetch)"
+	@echo "   Users:       $(RL_USERS)"
+	@echo "   Spawn rate:  $(RL_SPAWN_RATE)/s"
+	@echo "   Pace:        $(RL_REQS_PER_SECOND) req/s per user"
+	@echo "   Duration:    $(RL_RUN_TIME)"
+	@test -d "$(VENV_DIR)" || $(MAKE) venv
+	@/bin/bash -eu -o pipefail -c 'source $(VENV_DIR)/bin/activate && \
+		LOCUST_LOG_LEVEL=ERROR \
+		RATE_LIMITER_FORCE_PYTHON=$(RATE_LIMITER_FORCE_PYTHON) \
+		RL_USERS=$(RL_USERS) \
+		RL_SPAWN_RATE=$(RL_SPAWN_RATE) \
+		RL_RUN_TIME=$(RL_RUN_TIME) \
+		RL_REQS_PER_SECOND=$(RL_REQS_PER_SECOND) \
+		RL_LIMIT_PER_MIN=$(RL_LIMIT_PER_MIN) \
+		RL_PROMPT_ID=$(RL_PROMPT_ID) \
+		locust -f $(MCP_RATE_LIMITER_REDIS_CAPACITY_LOCUSTFILE) \
+			--host=$(MCP_BENCHMARK_HOST) \
+			--users=$(RL_USERS) \
+			--spawn-rate=$(RL_SPAWN_RATE) \
+			--run-time=$(RL_RUN_TIME) \
+			--headless \
+			--only-summary \
+			CapacityPromptUser || true'
+
+# help: benchmark-rate-limiter-capacity-rust  - Capacity test with Rust engine enabled (default)
+# Re-invokes make on benchmark-rate-limiter-redis-capacity with
+# RATE_LIMITER_FORCE_PYTHON=0 placed in the sub-make's environment.
+# NOTE(review): assumes whatever reads RATE_LIMITER_FORCE_PYTHON treats "0" as
+# "not forced" rather than testing for a non-empty value - confirm.
+.PHONY: benchmark-rate-limiter-capacity-rust
+benchmark-rate-limiter-capacity-rust:       ## Capacity test with Rust engine
+	RATE_LIMITER_FORCE_PYTHON=0 $(MAKE) benchmark-rate-limiter-redis-capacity
+
+# help: benchmark-rate-limiter-capacity-python  - Capacity test with Python fallback (forced)
+# Re-invokes make on benchmark-rate-limiter-redis-capacity with
+# RATE_LIMITER_FORCE_PYTHON=1 placed in the sub-make's environment.
+# NOTE(review): the value is handed to the locust process by the capacity target;
+# whether the gateways under test also see it is not visible here - confirm.
+.PHONY: benchmark-rate-limiter-capacity-python
+benchmark-rate-limiter-capacity-python:     ## Capacity test with Python fallback
+	RATE_LIMITER_FORCE_PYTHON=1 $(MAKE) benchmark-rate-limiter-redis-capacity
+
 .PHONY: benchmark-mcp-mixed-300
 benchmark-mcp-mixed-300:                    ## Distributed 300-user mixed MCP benchmark
 	@echo "📊 Running distributed mixed MCP benchmark..."
 
@@ -1058,6 +1058,7 @@ async def _set_auth_method_from_payload(payload: dict) -> None:
                 if request and global_context:
                     request.state.plugin_global_context = global_context
 
+                _propagate_tenant_id(request)
                 if plugin_manager and plugin_manager.config.plugin_settings.include_user_info:
                     _inject_userinfo_instate(request, user)
 
@@ -1184,6 +1185,7 @@ async def _set_auth_method_from_payload(payload: dict) -> None:
                                     headers={"WWW-Authenticate": "Bearer"},
                                 )
 
+                        _propagate_tenant_id(request)
                         if plugin_manager and plugin_manager.config.plugin_settings.include_user_info:
                             _inject_userinfo_instate(request, _user_from_cached_dict(cached_ctx.user))
 
@@ -1315,6 +1317,7 @@ async def _set_auth_method_from_payload(payload: dict) -> None:
                             headers={"WWW-Authenticate": "Bearer"},
                         )
 
+                _propagate_tenant_id(request)
                 if plugin_manager and plugin_manager.config.plugin_settings.include_user_info:
                     _inject_userinfo_instate(request, _batched_user)
 
@@ -1490,12 +1493,32 @@ async def _set_auth_method_from_payload(payload: dict) -> None:
             headers={"WWW-Authenticate": "Bearer"},
         )
 
+    _propagate_tenant_id(request)
     if plugin_manager and plugin_manager.config.plugin_settings.include_user_info:
         _inject_userinfo_instate(request, user)
 
     return user
 
 
+def _propagate_tenant_id(request: Optional[object] = None) -> None:
+    """Copy the request's team id onto the plugin GlobalContext as its tenant id.
+
+    Reads ``request.state.plugin_global_context`` and ``request.state.team_id``.
+    When the context exists, its ``tenant_id`` is still None and the team id is
+    truthy, ``tenant_id`` is set to that team id.  Meant to run regardless of
+    the ``include_user_info`` plugin setting (which only gates
+    ``_inject_userinfo_instate()``) so that by_tenant rate limiting still
+    receives a tenant.
+
+    Args:
+        request: Incoming request object, or None.  A falsy request, or one
+            without ``state`` / ``plugin_global_context``, makes this a no-op.
+
+    Returns:
+        None.  The only effect is the possible write to ``tenant_id``; a value
+        that is already set is never overwritten.
+    """
+    state = getattr(request, "state", None) if request else None
+    context = getattr(state, "plugin_global_context", None)
+    # Nothing to do without a context, or when a tenant was already assigned.
+    if not context or context.tenant_id is not None:
+        return
+    team_id = getattr(state, "team_id", None)
+    if team_id:
+        context.tenant_id = team_id
+
+
 def _inject_userinfo_instate(request: Optional[object] = None, user: Optional[EmailUser] = None) -> None:
     """This function injects user related information into the plugin_global_context, if the config has
     include_user_info key set as true.
 
@@ -214,7 +214,7 @@ plugins:
     author: "Mihai Criveti"
     hooks: ["prompt_pre_fetch", "tool_pre_invoke"]
     tags: ["limits", "throttle"]
-    mode: "permissive"
+    mode: "enforce"
     priority: 20
     conditions: []
     config:
 
@@ -99,7 +99,7 @@ Each identity (user, tenant, tool) has a bucket that holds up to `count` tokens.
 
 ## Backends
 
-### Memory backend (default)
+### Memory backend (default, single-instance only)
 
 - Counters are stored in a process-local dict (`_store`)
 - An `asyncio.Lock` serialises all counter reads and writes — safe under concurrent asyncio tasks
@@ -116,7 +116,7 @@ Each identity (user, tenant, tool) has a bucket that holds up to `count` tokens.
 - If `redis_fallback: true` (default) and Redis is unavailable, the plugin falls back to the in-process `MemoryBackend` automatically — requests are never blocked due to Redis downtime
 - If `redis_fallback: false` and Redis is unavailable, the exception is caught and the request is allowed through (fail-open)
 
-**Multi-instance deployment:** use `backend: redis`. The Redis service is already included in the default Docker Compose stack at `redis://redis:6379/0`.
+**Multi-instance deployment (important):** The `memory` backend is local to a single gateway instance — rate limit counters are not shared across replicas. For multi-instance deployments (e.g., behind nginx or on OpenShift with multiple gateway pods), always use `backend: redis` to ensure rate limits are enforced correctly across all instances. The default production configuration (`plugins/config.yaml`) already sets `backend: redis`.
 
 ## Examples