IBM
diff --git a/plugins/rate_limiter/rate_limiter.py b/plugins/rate_limiter/rate_limiter.py
Lines changed: 26 additions & 0 deletions
diff --git a/plugins_rust/rate_limiter/Cargo.lock b/plugins_rust/rate_limiter/Cargo.lock
Lines changed: 0 additions & 2 deletions
diff --git a/plugins_rust/rate_limiter/Cargo.toml b/plugins_rust/rate_limiter/Cargo.toml
Lines changed: 0 additions & 2 deletions
diff --git a/plugins_rust/rate_limiter/src/clock.rs b/plugins_rust/rate_limiter/src/clock.rs
Lines changed: 15 additions & 21 deletions
diff --git a/plugins_rust/rate_limiter/src/config.rs b/plugins_rust/rate_limiter/src/config.rs
Lines changed: 6 additions & 4 deletions
diff --git a/plugins_rust/rate_limiter/src/engine.rs b/plugins_rust/rate_limiter/src/engine.rs
Lines changed: 0 additions & 12 deletions
diff --git a/plugins_rust/rate_limiter/src/lib.rs b/plugins_rust/rate_limiter/src/lib.rs
Lines changed: 2 additions & 1 deletion
@@ -13,6 +13,15 @@
 All three algorithms support both memory and Redis backends with identical
 semantics. The Redis backend uses atomic Lua scripts for each algorithm —
 one round-trip per check with no race conditions.
+
+Security contract — fail-open on error:
+  Both hook methods (prompt_pre_fetch, tool_pre_invoke) catch all unexpected
+  exceptions and allow the request through.  This is a deliberate design
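+  choice: an internal engine failure (Rust-side error, Redis timeout, config
+  bug) must never block legitimate traffic.  NOTE(review): a Rust *panic*
+  reaches Python as PyO3's PanicException, which derives from BaseException
+  and so is not caught by ``except Exception`` — confirm whether panics are
+  meant to be covered.  The trade-off is that a sustained engine failure
+  disables rate limiting, visible only in logs, until the error is resolved.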
+  Operators should monitor for rate-limiter error logs and treat them as
+  high-priority alerts.
 """
 
 # Future
@@ -165,6 +174,19 @@ def _select_most_restrictive(
 ) -> tuple[bool, int, int, int, dict[str, Any]]:
     """Select the most restrictive rate limit from multiple dimensions.
 
+    Multi-dimension aggregation contract:
+      - Any blocked dimension → overall result is blocked.
+      - Among blocked dimensions: the one with the **lowest** retry_after
+        (soonest unblock) determines the Retry-After header.  This signals
+        the next state change — the caller learns when at least one dimension
+        will re-open, even if other dimensions remain blocked longer.  An
+        alternative (max) would guarantee success on retry but delays the
+        first attempt and hides which dimension unblocked.  This is a
+        deliberate product-level choice shared by both the Python and Rust
+        implementations.
+      - Among allowed dimensions: the one with the fewest remaining requests
+        determines the header values (closest to exhaustion).
+
     Args:
         results: List of (allowed, limit, reset_timestamp, metadata) tuples.
 
@@ -1173,6 +1195,8 @@ async def prompt_pre_fetch(self, payload: PromptPrehookPayload, context: PluginC
             return PromptPrehookResult(metadata=meta)
 
         except Exception:
+            # Deliberate fail-open: engine errors must not block legitimate traffic.
+            # See module docstring "Security contract — fail-open on error".
             logger.exception("RateLimiterPlugin.prompt_pre_fetch encountered an unexpected error; allowing request")
             return PromptPrehookResult()
 
@@ -1262,5 +1286,7 @@ async def tool_pre_invoke(self, payload: ToolPreInvokePayload, context: PluginCo
             return ToolPreInvokeResult(metadata=meta)
 
         except Exception:
+            # Deliberate fail-open: engine errors must not block legitimate traffic.
+            # See module docstring "Security contract — fail-open on error".
             logger.exception("RateLimiterPlugin.tool_pre_invoke encountered an unexpected error; allowing request")
             return ToolPreInvokeResult()
@@ -20,8 +20,6 @@ pyo3 = { version = "0.28.2", features = ["abi3-py311"] }
 pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] }
 pyo3-stub-gen = "0.19"
 parking_lot = "0.12"
-serde = { version = "1.0", features = ["derive"] }
-serde_json = "1.0"
 thiserror = "2.0"
 redis = { version = "0.27", features = ["aio", "tokio-comp"] }
 tokio = { version = "1", features = ["rt-multi-thread", "sync", "time"] }
 
@@ -33,28 +33,22 @@ pub struct SystemClock;
 
 impl Clock for SystemClock {
     fn now_monotonic(&self) -> Nanos {
-        use std::time::{Duration, Instant, UNIX_EPOCH};
+        use std::sync::OnceLock;
+        use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
+
         // Instant is monotonic; we anchor it to a fixed start to get nanoseconds.
-        // We use a thread-local anchor so the monotonic counter is consistent
-        // within a process but not tied to an arbitrary boot epoch.
-        use std::cell::Cell;
-        use std::time::SystemTime;
-        thread_local! {
-            static ANCHOR: Cell<Option<(Instant, u64)>> = const { Cell::new(None) };
-        }
-        ANCHOR.with(|cell| {
-            let (anchor_instant, anchor_nanos) = cell.get().unwrap_or_else(|| {
-                let nanos = SystemTime::now()
-                    .duration_since(UNIX_EPOCH)
-                    .unwrap_or(Duration::ZERO)
-                    .as_nanos() as u64;
-                let pair = (Instant::now(), nanos);
-                cell.set(Some(pair));
-                pair
-            });
-            let elapsed = anchor_instant.elapsed().as_nanos() as u64;
-            anchor_nanos + elapsed
-        })
+        // We use a process-global anchor so monotonic values are comparable
+        // across threads — required because MemoryStore is shared via RwLock.
+        static ANCHOR: OnceLock<(Instant, u64)> = OnceLock::new();
+        let (anchor_instant, anchor_nanos) = ANCHOR.get_or_init(|| {
+            let nanos = SystemTime::now()
+                .duration_since(UNIX_EPOCH)
+                .unwrap_or(Duration::ZERO)
+                .as_nanos() as u64;
+            (Instant::now(), nanos)
+        });
+        let elapsed = anchor_instant.elapsed().as_nanos() as u64;
+        anchor_nanos + elapsed
     }
 
     fn now_unix_secs(&self) -> UnixSecs {
 
@@ -31,6 +31,8 @@ pub enum ConfigError {
     InvalidRateString(String),
     #[error("rate count must be > 0, got {0}")]
     ZeroCount(u64),
+    #[error("invalid algorithm {0:?}: expected \"fixed_window\", \"sliding_window\", or \"token_bucket\"")]
+    InvalidAlgorithm(String),
 }
 
 /// Parse a rate string like `"30/m"`, `"100/s"`, `"1000/h"`.
@@ -53,9 +55,9 @@ pub fn parse_rate(s: &str) -> Result<RateLimit, ConfigError> {
     }
 
     let window_secs: u64 = match unit_str.trim().to_ascii_lowercase().as_str() {
-        "s" | "sec" | "second" | "seconds" => 1,
-        "m" | "min" | "minute" | "minutes" => 60,
-        "h" | "hr" | "hour" | "hours" => 3600,
+        "s" | "sec" | "second" => 1,
+        "m" | "min" | "minute" => 60,
+        "h" | "hr" | "hour" => 3600,
         _ => return Err(ConfigError::InvalidRateString(s.to_string())),
     };
 
@@ -113,7 +115,7 @@ impl EngineConfig {
             })
             .collect::<Result<HashMap<_, _>, _>>()?;
         let algorithm = Algorithm::from_str(algorithm)
-            .ok_or_else(|| ConfigError::InvalidRateString(algorithm.to_string()))?;
+            .ok_or_else(|| ConfigError::InvalidAlgorithm(algorithm.to_string()))?;
         Ok(Self {
             by_user,
             by_tenant,
 
@@ -34,18 +34,6 @@ enum EngineBackend {
 // Check descriptor — one entry per active dimension
 // ---------------------------------------------------------------------------
 
-/// A single dimension check passed from Python to `evaluate_many()`.
-///
-/// Python builds this list from context (user_id, tenant_id, tool_name)
-/// and the configured limits — the engine never reads config again on the
-/// hot path (IFACE-01).
-#[derive(Debug, Clone)]
-pub struct Check {
-    pub key: String,
-    pub limit_count: u64,
-    pub window_nanos: u64,
-}
-
 // ---------------------------------------------------------------------------
 // Engine
 // ---------------------------------------------------------------------------
 
@@ -17,13 +17,14 @@ pub mod redis_backend;
 pub mod types;
 
 pub use engine::RateLimiterEngine;
-pub use types::EvalResult;
+pub use types::{EvalDimension, EvalResult};
 
 /// Python module definition.
 #[pymodule]
 fn rate_limiter_rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<RateLimiterEngine>()?;
     m.add_class::<EvalResult>()?;
+    m.add_class::<EvalDimension>()?;
     Ok(())
 }