Skip to content

Commit 28a4eb8

Browse files
fix(rate-limiter): correctness, robustness, and cleanup for rust engine
- Fix thread-local monotonic clock anchor — replace with process-global OnceLock so timestamps are comparable across threads (clock.rs) - Fix Redis backend deriving now_float from SystemTime instead of the passed-in now_unix parameter, breaking CORR-02 contract (redis_backend.rs) - Register EvalDimension in PyO3 module to match .pyi stubs (lib.rs) - Remove plural unit forms from Rust rate parser to match Python parity (config.rs) - Use saturating_mul for token bucket cap to prevent overflow (memory.rs) - Add amortized key eviction for idle memory-backend keys (memory.rs) - Propagate tokio runtime init error instead of panicking (redis_backend.rs) - Add dedicated InvalidAlgorithm error variant (config.rs) - Remove dead Check struct and unused serde/serde_json deps (engine.rs, Cargo.toml) - Document fail-open security contract and retry_after=min policy (rate_limiter.py, types.rs) Signed-off-by: Pratik Gandhi <gandhipratik203@gmail.com>
1 parent f48fc3a commit 28a4eb8

10 files changed

Lines changed: 236 additions & 87 deletions

File tree

plugins/rate_limiter/rate_limiter.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@
1313
All three algorithms support both memory and Redis backends with identical
1414
semantics. The Redis backend uses atomic Lua scripts for each algorithm —
1515
one round-trip per check with no race conditions.
16+
17+
Security contract — fail-open on error:
18+
Both hook methods (prompt_pre_fetch, tool_pre_invoke) catch all unexpected
19+
exceptions and allow the request through. This is a deliberate design
20+
choice: an internal engine failure (Rust panic, Redis timeout, config bug)
21+
must never block legitimate traffic. The trade-off is that a sustained
22+
engine failure silently disables rate limiting until the error is resolved.
23+
Operators should monitor for rate-limiter error logs and treat them as
24+
high-priority alerts.
1625
"""
1726

1827
# Future
@@ -165,6 +174,19 @@ def _select_most_restrictive(
165174
) -> tuple[bool, int, int, int, dict[str, Any]]:
166175
"""Select the most restrictive rate limit from multiple dimensions.
167176
177+
Multi-dimension aggregation contract:
178+
- Any blocked dimension → overall result is blocked.
179+
- Among blocked dimensions: the one with the **lowest** retry_after
180+
(soonest unblock) determines the Retry-After header. This signals
181+
the next state change — the caller learns when at least one dimension
182+
will re-open, even if other dimensions remain blocked longer. An
183+
alternative (max) would guarantee success on retry but delays the
184+
first attempt and hides which dimension unblocked. This is a
185+
deliberate product-level choice shared by both the Python and Rust
186+
implementations.
187+
- Among allowed dimensions: the one with the fewest remaining requests
188+
determines the header values (closest to exhaustion).
189+
168190
Args:
169191
results: List of (allowed, limit, reset_timestamp, metadata) tuples.
170192
@@ -1173,6 +1195,8 @@ async def prompt_pre_fetch(self, payload: PromptPrehookPayload, context: PluginC
11731195
return PromptPrehookResult(metadata=meta)
11741196

11751197
except Exception:
1198+
# Deliberate fail-open: engine errors must not block legitimate traffic.
1199+
# See module docstring "Security contract — fail-open on error".
11761200
logger.exception("RateLimiterPlugin.prompt_pre_fetch encountered an unexpected error; allowing request")
11771201
return PromptPrehookResult()
11781202

@@ -1262,5 +1286,7 @@ async def tool_pre_invoke(self, payload: ToolPreInvokePayload, context: PluginCo
12621286
return ToolPreInvokeResult(metadata=meta)
12631287

12641288
except Exception:
1289+
# Deliberate fail-open: engine errors must not block legitimate traffic.
1290+
# See module docstring "Security contract — fail-open on error".
12651291
logger.exception("RateLimiterPlugin.tool_pre_invoke encountered an unexpected error; allowing request")
12661292
return ToolPreInvokeResult()

plugins_rust/rate_limiter/Cargo.lock

Lines changed: 0 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

plugins_rust/rate_limiter/Cargo.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ pyo3 = { version = "0.28.2", features = ["abi3-py311"] }
2020
pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] }
2121
pyo3-stub-gen = "0.19"
2222
parking_lot = "0.12"
23-
serde = { version = "1.0", features = ["derive"] }
24-
serde_json = "1.0"
2523
thiserror = "2.0"
2624
redis = { version = "0.27", features = ["aio", "tokio-comp"] }
2725
tokio = { version = "1", features = ["rt-multi-thread", "sync", "time"] }

plugins_rust/rate_limiter/src/clock.rs

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -33,28 +33,22 @@ pub struct SystemClock;
3333

3434
impl Clock for SystemClock {
3535
fn now_monotonic(&self) -> Nanos {
36-
use std::time::{Duration, Instant, UNIX_EPOCH};
36+
use std::sync::OnceLock;
37+
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
38+
3739
// Instant is monotonic; we anchor it to a fixed start to get nanoseconds.
38-
// We use a thread-local anchor so the monotonic counter is consistent
39-
// within a process but not tied to an arbitrary boot epoch.
40-
use std::cell::Cell;
41-
use std::time::SystemTime;
42-
thread_local! {
43-
static ANCHOR: Cell<Option<(Instant, u64)>> = const { Cell::new(None) };
44-
}
45-
ANCHOR.with(|cell| {
46-
let (anchor_instant, anchor_nanos) = cell.get().unwrap_or_else(|| {
47-
let nanos = SystemTime::now()
48-
.duration_since(UNIX_EPOCH)
49-
.unwrap_or(Duration::ZERO)
50-
.as_nanos() as u64;
51-
let pair = (Instant::now(), nanos);
52-
cell.set(Some(pair));
53-
pair
54-
});
55-
let elapsed = anchor_instant.elapsed().as_nanos() as u64;
56-
anchor_nanos + elapsed
57-
})
40+
// We use a process-global anchor so monotonic values are comparable
41+
// across threads — required because MemoryStore is shared via RwLock.
42+
static ANCHOR: OnceLock<(Instant, u64)> = OnceLock::new();
43+
let (anchor_instant, anchor_nanos) = ANCHOR.get_or_init(|| {
44+
let nanos = SystemTime::now()
45+
.duration_since(UNIX_EPOCH)
46+
.unwrap_or(Duration::ZERO)
47+
.as_nanos() as u64;
48+
(Instant::now(), nanos)
49+
});
50+
let elapsed = anchor_instant.elapsed().as_nanos() as u64;
51+
anchor_nanos + elapsed
5852
}
5953

6054
fn now_unix_secs(&self) -> UnixSecs {

plugins_rust/rate_limiter/src/config.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ pub enum ConfigError {
3131
InvalidRateString(String),
3232
#[error("rate count must be > 0, got {0}")]
3333
ZeroCount(u64),
34+
#[error("invalid algorithm {0:?}: expected \"fixed_window\", \"sliding_window\", or \"token_bucket\"")]
35+
InvalidAlgorithm(String),
3436
}
3537

3638
/// Parse a rate string like `"30/m"`, `"100/s"`, `"1000/h"`.
@@ -53,9 +55,9 @@ pub fn parse_rate(s: &str) -> Result<RateLimit, ConfigError> {
5355
}
5456

5557
let window_secs: u64 = match unit_str.trim().to_ascii_lowercase().as_str() {
56-
"s" | "sec" | "second" | "seconds" => 1,
57-
"m" | "min" | "minute" | "minutes" => 60,
58-
"h" | "hr" | "hour" | "hours" => 3600,
58+
"s" | "sec" | "second" => 1,
59+
"m" | "min" | "minute" => 60,
60+
"h" | "hr" | "hour" => 3600,
5961
_ => return Err(ConfigError::InvalidRateString(s.to_string())),
6062
};
6163

@@ -113,7 +115,7 @@ impl EngineConfig {
113115
})
114116
.collect::<Result<HashMap<_, _>, _>>()?;
115117
let algorithm = Algorithm::from_str(algorithm)
116-
.ok_or_else(|| ConfigError::InvalidRateString(algorithm.to_string()))?;
118+
.ok_or_else(|| ConfigError::InvalidAlgorithm(algorithm.to_string()))?;
117119
Ok(Self {
118120
by_user,
119121
by_tenant,

plugins_rust/rate_limiter/src/engine.rs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,6 @@ enum EngineBackend {
3434
// Check descriptor — one entry per active dimension
3535
// ---------------------------------------------------------------------------
3636

37-
/// A single dimension check passed from Python to `evaluate_many()`.
38-
///
39-
/// Python builds this list from context (user_id, tenant_id, tool_name)
40-
/// and the configured limits — the engine never reads config again on the
41-
/// hot path (IFACE-01).
42-
#[derive(Debug, Clone)]
43-
pub struct Check {
44-
pub key: String,
45-
pub limit_count: u64,
46-
pub window_nanos: u64,
47-
}
48-
4937
// ---------------------------------------------------------------------------
5038
// Engine
5139
// ---------------------------------------------------------------------------

plugins_rust/rate_limiter/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,14 @@ pub mod redis_backend;
1717
pub mod types;
1818

1919
pub use engine::RateLimiterEngine;
20-
pub use types::EvalResult;
20+
pub use types::{EvalDimension, EvalResult};
2121

2222
/// Python module definition.
2323
#[pymodule]
2424
fn rate_limiter_rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
2525
m.add_class::<RateLimiterEngine>()?;
2626
m.add_class::<EvalResult>()?;
27+
m.add_class::<EvalDimension>()?;
2728
Ok(())
2829
}
2930

0 commit comments

Comments
 (0)