@@ -83,6 +83,12 @@ pub fn get_thresholds() -> MemoryThresholds {
8383/// Check whether jemalloc says physical memory has headroom, meaning the
8484/// pool's rejection is a false positive (stale accounting).
8585///
86+ /// Uses `resident_bytes` (physical RSS) instead of `allocated_bytes` (live objects).
87+ /// `allocated_bytes` undercounts true memory pressure because jemalloc retains
88+ /// freed pages in thread caches and arenas (dirty/muzzy decay). Under concurrent
89+ /// workloads, the gap between allocated and resident can be 10-20GB, causing the
90+ /// override to fire when the system is actually near OOM.
91+ ///
8692/// Returns `true` if the override should fire (proceed despite pool rejection).
8793/// Returns `false` if pressure is real or stats are unavailable.
8894///
@@ -95,8 +101,8 @@ pub fn should_override(pool_limit_bytes: usize, context: OverrideContext) -> boo
95101 return false ;
96102 }
97103
98- let allocated = native_bridge_common:: allocator:: allocated_bytes ( ) ;
99- if allocated <= 0 {
104+ let resident = native_bridge_common:: allocator:: resident_bytes ( ) ;
105+ if resident <= 0 {
100106 return false ;
101107 }
102108
@@ -106,7 +112,31 @@ pub fn should_override(pool_limit_bytes: usize, context: OverrideContext) -> boo
106112 } ;
107113
108114 let threshold_bytes = ( pool_limit_bytes as u64 * threshold_x1000 / 1000 ) as i64 ;
109- allocated < threshold_bytes
115+ resident < threshold_bytes
116+ }
117+
118+ /// Proactive admission check: returns `true` if jemalloc resident memory has
119+ /// reached or exceeded the admission threshold (70% of pool limit by default).
120+ ///
121+ /// Called BEFORE query execution (at budget acquisition) to reject or reduce
122+ /// concurrency early — before any hash table allocation occurs. This prevents
123+ /// the "20 queries all pass admission simultaneously" burst that causes OOM.
124+ ///
125+ /// Cost: one `epoch.advance` + stat read (~1-5µs). Called once per query at
126+ /// admission, not per-batch.
127+ pub fn is_memory_pressured ( pool_limit_bytes : usize ) -> bool {
128+ if pool_limit_bytes < MIN_POOL_FOR_OVERRIDE {
129+ return false ;
130+ }
131+ // Pools below MIN_POOL_FOR_OVERRIDE are never reported as pressured (handled above).
132+ let resident = native_bridge_common:: allocator:: resident_bytes ( ) ;
133+ if resident <= 0 {
134+ return false ;
135+ }
136+ // A non-positive reading (presumably "stats unavailable") also yields `false`. The threshold is stored in thousandths of the pool limit.
137+ let threshold_x1000 = ADMISSION_THRESHOLD_X1000 . load ( Ordering :: Acquire ) ;
138+ let threshold_bytes = ( pool_limit_bytes as u64 * threshold_x1000 / 1000 ) as i64 ;
139+ resident >= threshold_bytes
110140}
111141
112142// ---------------------------------------------------------------------------
@@ -203,4 +233,76 @@ mod tests {
203233 assert ! ( !should_override( 1_000_000 , OverrideContext :: Admission ) ) ;
204234 assert ! ( !should_override( 1_000_000 , OverrideContext :: Operator ) ) ;
205235 }
236+
237+ #[ test]
238+ fn should_override_uses_resident_not_allocated ( ) {
239+ // With a very large pool (1 TiB), the resident memory of a test process is far
240+ // below the threshold, so the override should fire (resident < threshold = "headroom available").
241+ let large_pool = 1024 * 1024 * 1024 * 1024 ; // 1 TiB
242+ // NOTE(review): this is a smoke test — it cannot distinguish resident from allocated bytes.
243+ // Unlike the sibling tests it has no early return for `resident_bytes() <= 0`, so it fails when stats are unavailable.
244+ let result = should_override ( large_pool, OverrideContext :: Operator ) ;
245+ assert ! ( result, "With 1TB pool limit, resident should be well below threshold — override should fire" ) ;
246+ }
247+
248+ #[ test]
249+ fn is_memory_pressured_false_for_large_pool ( ) {
250+ // With a 1 TiB pool, this process's RSS is far below the admission threshold → not pressured. (Also passes trivially when the resident reading is <= 0, since the function then returns false.)
251+ let large_pool = 1024 * 1024 * 1024 * 1024 ; // 1 TiB
252+ assert ! ( !is_memory_pressured( large_pool) ) ;
253+ }
254+
255+ #[ test]
256+ fn is_memory_pressured_true_when_rss_exceeds_limit ( ) {
257+ // Use a pool limit that is expected to sit well below the current process RSS.
258+ // Assumption: a Rust test process typically uses 50-200MB RSS, so 70% of a 20MB
259+ // limit should be exceeded; if it is not, the guard below skips the assertion.
260+ let small_pool = 20 * 1024 * 1024 ; // 20 MiB — presumed above MIN_POOL_FOR_OVERRIDE (TODO confirm)
261+ let resident = native_bridge_common:: allocator:: resident_bytes ( ) ;
262+ if resident <= 0 {
263+ return ; // resident stats not available (e.g. jemalloc not in use)
264+ }
265+ // Assert only when RSS is above 70% of 20 MiB (= 14 MiB). NOTE(review): the 70 is hard-coded and assumes the default admission threshold is in effect.
266+ if resident as usize > small_pool * 70 / 100 {
267+ assert ! ( is_memory_pressured( small_pool) ) ;
268+ }
269+ }
270+
271+ #[ test]
272+ fn is_memory_pressured_skips_small_pools ( ) {
273+ assert ! ( !is_memory_pressured( 1_000_000 ) ) ; // 1 MB pool — expected to be below MIN_POOL_FOR_OVERRIDE, so the check returns false
274+ }
275+
276+ #[ test]
277+ fn override_respects_operator_vs_admission_threshold ( ) {
278+ // Operator threshold (85%) is more permissive than admission (70%).
279+ // For a pool where RSS is between 70% and 85%:
280+ // - Admission override should NOT fire (RSS >= 70% threshold)
281+ // - Operator override SHOULD fire (RSS < 85% threshold)
282+ //
283+ // We can't precisely control RSS in a unit test, so the pool size is derived
284+ // from the RSS observed right now. NOTE(review): this assumes the default 70%/85%
285+ // thresholds are in effect and that RSS stays within the ~7% margin between this read and the reads inside `should_override`.
286+ let resident = native_bridge_common:: allocator:: resident_bytes ( ) ;
287+ if resident <= 0 {
288+ return ; // jemalloc not available in this test env
289+ }
290+ let resident = resident as usize ;
291+
292+ // Size the pool so that resident sits roughly midway between 70% and 85%:
293+ // pool = resident / 0.77 → resident/pool ≈ 77%
294+ let pool_at_midpoint = ( resident as f64 / 0.77 ) as usize ;
295+ if pool_at_midpoint < MIN_POOL_FOR_OVERRIDE {
296+ return ;
297+ }
298+
299+ // At ~77% utilization: admission (70%) should NOT override, operator (85%) SHOULD override
300+ let admission_result = should_override ( pool_at_midpoint, OverrideContext :: Admission ) ;
301+ let operator_result = should_override ( pool_at_midpoint, OverrideContext :: Operator ) ;
302+
303+ // admission: resident (~77%) >= threshold (70%) → not below threshold → override = false
304+ assert ! ( !admission_result, "At 77% RSS, admission override should NOT fire (threshold 70%)" ) ;
305+ // operator: resident (~77%) < threshold (85%) → below threshold → override = true
306+ assert ! ( operator_result, "At 77% RSS, operator override SHOULD fire (threshold 85%)" ) ;
307+ }
206308}
0 commit comments