Skip to content

Commit ce8b774

Browse files
committed
fix: add cgroup resource limits to RuntimeContext + eviction metric
RuntimeContext now detects cgroup v2 memory limit and CPU quota on container environments. Fields are None on bare metal. Shutdown handler emits pod_eviction_received_total counter when SIGTERM arrives in K8s mode (for PDB monitoring).
1 parent 5b8db43 commit ce8b774

2 files changed

Lines changed: 57 additions & 0 deletions

File tree

src/env.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,10 @@ pub struct RuntimeContext {
158158
pub node_name: Option<String>,
159159
/// Container ID (from `HOSTNAME` in container environments).
160160
pub container_id: Option<String>,
161+
/// cgroup memory limit in bytes (`None` if unlimited or bare metal).
162+
pub memory_limit_bytes: Option<u64>,
163+
/// cgroup CPU quota in cores (`None` if unlimited or bare metal).
164+
pub cpu_quota_cores: Option<f64>,
161165
}
162166

163167
impl RuntimeContext {
@@ -192,12 +196,27 @@ impl RuntimeContext {
192196
None
193197
};
194198

199+
// cgroup resource limits (container environments only)
200+
let memory_limit_bytes = if environment.is_container() {
201+
read_cgroup_memory_limit()
202+
} else {
203+
None
204+
};
205+
206+
let cpu_quota_cores = if environment.is_container() {
207+
read_cgroup_cpu_quota()
208+
} else {
209+
None
210+
};
211+
195212
Self {
196213
environment,
197214
pod_name,
198215
namespace,
199216
node_name,
200217
container_id,
218+
memory_limit_bytes,
219+
cpu_quota_cores,
201220
}
202221
}
203222

@@ -237,6 +256,38 @@ pub fn runtime_context() -> &'static RuntimeContext {
237256
RUNTIME_CONTEXT.get_or_init(RuntimeContext::detect)
238257
}
239258

259+
// =============================================================================
260+
// cgroup resource limit helpers
261+
// =============================================================================
262+
263+
/// Returns the cgroup v2 memory limit in bytes, if one is configured.
///
/// Reads `/sys/fs/cgroup/memory.max`. The result is `None` when the file
/// cannot be read, when it contains the literal `max` (no limit set), or when
/// its trimmed contents do not parse as a `u64`.
fn read_cgroup_memory_limit() -> Option<u64> {
    let raw = std::fs::read_to_string("/sys/fs/cgroup/memory.max").ok()?;
    match raw.trim() {
        // "max" means no limit is set.
        "max" => None,
        value => value.parse::<u64>().ok(),
    }
}
272+
273+
/// Read cgroup v2 CPU quota as fractional cores.
///
/// Reads `/sys/fs/cgroup/cpu.max`, which contains `quota period`
/// (e.g. "200000 100000" = 2 cores).
///
/// Returns `None` when the file cannot be read, when the quota is `max`
/// (no limit), or when the contents are malformed.
fn read_cgroup_cpu_quota() -> Option<f64> {
    let content = std::fs::read_to_string("/sys/fs/cgroup/cpu.max").ok()?;
    parse_cpu_max(&content)
}

/// Parse the contents of a `cpu.max` file (`quota period`) into fractional cores.
///
/// Returns `None` if fewer than two whitespace-separated fields are present,
/// if the quota field is `max`, if either field is not an unsigned integer,
/// or if the period is zero.
fn parse_cpu_max(content: &str) -> Option<f64> {
    // `split_whitespace` already skips leading/trailing whitespace, so no
    // separate `trim` is needed. Any fields after the first two are ignored.
    let mut fields = content.split_whitespace();
    let quota = fields.next()?;
    let period = fields.next()?;

    if quota == "max" {
        return None; // No limit
    }

    // Parse as integers rather than floats: `f64::from_str` accepts "inf",
    // "NaN", negative numbers and exponent notation, any of which would turn
    // into a nonsensical core count.
    let quota: u64 = quota.parse().ok()?;
    let period: u64 = period.parse().ok()?;

    // Guard against division by zero; a zero period is treated as "no quota".
    (period > 0).then(|| quota as f64 / period as f64)
}
290+
240291
// =============================================================================
241292
// Helm detection and app env helpers
242293
// =============================================================================

src/shutdown.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ pub fn install_signal_handler() -> CancellationToken {
104104
tokio::time::sleep(std::time::Duration::from_secs(prestop_delay)).await;
105105
}
106106

107+
// Emit eviction metric when SIGTERM received in K8s
108+
#[cfg(any(feature = "metrics", feature = "otel-metrics"))]
109+
if crate::env::runtime_context().is_kubernetes() {
110+
metrics::counter!("pod_eviction_received_total").increment(1);
111+
}
112+
107113
cancel.cancel();
108114

109115
#[cfg(feature = "logger")]

0 commit comments

Comments
 (0)