Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion engine/sdks/rust/envoy-client/src/actor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1661,7 +1661,7 @@ mod tests {
)),
protocol_metadata: Arc::new(tokio::sync::Mutex::new(None)),
shutting_down: std::sync::atomic::AtomicBool::new(false),
last_ping_ts: std::sync::atomic::AtomicI64::new(crate::time::now_millis()),
last_ping_ts: std::sync::atomic::AtomicI64::new(0),
stopped_tx: tokio::sync::watch::channel(true).0,
});
(shared, envoy_rx)
Expand Down
3 changes: 1 addition & 2 deletions engine/sdks/rust/envoy-client/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ pub struct SharedContext {
pub shutting_down: AtomicBool,
/// Epoch ms timestamp of the most recent ping packet received from the engine. Used by
/// `EnvoyHandle::is_ping_healthy` to surface a dead engine link to upstream health checks.
/// Initialized to the construction time so a freshly created envoy reports healthy until
/// its first ping arrives or the threshold elapses without one.
/// Zero means no ping has been received yet.
pub last_ping_ts: AtomicI64,
// Latched signal fired by `envoy_loop` after its cleanup block completes.
// Waiters observing `true` are guaranteed that the loop has exited and
Expand Down
2 changes: 1 addition & 1 deletion engine/sdks/rust/envoy-client/src/envoy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ fn start_envoy_sync_inner(config: EnvoyConfig) -> EnvoyHandle {
ws_tx: Arc::new(tokio::sync::Mutex::new(None)),
protocol_metadata: Arc::new(tokio::sync::Mutex::new(None)),
shutting_down: std::sync::atomic::AtomicBool::new(false),
last_ping_ts: std::sync::atomic::AtomicI64::new(crate::time::now_millis()),
last_ping_ts: std::sync::atomic::AtomicI64::new(0),
stopped_tx,
});

Expand Down
2 changes: 1 addition & 1 deletion engine/sdks/rust/envoy-client/src/events.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ mod tests {
)),
protocol_metadata: Arc::new(tokio::sync::Mutex::new(None)),
shutting_down: std::sync::atomic::AtomicBool::new(false),
last_ping_ts: std::sync::atomic::AtomicI64::new(crate::time::now_millis()),
last_ping_ts: std::sync::atomic::AtomicI64::new(0),
stopped_tx: tokio::sync::watch::channel(true).0,
});
let handle = EnvoyHandle {
Expand Down
10 changes: 7 additions & 3 deletions engine/sdks/rust/envoy-client/src/handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,15 @@ impl EnvoyHandle {
/// Threshold for `is_ping_healthy`.
pub const PING_HEALTHY_THRESHOLD_MS: i64 = 20_000;

/// True when the engine sent a ping within `PING_HEALTHY_THRESHOLD_MS`. Returns false once
/// the engine link has been silently dead long enough that an upstream health check should
/// treat this envoy as unhealthy and recycle it.
/// True after the engine has sent at least one ping and the most recent ping is within
/// `PING_HEALTHY_THRESHOLD_MS`. Returns false when the engine link has never completed
/// the ping handshake or has gone silently dead long enough that an upstream health check
/// should treat this envoy as unhealthy and recycle it.
pub fn is_ping_healthy(&self) -> bool {
let last = self.shared.last_ping_ts.load(Ordering::Acquire);
if last == 0 {
return false;
}
crate::time::now_millis() - last < Self::PING_HEALTHY_THRESHOLD_MS
}

Expand Down
23 changes: 12 additions & 11 deletions rivetkit-rust/packages/rivetkit-core/src/serverless.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,30 +255,31 @@ impl CoreServerlessRuntime {
"This is a RivetKit server.\n\nLearn more at https://rivet.dev",
)),
("GET", "/health") => {
// Report unhealthy when an envoy is currently running but its link to the
// engine has gone silent. A 503 is the conventional "recycle me" signal for
// container hosts (Cloud Run, k8s, etc.) running behind an HTTP health probe.
let envoy_unhealthy = {
// Healthy if no envoy is connected yet or if the envoy has received a
// recent engine ping. Unhealthy only when an envoy exists but has not
// received a recent ping. 503 is the conventional "recycle me" signal
// for container hosts running behind an HTTP health probe.
let runtime_healthy = {
let guard = self.envoy.lock().await;
guard
.as_ref()
.map(|handle| !handle.is_ping_healthy())
.unwrap_or(false)
.map(|handle| handle.is_ping_healthy())
.unwrap_or(true)
};
if envoy_unhealthy {
if runtime_healthy {
Ok(json_response(
StatusCode::SERVICE_UNAVAILABLE,
StatusCode::OK,
json!({
"status": "engine_ping_stale",
"status": "ok",
"runtime": "rivetkit",
"version": self.settings.package_version,
}),
))
} else {
Ok(json_response(
StatusCode::OK,
StatusCode::SERVICE_UNAVAILABLE,
json!({
"status": "ok",
"status": "engine_ping_stale",
"runtime": "rivetkit",
"version": self.settings.package_version,
}),
Expand Down
4 changes: 2 additions & 2 deletions rivetkit-rust/packages/rivetkit-core/tests/serverless.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ mod moved_tests {
let health = runtime
.handle_request(test_request("GET", "/api/rivet/health"))
.await;
assert_eq!(health.status, 200);
assert_eq!(health.status, 503);
let health_body = read_body(health).await;
assert_eq!(health_body["status"], "ok");
assert_eq!(health_body["status"], "engine_ping_stale");
assert_eq!(health_body["runtime"], "rivetkit");
assert_eq!(health_body["version"], "test-version");

Expand Down
2 changes: 1 addition & 1 deletion rivetkit-typescript/packages/rivetkit-napi/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ impl CoreRegistry {
if envoy.ping_healthy { "ok" } else { "engine_ping_stale" },
&version,
),
None => health_response(200, "ok", &version),
None => health_response(503, "engine_ping_stale", &version),
};
Ok(response)
}
Expand Down
Loading