diff --git a/Cargo.lock b/Cargo.lock index a557cd2ced9e5..3cb41e3ac7f74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9661,6 +9661,15 @@ dependencies = [ "untrusted", ] +[[package]] +name = "sd-notify" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e4ef7359e694bfaf1dd27a30f9d760b54c00dfae9f19bd0c05a39bc9128fe76" +dependencies = [ + "libc", +] + [[package]] name = "sdd" version = "3.0.10" @@ -12365,6 +12374,7 @@ dependencies = [ "rstest", "rumqttc", "rust_decimal", + "sd-notify", "seahash", "semver", "serde", diff --git a/Cargo.toml b/Cargo.toml index 21c40c793a9bf..28a5250ff97b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -461,6 +461,7 @@ tikv-jemallocator = { version = "0.6.0", default-features = false, features = [" [target.'cfg(target_os = "linux")'.dependencies] procfs = { version = "0.18.0", default-features = false } +sd-notify = { version = "0.5.0", default-features = false } [build-dependencies] prost-build = { workspace = true, optional = true } diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 9a135f0479f51..ab7bb1c5570bb 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -668,6 +668,7 @@ schemars,https://github.com/GREsau/schemars,MIT,Graham Esau scopeguard,https://github.com/bluss/scopeguard,MIT OR Apache-2.0,bluss sct,https://github.com/rustls/sct.rs,Apache-2.0 OR ISC OR MIT,Joseph Birr-Pixton +sd-notify,https://github.com/lnicola/sd-notify,MIT OR Apache-2.0,The sd-notify Authors seahash,https://gitlab.redox-os.org/redox-os/seahash,MIT,"ticki , Tom Almeida " sec1,https://github.com/RustCrypto/formats/tree/master/sec1,Apache-2.0 OR MIT,RustCrypto Developers secrecy,https://github.com/iqlusioninc/crates/tree/main/secrecy,Apache-2.0 OR MIT,Tony Arcieri diff --git a/changelog.d/systemd_notify.feature.md b/changelog.d/systemd_notify.feature.md new file mode 100644 index 0000000000000..d70fb8ed9b190 --- /dev/null +++ b/changelog.d/systemd_notify.feature.md @@ -0,0 +1,4 @@ +Add 
systemd notify integration. Vector now sends `READY=1` when fully started, `STOPPING=1` +when beginning a graceful shutdown, and `WATCHDOG=1` pings at half the configured `WatchdogSec` +interval. The bundled `vector.service` and `hardened-vector.service` unit files are updated +to use `Type=notify`, with an optional `WatchdogSec` directive. diff --git a/distribution/systemd/hardened-vector.service b/distribution/systemd/hardened-vector.service index c98ff1f2a7dca..61f95a5008c16 100644 --- a/distribution/systemd/hardened-vector.service +++ b/distribution/systemd/hardened-vector.service @@ -11,6 +11,7 @@ After=network-online.target Requires=network-online.target [Service] +Type=notify EnvironmentFile=-/etc/default/vector User=vector Group=vector @@ -20,6 +21,9 @@ ExecReload=/usr/bin/vector validate ExecReload=/bin/kill -HUP $MAINPID Restart=no +# Uncomment to enable watchdog. Vector will ping at half this interval. +#WatchdogSec=30 + # capabilities AmbientCapabilities=CAP_NET_BIND_SERVICE CapabilityBoundingSet=CAP_NET_BIND_SERVICE diff --git a/distribution/systemd/vector.service b/distribution/systemd/vector.service index b78ff3131e3cf..70e49359d9752 100644 --- a/distribution/systemd/vector.service +++ b/distribution/systemd/vector.service @@ -5,6 +5,7 @@ After=network-online.target Requires=network-online.target [Service] +Type=notify User=vector Group=vector ExecStartPre=/usr/bin/vector validate @@ -18,5 +19,9 @@ EnvironmentFile=-/etc/default/vector # it is also supported to have it here. StartLimitInterval=10 StartLimitBurst=5 + +# Uncomment to enable watchdog. Vector will ping at half this interval. 
+#WatchdogSec=30 + [Install] WantedBy=multi-user.target diff --git a/src/app.rs b/src/app.rs index 6f30f85de1d20..4e024863cb4be 100644 --- a/src/app.rs +++ b/src/app.rs @@ -257,6 +257,8 @@ impl Application { emit!(VectorStarted); handle.spawn(heartbeat::heartbeat()); + #[cfg(target_os = "linux")] + handle.spawn(crate::systemd::watchdog()); let Self { root_opts, @@ -276,6 +278,9 @@ impl Application { extra_context: config.extra_context, }); + #[cfg(target_os = "linux")] + crate::systemd::sd_notify_ready(); + Ok(StartedApplication { config_paths: config.config_paths, internal_topologies: config.internal_topologies, @@ -493,6 +498,8 @@ impl FinishedApplication { } async fn stop(topology_controller: TopologyController, mut signal_rx: SignalRx) -> ExitStatus { + #[cfg(target_os = "linux")] + crate::systemd::sd_notify_stopping(); emit!(VectorStopped); tokio::select! { _ = topology_controller.stop() => ExitStatus::from_raw({ diff --git a/src/lib.rs b/src/lib.rs index 8d26a3b080ecf..1b97f362af6ae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -112,6 +112,8 @@ pub(crate) mod sink_ext; pub mod sinks; #[allow(unreachable_pub)] pub mod sources; +#[cfg(target_os = "linux")] +pub mod systemd; #[cfg(feature = "api-client")] #[allow(unreachable_pub)] pub mod tap; diff --git a/src/systemd.rs b/src/systemd.rs new file mode 100644 index 0000000000000..6aaa7c3f0ffbc --- /dev/null +++ b/src/systemd.rs @@ -0,0 +1,48 @@ +//! Systemd integration via `sd_notify` +//! See <https://www.freedesktop.org/software/systemd/man/latest/sd_notify.html> + +use tokio::time::interval; + +/// Sends `READY=1` to systemd via sd_notify. No-op when `NOTIFY_SOCKET` is unset. +pub fn sd_notify_ready() { + if let Err(error) = sd_notify::notify(&[sd_notify::NotifyState::Ready]) { + warn!(message = "Failed to notify systemd of ready state.", %error); + } +} + +/// Sends `STOPPING=1` to systemd via sd_notify. No-op when `NOTIFY_SOCKET` is unset. 
+pub fn sd_notify_stopping() { + if let Err(error) = sd_notify::notify(&[sd_notify::NotifyState::Stopping]) { + warn!(message = "Failed to notify systemd of stopping state.", %error); + } +} + +/// Sends `WATCHDOG=1` to systemd via sd_notify. No-op when `NOTIFY_SOCKET` is unset. +pub fn sd_notify_watchdog() { + if let Err(error) = sd_notify::notify(&[sd_notify::NotifyState::Watchdog]) { + warn!(message = "Failed to send systemd watchdog ping.", %error); + } +} + +/// Sends `WATCHDOG=1` pings at half the `WatchdogSec` interval. Returns immediately when the watchdog is not enabled. +pub async fn watchdog() { + let Some(duration) = sd_notify::watchdog_enabled() else { + return; + }; + let mut ticker = interval(duration / 2); + loop { + ticker.tick().await; + sd_notify_watchdog(); + } +} + +#[cfg(test)] +mod tests { + #[test] + fn sd_notify_no_socket_does_not_panic() { + // `NOTIFY_SOCKET` is not set in test environments, so these calls must be no-ops. + super::sd_notify_ready(); + super::sd_notify_stopping(); + super::sd_notify_watchdog(); + } +}