Skip to content

Commit e9cb57c

Browse files
committed
cmdext: Add CmdFds allocator and take_fds() trait method
We want good support for systemd LISTEN_FDS protocol, it's a bit subtle and complex because we need to set LISTEN_PID which can only be done via custom `setenv` right now because we don't know the PID until post-fork obviously. Then there's the next problem - take_fd_n is I/O safe for the parent but not the target process - it's easy to have overlapping fds set for the child. To fix this add a `CmdFds` struct that is source of truth for all child process fds (that we control) and detects conflicts and panics. It can be constructed with a LISTEN_FDs set. Assisted-by: OpenCode (Claude claude-opus-4-6) Signed-off-by: Colin Walters <walters@verbum.org>
1 parent 5493d68 commit e9cb57c

3 files changed

Lines changed: 375 additions & 14 deletions

File tree

src/cmdext.rs

Lines changed: 239 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,218 @@
44
//!
55
//! - File descriptor passing
66
//! - Changing to a file-descriptor relative directory
7+
//! - Systemd socket activation fd passing
78
89
use cap_std::fs::Dir;
910
use cap_std::io_lifetimes;
1011
use cap_tempfile::cap_std;
1112
use io_lifetimes::OwnedFd;
1213
use rustix::fd::{AsFd, FromRawFd, IntoRawFd};
1314
use rustix::io::FdFlags;
15+
use std::collections::BTreeSet;
16+
use std::ffi::CString;
1417
use std::os::fd::AsRawFd;
1518
use std::os::unix::process::CommandExt;
1619
use std::sync::Arc;
1720

21+
/// The file descriptor number at which systemd passes the first socket.
22+
/// See `sd_listen_fds(3)`.
23+
const SD_LISTEN_FDS_START: i32 = 3;
24+
25+
/// A validated name for a systemd socket-activation file descriptor.
26+
///
27+
/// Names appear in the `LISTEN_FDNAMES` environment variable as
28+
/// colon-separated values. The constructor validates that the name
29+
/// conforms to systemd's `fdname_is_valid()` rules: at most 255
30+
/// printable ASCII characters, excluding `:`.
31+
///
32+
/// ```
33+
/// use cap_std_ext::cmdext::SystemdFdName;
34+
/// let name = SystemdFdName::new("varlink");
35+
/// ```
36+
#[derive(Debug, Clone, Copy)]
37+
pub struct SystemdFdName<'a>(&'a str);
38+
39+
impl<'a> SystemdFdName<'a> {
40+
/// Create a new `SystemdFdName`, panicking if `name` is invalid.
41+
///
42+
/// # Panics
43+
///
44+
/// Panics if `name` is longer than 255 bytes or contains any
45+
/// character that is not printable ASCII (i.e. control characters,
46+
/// DEL, non-ASCII bytes, or `:`).
47+
pub const fn new(name: &'a str) -> Self {
48+
assert!(name.len() <= 255, "systemd fd name must be at most 255 characters");
49+
let bytes = name.as_bytes();
50+
let mut i = 0;
51+
while i < bytes.len() {
52+
let b = bytes[i];
53+
assert!(
54+
b >= b' ' && b < 127 && b != b':',
55+
"systemd fd name must only contain printable ASCII characters except ':'"
56+
);
57+
i += 1;
58+
}
59+
Self(name)
60+
}
61+
62+
/// Return the name as a string slice.
63+
pub fn as_str(&self) -> &'a str {
64+
self.0
65+
}
66+
}
67+
68+
/// File descriptor allocator for child processes.
69+
///
70+
/// Collects fd assignments and optional systemd socket-activation
71+
/// configuration, then applies them all at once via
72+
/// [`CapStdExtCommandExt::take_fds`].
73+
///
74+
/// - [`new_systemd_fds`](Self::new_systemd_fds) creates an allocator
75+
/// with systemd socket-activation fds at 3, 4, … (`SD_LISTEN_FDS_START`).
76+
/// - [`take_fd`](Self::take_fd) auto-assigns the next fd above all
77+
/// previously assigned ones (minimum 3).
78+
/// - [`take_fd_n`](Self::take_fd_n) places an fd at an explicit number,
79+
/// panicking on overlap.
80+
///
81+
/// ```no_run
82+
/// # use std::sync::Arc;
83+
/// # use cap_std_ext::cmdext::{CmdFds, CapStdExtCommandExt, SystemdFdName};
84+
/// # let varlink_fd: Arc<rustix::fd::OwnedFd> = todo!();
85+
/// # let extra_fd: Arc<rustix::fd::OwnedFd> = todo!();
86+
/// let mut cmd = std::process::Command::new("myservice");
87+
/// let mut fds = CmdFds::new_systemd_fds([(varlink_fd, SystemdFdName::new("varlink"))]);
88+
/// let extra_n = fds.take_fd(extra_fd);
89+
/// cmd.take_fds(fds);
90+
/// ```
91+
#[derive(Debug)]
92+
pub struct CmdFds {
93+
taken: BTreeSet<i32>,
94+
fds: Vec<(i32, Arc<OwnedFd>)>,
95+
/// Pre-built CStrings for the systemd env vars, set by new_systemd_fds.
96+
systemd_env: Option<(CString, CString)>,
97+
}
98+
99+
impl Default for CmdFds {
100+
fn default() -> Self {
101+
Self::new()
102+
}
103+
}
104+
105+
impl CmdFds {
106+
/// Create a new fd allocator.
107+
pub fn new() -> Self {
108+
Self {
109+
taken: BTreeSet::new(),
110+
fds: Vec::new(),
111+
systemd_env: None,
112+
}
113+
}
114+
115+
/// Create a new fd allocator with systemd socket-activation fds.
116+
///
117+
/// Each `(fd, name)` pair is assigned a consecutive fd number starting
118+
/// at `SD_LISTEN_FDS_START` (3). The `LISTEN_PID`, `LISTEN_FDS`, and
119+
/// `LISTEN_FDNAMES` environment variables will be set in the child
120+
/// when [`CapStdExtCommandExt::take_fds`] is called.
121+
///
122+
/// Additional (non-systemd) fds can be registered afterwards via
123+
/// [`take_fd`](Self::take_fd) or [`take_fd_n`](Self::take_fd_n).
124+
///
125+
/// [sd_listen_fds]: https://www.freedesktop.org/software/systemd/man/latest/sd_listen_fds.html
126+
pub fn new_systemd_fds<'a>(
127+
fds: impl IntoIterator<Item = (Arc<OwnedFd>, SystemdFdName<'a>)>,
128+
) -> Self {
129+
let mut this = Self::new();
130+
this.register_systemd_fds(fds);
131+
this
132+
}
133+
134+
/// Compute the next fd number above everything already taken
135+
/// (minimum `SD_LISTEN_FDS_START`).
136+
fn next_fd(&self) -> i32 {
137+
self.taken
138+
.last()
139+
.map(|n| n.checked_add(1).expect("fd number overflow"))
140+
.unwrap_or(SD_LISTEN_FDS_START)
141+
}
142+
143+
fn insert_fd(&mut self, n: i32) {
144+
let inserted = self.taken.insert(n);
145+
assert!(inserted, "fd {n} is already assigned");
146+
}
147+
148+
/// Register a file descriptor at the next available fd number.
149+
///
150+
/// Returns the fd number that will be assigned in the child.
151+
/// Call [`CapStdExtCommandExt::take_fds`] to apply.
152+
pub fn take_fd(&mut self, fd: Arc<OwnedFd>) -> i32 {
153+
let n = self.next_fd();
154+
self.insert_fd(n);
155+
self.fds.push((n, fd));
156+
n
157+
}
158+
159+
/// Register a file descriptor at a specific fd number.
160+
///
161+
/// Call [`CapStdExtCommandExt::take_fds`] to apply.
162+
///
163+
/// # Panics
164+
///
165+
/// Panics if `target` has already been assigned.
166+
pub fn take_fd_n(&mut self, fd: Arc<OwnedFd>, target: i32) -> &mut Self {
167+
self.insert_fd(target);
168+
self.fds.push((target, fd));
169+
self
170+
}
171+
172+
fn register_systemd_fds<'a>(
173+
&mut self,
174+
fds: impl IntoIterator<Item = (Arc<OwnedFd>, SystemdFdName<'a>)>,
175+
) {
176+
let mut n_fds: i32 = 0;
177+
let mut names = Vec::new();
178+
for (fd, name) in fds {
179+
let target = SD_LISTEN_FDS_START
180+
.checked_add(n_fds)
181+
.expect("too many fds");
182+
self.insert_fd(target);
183+
self.fds.push((target, fd));
184+
names.push(name.as_str());
185+
n_fds = n_fds.checked_add(1).expect("too many fds");
186+
}
187+
188+
let fd_count = CString::new(n_fds.to_string()).unwrap();
189+
// SAFETY: SystemdFdName guarantees no NUL bytes.
190+
let fd_names = CString::new(names.join(":")).unwrap();
191+
self.systemd_env = Some((fd_count, fd_names));
192+
}
193+
}
194+
18195
/// Extension trait for [`std::process::Command`].
19196
///
20197
/// [`cap_std::fs::Dir`]: https://docs.rs/cap-std/latest/cap_std/fs/struct.Dir.html
21198
pub trait CapStdExtCommandExt {
22-
/// Pass a file descriptor into the target process.
199+
/// Pass a file descriptor into the target process at a specific fd number.
200+
#[deprecated = "Use CmdFds with take_fds() instead"]
23201
fn take_fd_n(&mut self, fd: Arc<OwnedFd>, target: i32) -> &mut Self;
24202

203+
/// Apply a [`CmdFds`] to this command, passing all registered file
204+
/// descriptors and (if configured) setting up the systemd
205+
/// socket-activation environment.
206+
///
207+
/// # Important: Do not use `Command::env()` with systemd fds
208+
///
209+
/// When systemd socket-activation environment variables are configured
210+
/// (via [`CmdFds::new_systemd_fds`]), they are set using `setenv(3)` in
211+
/// a `pre_exec` hook. If `Command::env()` is also called, Rust will
212+
/// build an `envp` array that replaces the process environment, causing
213+
/// the `LISTEN_*` variables set by the hook to be lost. `Command::envs()`
214+
/// is equally problematic. If you need to set additional environment
215+
/// variables alongside systemd fds, set them via `pre_exec` + `setenv`
216+
/// as well.
217+
fn take_fds(&mut self, fds: CmdFds) -> &mut Self;
218+
25219
/// Use the given directory as the current working directory for the process.
26220
fn cwd_dir(&mut self, dir: Dir) -> &mut Self;
27221

@@ -39,7 +233,24 @@ pub trait CapStdExtCommandExt {
39233
fn lifecycle_bind_to_parent_thread(&mut self) -> &mut Self;
40234
}
41235

236+
/// Wrapper around `libc::setenv` that checks the return value.
237+
///
238+
/// # Safety
239+
///
240+
/// Must only be called in a single-threaded context (e.g. after `fork()`
241+
/// and before `exec()`).
42242
#[allow(unsafe_code)]
243+
unsafe fn check_setenv(key: *const i8, val: *const i8) -> std::io::Result<()> {
244+
// SAFETY: Caller guarantees we are in a single-threaded context
245+
// with valid nul-terminated C strings.
246+
if unsafe { libc::setenv(key, val, 1) } != 0 {
247+
return Err(std::io::Error::last_os_error());
248+
}
249+
Ok(())
250+
}
251+
252+
#[allow(unsafe_code)]
253+
#[allow(deprecated)]
43254
impl CapStdExtCommandExt for std::process::Command {
44255
fn take_fd_n(&mut self, fd: Arc<OwnedFd>, target: i32) -> &mut Self {
45256
unsafe {
@@ -62,6 +273,32 @@ impl CapStdExtCommandExt for std::process::Command {
62273
self
63274
}
64275

276+
fn take_fds(&mut self, fds: CmdFds) -> &mut Self {
277+
for (target, fd) in fds.fds {
278+
self.take_fd_n(fd, target);
279+
}
280+
if let Some((fd_count, fd_names)) = fds.systemd_env {
281+
// Set LISTEN_PID/FDS/FDNAMES in the forked child via setenv(3).
282+
// We cannot use Command::env() because it causes Rust to build
283+
// an envp array that replaces environ after our pre_exec setenv
284+
// calls.
285+
unsafe {
286+
self.pre_exec(move || {
287+
let pid = rustix::process::getpid();
288+
let pid_dec = rustix::path::DecInt::new(pid.as_raw_nonzero().get());
289+
// SAFETY: After fork() and before exec(), the child is
290+
// single-threaded, so setenv (which is not thread-safe)
291+
// is safe to call here.
292+
check_setenv(c"LISTEN_PID".as_ptr(), pid_dec.as_c_str().as_ptr())?;
293+
check_setenv(c"LISTEN_FDS".as_ptr(), fd_count.as_ptr())?;
294+
check_setenv(c"LISTEN_FDNAMES".as_ptr(), fd_names.as_ptr())?;
295+
Ok(())
296+
});
297+
}
298+
}
299+
self
300+
}
301+
65302
fn cwd_dir(&mut self, dir: Dir) -> &mut Self {
66303
unsafe {
67304
self.pre_exec(move || {
@@ -92,6 +329,7 @@ mod tests {
92329
use super::*;
93330
use std::sync::Arc;
94331

332+
#[allow(deprecated)]
95333
#[test]
96334
fn test_take_fdn() -> anyhow::Result<()> {
97335
// Pass srcfd == destfd and srcfd != destfd

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ pub(crate) fn escape_attempt() -> io::Error {
4242
/// Prelude, intended for glob import.
4343
pub mod prelude {
4444
#[cfg(not(windows))]
45-
pub use super::cmdext::CapStdExtCommandExt;
45+
pub use super::cmdext::{CapStdExtCommandExt, CmdFds, SystemdFdName};
4646
pub use super::dirext::CapStdExtDirExt;
4747
#[cfg(feature = "fs_utf8")]
4848
pub use super::dirext::CapStdExtDirExtUtf8;

0 commit comments

Comments
 (0)