Skip to content

Commit bc51270

Browse files
authored
Merge pull request #92 from cgwalters/add-systemd-socket-api
cmdext: Add pass_systemd_fds() for socket activation fd passing
2 parents 5493d68 + b9bfb3d commit bc51270

4 files changed

Lines changed: 506 additions & 14 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ libc = "0.2"
2020

2121
[dev-dependencies]
2222
anyhow = "1.0"
23+
proptest = "1.11.0"
2324
rand = "0.10"
2425
uuid = "1.10"
2526

src/cmdext.rs

Lines changed: 280 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,229 @@
44
//!
55
//! - File descriptor passing
66
//! - Changing to a file-descriptor relative directory
7+
//! - Systemd socket activation fd passing
78
89
use cap_std::fs::Dir;
910
use cap_std::io_lifetimes;
1011
use cap_tempfile::cap_std;
1112
use io_lifetimes::OwnedFd;
1213
use rustix::fd::{AsFd, FromRawFd, IntoRawFd};
1314
use rustix::io::FdFlags;
15+
use std::collections::BTreeSet;
16+
use std::ffi::CString;
1417
use std::os::fd::AsRawFd;
1518
use std::os::unix::process::CommandExt;
1619
use std::sync::Arc;
1720

21+
/// The file descriptor number at which systemd passes the first socket.
22+
/// See `sd_listen_fds(3)`.
23+
///
/// `CmdFds` also uses this value as the first fd number it hands out when
/// nothing has been registered yet.
const SD_LISTEN_FDS_START: i32 = 3;
24+
25+
/// A name attached to a file descriptor passed via systemd socket activation.
///
/// These names are transmitted to the activated process through the
/// colon-separated `LISTEN_FDNAMES` environment variable. Construction
/// enforces the same rules as systemd's `fdname_is_valid()`: no more than
/// 255 bytes, every one of which is printable ASCII other than `:`.
///
/// ```
/// use cap_std_ext::cmdext::SystemdFdName;
/// let name = SystemdFdName::new("varlink");
/// ```
#[derive(Debug, Clone, Copy)]
pub struct SystemdFdName<'a>(&'a str);

impl<'a> SystemdFdName<'a> {
    /// Validate `name` and wrap it in a `SystemdFdName`.
    ///
    /// This is a `const fn`, so a name can be checked in a `const` context.
    ///
    /// # Panics
    ///
    /// Panics if `name` is longer than 255 bytes, or if any byte is outside
    /// the printable ASCII range (control characters, DEL and non-ASCII
    /// bytes are rejected) or is `:`.
    pub const fn new(name: &'a str) -> Self {
        let raw = name.as_bytes();
        assert!(
            raw.len() <= 255,
            "systemd fd name must be at most 255 characters"
        );
        // Iterators are not usable in a `const fn`, hence the manual index loop.
        let mut idx = 0;
        while idx < raw.len() {
            // Printable ASCII is b' '..=b'~'; the two ranges skip b':' which
            // sits between b'9' and b';'.
            assert!(
                matches!(raw[idx], b' '..=b'9' | b';'..=b'~'),
                "systemd fd name must only contain printable ASCII characters except ':'"
            );
            idx += 1;
        }
        Self(name)
    }

    /// Borrow the validated name with its original lifetime.
    pub fn as_str(&self) -> &'a str {
        self.0
    }
}
70+
71+
/// File descriptor allocator for child processes.
72+
///
73+
/// Collects fd assignments and optional systemd socket-activation
74+
/// configuration, then applies them all at once via
75+
/// [`CapStdExtCommandExt::take_fds`].
76+
///
77+
/// - [`new_systemd_fds`](Self::new_systemd_fds) creates an allocator
78+
/// with systemd socket-activation fds at 3, 4, … (`SD_LISTEN_FDS_START`).
79+
/// - [`take_fd`](Self::take_fd) auto-assigns the next fd above all
80+
/// previously assigned ones (minimum 3).
81+
/// - [`take_fd_n`](Self::take_fd_n) places an fd at an explicit number,
82+
/// panicking on overlap.
83+
///
84+
/// ```no_run
85+
/// # use std::sync::Arc;
86+
/// # use cap_std_ext::cmdext::{CmdFds, CapStdExtCommandExt, SystemdFdName};
87+
/// # let varlink_fd: Arc<rustix::fd::OwnedFd> = todo!();
88+
/// # let extra_fd: Arc<rustix::fd::OwnedFd> = todo!();
89+
/// let mut cmd = std::process::Command::new("myservice");
90+
/// let mut fds = CmdFds::new_systemd_fds([(varlink_fd, SystemdFdName::new("varlink"))]);
91+
/// let extra_n = fds.take_fd(extra_fd);
92+
/// cmd.take_fds(fds);
93+
/// ```
94+
#[derive(Debug)]
95+
pub struct CmdFds {
96+
taken: BTreeSet<i32>,
97+
fds: Vec<(i32, Arc<OwnedFd>)>,
98+
/// Pre-built CStrings for the systemd env vars, set by new_systemd_fds.
99+
systemd_env: Option<(CString, CString)>,
100+
}
101+
102+
impl Default for CmdFds {
103+
fn default() -> Self {
104+
Self::new()
105+
}
106+
}
107+
108+
impl CmdFds {
109+
/// Create a new fd allocator.
110+
pub fn new() -> Self {
111+
Self {
112+
taken: BTreeSet::new(),
113+
fds: Vec::new(),
114+
systemd_env: None,
115+
}
116+
}
117+
118+
/// Create a new fd allocator with systemd socket-activation fds.
119+
///
120+
/// Each `(fd, name)` pair is assigned a consecutive fd number starting
121+
/// at `SD_LISTEN_FDS_START` (3). The `LISTEN_PID`, `LISTEN_FDS`, and
122+
/// `LISTEN_FDNAMES` environment variables will be set in the child
123+
/// when [`CapStdExtCommandExt::take_fds`] is called.
124+
///
125+
/// Additional (non-systemd) fds can be registered afterwards via
126+
/// [`take_fd`](Self::take_fd) or [`take_fd_n`](Self::take_fd_n).
127+
///
128+
/// [sd_listen_fds]: https://www.freedesktop.org/software/systemd/man/latest/sd_listen_fds.html
129+
pub fn new_systemd_fds<'a>(
130+
fds: impl IntoIterator<Item = (Arc<OwnedFd>, SystemdFdName<'a>)>,
131+
) -> Self {
132+
let mut this = Self::new();
133+
this.register_systemd_fds(fds);
134+
this
135+
}
136+
137+
/// Compute the next fd number above everything already taken
138+
/// (minimum `SD_LISTEN_FDS_START`).
139+
fn next_fd(&self) -> i32 {
140+
self.taken
141+
.last()
142+
.map(|n| n.checked_add(1).expect("fd number overflow"))
143+
.unwrap_or(SD_LISTEN_FDS_START)
144+
}
145+
146+
fn insert_fd(&mut self, n: i32) {
147+
let inserted = self.taken.insert(n);
148+
assert!(inserted, "fd {n} is already assigned");
149+
}
150+
151+
/// Register a file descriptor at the next available fd number.
152+
///
153+
/// Returns the fd number that will be assigned in the child.
154+
/// Call [`CapStdExtCommandExt::take_fds`] to apply.
155+
pub fn take_fd(&mut self, fd: Arc<OwnedFd>) -> i32 {
156+
let n = self.next_fd();
157+
self.insert_fd(n);
158+
self.fds.push((n, fd));
159+
n
160+
}
161+
162+
/// Register a file descriptor at a specific fd number.
163+
///
164+
/// Call [`CapStdExtCommandExt::take_fds`] to apply.
165+
///
166+
/// # Panics
167+
///
168+
/// Panics if `target` has already been assigned.
169+
pub fn take_fd_n(&mut self, fd: Arc<OwnedFd>, target: i32) -> &mut Self {
170+
self.insert_fd(target);
171+
self.fds.push((target, fd));
172+
self
173+
}
174+
175+
fn register_systemd_fds<'a>(
176+
&mut self,
177+
fds: impl IntoIterator<Item = (Arc<OwnedFd>, SystemdFdName<'a>)>,
178+
) {
179+
let mut n_fds: i32 = 0;
180+
let mut names = Vec::new();
181+
for (fd, name) in fds {
182+
let target = SD_LISTEN_FDS_START
183+
.checked_add(n_fds)
184+
.expect("too many fds");
185+
self.insert_fd(target);
186+
self.fds.push((target, fd));
187+
names.push(name.as_str());
188+
n_fds = n_fds.checked_add(1).expect("too many fds");
189+
}
190+
191+
let fd_count = CString::new(n_fds.to_string()).unwrap();
192+
// SAFETY: SystemdFdName guarantees no NUL bytes.
193+
let fd_names = CString::new(names.join(":")).unwrap();
194+
self.systemd_env = Some((fd_count, fd_names));
195+
}
196+
}
197+
18198
/// Extension trait for [`std::process::Command`].
19199
///
20200
/// [`cap_std::fs::Dir`]: https://docs.rs/cap-std/latest/cap_std/fs/struct.Dir.html
21201
pub trait CapStdExtCommandExt {
22-
/// Pass a file descriptor into the target process.
202+
/// Pass a file descriptor into the target process at a specific fd number.
203+
///
204+
/// # Deprecated
205+
///
206+
/// Use [`CmdFds`] with [`take_fds`](Self::take_fds) instead. This method
207+
/// registers an independent `pre_exec` hook per call, which means
208+
/// multiple `take_fd_n` calls (or mixing with `take_fds`) can clobber
209+
/// each other when a source fd's raw number equals another mapping's
210+
/// target. `take_fds` handles this correctly with atomic fd shuffling.
211+
#[deprecated = "Use CmdFds with take_fds() instead"]
23212
fn take_fd_n(&mut self, fd: Arc<OwnedFd>, target: i32) -> &mut Self;
24213

214+
/// Apply a [`CmdFds`] to this command, passing all registered file
215+
/// descriptors and (if configured) setting up the systemd
216+
/// socket-activation environment.
217+
///
218+
/// # Important: Do not use `Command::env()` with systemd fds
219+
///
220+
/// When systemd socket-activation environment variables are configured
221+
/// (via [`CmdFds::new_systemd_fds`]), they are set using `setenv(3)` in
222+
/// a `pre_exec` hook. If `Command::env()` is also called, Rust will
223+
/// build an `envp` array that replaces the process environment, causing
224+
/// the `LISTEN_*` variables set by the hook to be lost. `Command::envs()`
225+
/// is equally problematic. If you need to set additional environment
226+
/// variables alongside systemd fds, set them via `pre_exec` + `setenv`
227+
/// as well.
228+
fn take_fds(&mut self, fds: CmdFds) -> &mut Self;
229+
25230
/// Use the given directory as the current working directory for the process.
26231
fn cwd_dir(&mut self, dir: Dir) -> &mut Self;
27232

@@ -39,7 +244,27 @@ pub trait CapStdExtCommandExt {
39244
fn lifecycle_bind_to_parent_thread(&mut self) -> &mut Self;
40245
}
41246

247+
/// Wrapper around `libc::setenv` that checks the return value.
248+
///
249+
/// # Safety
250+
///
251+
/// Must only be called in a single-threaded context (e.g. after `fork()`
252+
/// and before `exec()`).
42253
#[allow(unsafe_code)]
254+
unsafe fn check_setenv(
255+
key: *const std::ffi::c_char,
256+
val: *const std::ffi::c_char,
257+
) -> std::io::Result<()> {
258+
// SAFETY: Caller guarantees we are in a single-threaded context
259+
// with valid nul-terminated C strings.
260+
if unsafe { libc::setenv(key, val, 1) } != 0 {
261+
return Err(std::io::Error::last_os_error());
262+
}
263+
Ok(())
264+
}
265+
266+
#[allow(unsafe_code)]
267+
#[allow(deprecated)]
43268
impl CapStdExtCommandExt for std::process::Command {
44269
fn take_fd_n(&mut self, fd: Arc<OwnedFd>, target: i32) -> &mut Self {
45270
unsafe {
@@ -62,6 +287,59 @@ impl CapStdExtCommandExt for std::process::Command {
62287
self
63288
}
64289

290+
// Registers one `pre_exec` closure that owns `fds` and, when run:
//   1. duplicates every registered source fd to a number above the highest
//      target (via `fcntl_dupfd_cloexec`),
//   2. `dup2()`s each duplicate onto its target number,
//   3. if systemd env values are configured, exports `LISTEN_PID`,
//      `LISTEN_FDS` and `LISTEN_FDNAMES` with `setenv`.
// Any failing step returns an `io::Error` from the closure.
fn take_fds(&mut self, fds: CmdFds) -> &mut Self {
291+
// Use a single pre_exec hook that handles all fd shuffling atomically.
292+
// This avoids the problem where separate hooks clobber each other when
293+
// a source fd number equals a target fd number from a different mapping.
294+
// NOTE(review): this `unsafe` block has no SAFETY comment of its own for
// `pre_exec`. The closure allocates a `Vec` and can panic via `expect`;
// confirm that is acceptable for code std documents as running in the
// child between fork and exec.
unsafe {
295+
self.pre_exec(move || {
296+
// Dup each source fd to a temporary location above all
297+
// targets, so that no dup2() in step 2 can clobber a source.
298+
// `safe_min` is the highest target + 1, or 1 when no fds are registered.
let safe_min = fds
299+
.fds
300+
.iter()
301+
.map(|(t, _)| *t)
302+
.max()
303+
.unwrap_or(0)
304+
.checked_add(1)
305+
.expect("fd number overflow");
306+
let mut safe_copies: Vec<(i32, OwnedFd)> = Vec::new();
307+
for (target, fd) in &fds.fds {
308+
let copy = rustix::io::fcntl_dupfd_cloexec(fd, safe_min)?;
309+
safe_copies.push((*target, copy));
310+
}
311+
312+
// Place each fd at its target via dup2.
313+
// We use raw dup2 to avoid fabricating an OwnedFd for a
314+
// target number we don't yet own (which would be unsound
315+
// if dup2 failed — the OwnedFd drop would close a wrong fd).
316+
for (target, copy) in safe_copies {
317+
// SAFETY: target is a non-negative fd number that dup2
318+
// will atomically (re)open; we don't own it beforehand.
319+
// NOTE(review): nothing visible here rejects a negative `target`;
// presumably dup2 then fails and the error is returned below — confirm.
let r = libc::dup2(copy.as_raw_fd(), target);
320+
if r < 0 {
321+
return Err(std::io::Error::last_os_error());
322+
}
323+
// `copy` drops here, closing the temporary fd.
324+
}
325+
326+
// Handle systemd env vars, if configured
327+
if let Some((ref fd_count, ref fd_names)) = fds.systemd_env {
328+
// getpid() is called inside the closure, so LISTEN_PID is the pid of
// whichever process runs this hook.
let pid = rustix::process::getpid();
329+
let pid_dec = rustix::path::DecInt::new(pid.as_raw_nonzero().get());
330+
// SAFETY: After fork() and before exec(), the child is
331+
// single-threaded, so setenv (which is not thread-safe)
332+
// is safe to call here.
333+
check_setenv(c"LISTEN_PID".as_ptr(), pid_dec.as_c_str().as_ptr())?;
334+
check_setenv(c"LISTEN_FDS".as_ptr(), fd_count.as_ptr())?;
335+
check_setenv(c"LISTEN_FDNAMES".as_ptr(), fd_names.as_ptr())?;
336+
}
337+
Ok(())
338+
});
339+
}
340+
self
341+
}
342+
65343
fn cwd_dir(&mut self, dir: Dir) -> &mut Self {
66344
unsafe {
67345
self.pre_exec(move || {
@@ -92,6 +370,7 @@ mod tests {
92370
use super::*;
93371
use std::sync::Arc;
94372

373+
#[allow(deprecated)]
95374
#[test]
96375
fn test_take_fdn() -> anyhow::Result<()> {
97376
// Pass srcfd == destfd and srcfd != destfd

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ pub(crate) fn escape_attempt() -> io::Error {
4242
/// Prelude, intended for glob import.
4343
pub mod prelude {
4444
#[cfg(not(windows))]
45-
pub use super::cmdext::CapStdExtCommandExt;
45+
pub use super::cmdext::{CapStdExtCommandExt, CmdFds, SystemdFdName};
4646
pub use super::dirext::CapStdExtDirExt;
4747
#[cfg(feature = "fs_utf8")]
4848
pub use super::dirext::CapStdExtDirExtUtf8;

0 commit comments

Comments
 (0)