Skip to content

Commit 07743c0

Browse files
mccormicktdmah42
andauthored
fix(cells): Use process groups and properly handle child process reaping (#599)
* fix(cells): handle exit codes and prevent process orphaning on stop Stop on a tracked executable previously left grandchildren alive on the host. tokio::process::Child::kill signals only the leader PID — even when process_group(0) was set on the Command, the spawned PGID's members are not signaled. The fix sets each executable's process group to its own PGID via process_group(0), then signals the entire group with killpg(SIGKILL) on stop. Adds: - Executable::start: process_group(0) so the spawned child is its own PGID leader. Captures the leader PID at spawn so killpg targets the right group even after Tokio reaps the child internally. - Executable::kill: replaces child.kill() with killpg(SIGKILL) on the captured PGID. Always reaps the child and joins the stdout/stderr reader tasks even if killpg fails, surfacing the killpg error after cleanup. - Executable::pid: now infallible (Option<Pid>); pid is read from the captured field, not from a possibly-reaped Child. - NestedAuraed::kill: tolerates ESRCH from nix::sys::signal::kill (process already gone) so cell teardown is idempotent. - Executables::stop: distinguishes 'never inserted' (ExecutableNotFound) from 'process already exited' (new ExecutableAlreadyExited variant) via ESRCH/ECHILD classification on the io::Error. Cache is evicted in both cases. Other errors still propagate as FailedToStopExecutable. - Executables::broadcast_stop: logs kill failures instead of dropping them silently. - ExecutablesError::ExecutableAlreadyExited: new variant; mapped to Status::not_found in CellsServiceError -> Status. - CellService::stop: reads pid from the infallible pid() and decouples observe-service channel cleanup from the stop result so channels are unregistered even when kill fails. Translates ExecutableNotFound and ExecutableAlreadyExited to an Ok response for idempotency. Tests (auraed/tests/cell_start_stop_delete.rs): - cells_start_stop_delete: happy-path allocate / start / stop / free. - cells_stop_kills_entire_process_group: regression test for #534. Spawns a bash wrapper that forks two background sleeps; after stop, every PID in the leader's PGID must be gone within 3s. - cells_double_stop_is_idempotent: pins that a second stop returns Ok. - cells_stop_after_natural_exit_is_ok: stop on a process that has already exited on its own returns Ok (drives the ESRCH/ECHILD classification path). Unit tests (executables.rs): - start_should_cache_pid_and_reject_duplicates - stop_after_natural_exit_returns_ok_and_evicts - stop_unknown_name_returns_not_found * fix(vms): remove SIGCHLD SIG_IGN that auto-reaped cells children VirtualMachines::new called libc::signal(SIGCHLD, SIG_IGN). The disposition is inherited across execve into every nested auraed, so the kernel auto-reaped spawned children. That made Executable::kill's child.wait().await return ECHILD on every cells stop and caused waitpid in nested_auraed to hang waiting for a SIGCHLD that the kernel never delivered. Cloud Hypervisor's Vm::HANDLED_SIGNALS and Vmm::HANDLED_SIGNALS do not include SIGCHLD, so the block_signal loops below are unaffected. --------- Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
1 parent 85d4ba7 commit 07743c0

8 files changed

Lines changed: 515 additions & 61 deletions

File tree

auraed/src/cells/cell_service/cell_service.rs

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,7 @@ use proto::{
3939
},
4040
observe::LogChannelType,
4141
};
42-
use std::os::unix::fs::MetadataExt;
43-
use std::time::Duration;
44-
use std::{process::ExitStatus, sync::Arc};
42+
use std::{os::unix::fs::MetadataExt, sync::Arc, time::Duration};
4543
use tokio::sync::Mutex;
4644
use tonic::{Code, Request, Response, Status};
4745
use tracing::{info, instrument, trace, warn};
@@ -223,8 +221,7 @@ impl CellService {
223221
// Retrieve the process ID (PID) of the started executable
224222
let pid = executable
225223
.pid()
226-
.map_err(CellsServiceError::Io)?
227-
.expect("pid")
224+
.expect("started executable has captured pid")
228225
.as_raw();
229226

230227
// Register the stdout log channel for the executable's PID
@@ -290,28 +287,24 @@ impl CellService {
290287
assert!(cell_name.is_none());
291288
info!("CellService: stop() executable_name={:?}", executable_name,);
292289

293-
let pid = {
290+
let (pid, stop_result) = {
294291
let mut executables = self.executables.lock().await;
295292

296-
// Retrieve the process ID (PID) of the executable to be stopped
293+
// pid is captured at spawn (see Executable::start), so it is
294+
// available for cache entries regardless of whether Tokio has
295+
// already reaped the leader.
297296
let pid = executables
298297
.get(&executable_name)
299298
.map_err(CellsServiceError::ExecutablesError)?
300299
.pid()
301-
.map_err(CellsServiceError::Io)?
302-
.expect("pid")
300+
.expect("started executable has captured pid")
303301
.as_raw();
304302

305-
// Stop the executable and handle any errors
306-
let _: ExitStatus = executables
307-
.stop(&executable_name)
308-
.await
309-
.map_err(CellsServiceError::ExecutablesError)?;
303+
let result = executables.stop(&executable_name).await;
310304

311-
pid
305+
(pid, result)
312306
};
313307

314-
// Remove the executable's logs from the observe service.
315308
if let Err(e) = self
316309
.observe_service
317310
.unregister_sub_process_channel(pid, LogChannelType::Stdout)
@@ -327,7 +320,18 @@ impl CellService {
327320
warn!("failed to unregister stderr channel for pid {pid}: {e}");
328321
}
329322

330-
Ok(Response::new(CellServiceStopResponse::default()))
323+
use super::executables::ExecutablesError;
324+
match stop_result {
325+
Ok(_)
326+
| Err(ExecutablesError::ExecutableNotFound { .. })
327+
| Err(ExecutablesError::ExecutableAlreadyExited { .. }) => {
328+
Ok(Response::new(CellServiceStopResponse::default()))
329+
}
330+
Err(e) => Err(Status::internal(format!(
331+
"executable '{}' failed to stop: {}",
332+
executable_name, e
333+
))),
334+
}
331335
}
332336

333337
#[tracing::instrument(skip(self))]

auraed/src/cells/cell_service/error.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ impl From<CellsServiceError> for Status {
6363
ExecutablesError::ExecutableExists { .. } => {
6464
Status::already_exists(msg)
6565
}
66-
ExecutablesError::ExecutableNotFound { .. } => {
66+
ExecutablesError::ExecutableNotFound { .. }
67+
| ExecutablesError::ExecutableAlreadyExited { .. } => {
6768
Status::not_found(msg)
6869
}
6970
ExecutablesError::FailedToStartExecutable { .. }

auraed/src/cells/cell_service/executables/error.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ pub enum ExecutablesError {
2525
ExecutableExists { executable_name: ExecutableName },
2626
#[error("executable '{executable_name}' not found")]
2727
ExecutableNotFound { executable_name: ExecutableName },
28+
#[error("executable '{executable_name}' had already exited before stop")]
29+
ExecutableAlreadyExited { executable_name: ExecutableName },
2830
#[error("executable '{executable_name}' failed to start: {source}")]
2931
FailedToStartExecutable {
3032
executable_name: ExecutableName,

auraed/src/cells/cell_service/executables/executable.rs

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
\* -------------------------------------------------------------------------- */
1515
use super::{ExecutableName, ExecutableSpec};
1616
use crate::logging::log_channel::LogChannel;
17+
use nix::sys::signal::{Signal, killpg};
1718
use nix::unistd::Pid;
1819
use std::{
1920
ffi::OsString,
@@ -34,6 +35,9 @@ pub struct Executable {
3435
pub stdout: LogChannel,
3536
pub stderr: LogChannel,
3637
state: ExecutableState,
38+
// Captured at spawn so killpg targets the right group even after Tokio
39+
// internally reaps the leader (which clears `Child::id()`).
40+
pid: Option<Pid>,
3741
}
3842

3943
#[derive(Debug)]
@@ -59,7 +63,7 @@ impl Executable {
5963
let state = ExecutableState::Init { command };
6064
let stdout = LogChannel::new(format!("{name}::stdout"));
6165
let stderr = LogChannel::new(format!("{name}::stderr"));
62-
Self { name, description, stdout, stderr, state }
66+
Self { name, description, stdout, stderr, state, pid: None }
6367
}
6468

6569
/// Starts the underlying process.
@@ -77,14 +81,17 @@ impl Executable {
7781
.kill_on_drop(true)
7882
.current_dir("/")
7983
.stdout(Stdio::piped())
80-
.stderr(Stdio::piped());
84+
.stderr(Stdio::piped())
85+
.process_group(0);
86+
8187
if let Some(uid) = uid {
8288
command = command.uid(uid);
8389
}
8490
if let Some(gid) = gid {
8591
command = command.gid(gid);
8692
}
8793
let mut child = command.spawn()?;
94+
self.pid = child.id().map(|id| Pid::from_raw(id as i32));
8895

8996
let log_channel = self.stdout.clone();
9097
let stdout = child.stdout.take().expect("stdout");
@@ -140,26 +147,36 @@ impl Executable {
140147
/// Stops the executable and returns the [ExitStatus].
141148
/// If the executable has never been started, returns [None].
142149
pub async fn kill(&mut self) -> io::Result<Option<ExitStatus>> {
150+
// Pid is captured at spawn (Pid is Copy), so we can use it even
151+
// after Tokio internally reaps the leader.
152+
let captured_pid = self.pid;
143153
Ok(match &mut self.state {
144154
ExecutableState::Init { .. } => None,
145155
ExecutableState::Started { child, stdout, stderr, .. } => {
146-
child.kill().await?;
156+
// killpg the whole group (PGID == child PID via
157+
// process_group(0)); child.kill() would only signal the
158+
// leader and orphan grandchildren that joined the group.
159+
let killpg_result = killpg(
160+
captured_pid.expect("started exe has captured pid"),
161+
Signal::SIGKILL,
162+
)
163+
.map_err(io::Error::from);
164+
// Always reap and join the reader tasks, even if killpg
165+
// failed — otherwise the Child stays un-awaited and the
166+
// stdout/stderr handles leak until their pipes close.
147167
let exit_status = child.wait().await?;
148168
let _ = tokio::join!(stdout, stderr);
149169
self.state = ExecutableState::Stopped(exit_status);
170+
killpg_result?;
150171
Some(exit_status)
151172
}
152173
ExecutableState::Stopped(status) => Some(*status),
153174
})
154175
}
155176

156-
/// Returns the [Pid] while [Executable] is running, otherwise returns [None].
157-
pub fn pid(&self) -> io::Result<Option<Pid>> {
158-
let ExecutableState::Started { child: process, .. } = &self.state
159-
else {
160-
return Ok(None);
161-
};
162-
163-
Ok(process.id().map(|id| Pid::from_raw(id as i32)))
177+
/// Returns the captured [Pid] for executables that have been started.
178+
/// Returns [None] before [Executable::start] has been called.
179+
pub fn pid(&self) -> Option<Pid> {
180+
self.pid
164181
}
165182
}

auraed/src/cells/cell_service/executables/executables.rs

Lines changed: 119 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
use super::{
1717
Executable, ExecutableName, ExecutableSpec, ExecutablesError, Result,
1818
};
19+
use nix::libc;
1920
use std::{collections::HashMap, process::ExitStatus};
21+
use tracing::warn;
2022

2123
type Cache = HashMap<ExecutableName, Executable>;
2224

@@ -82,37 +84,53 @@ impl Executables {
8284
});
8385
};
8486

85-
let exit_status = executable.kill().await.map_err(|e| {
86-
ExecutablesError::FailedToStopExecutable {
87-
executable_name: executable_name.clone(),
88-
source: e,
87+
match executable.kill().await {
88+
Ok(Some(status)) => {
89+
let _ = self.cache.remove(executable_name);
90+
Ok(status)
8991
}
90-
})?;
91-
92-
let Some(exit_status) = exit_status else {
93-
// Exes that never started return None
94-
let executable =
95-
self.cache.remove(executable_name).expect("exe in cache");
96-
return Err(ExecutablesError::ExecutableNotFound {
97-
executable_name: executable.name,
98-
});
99-
};
100-
101-
let _ = self.cache.remove(executable_name).ok_or_else(|| {
102-
// get_mut would have already thrown this error, so we should never reach here
103-
ExecutablesError::ExecutableNotFound {
104-
executable_name: executable_name.clone(),
92+
Ok(None) => {
93+
// Cache invariant: only started executables are inserted
94+
// into the cache (see `start` above), so kill() on a cached
95+
// entry cannot return Ok(None).
96+
unreachable!(
97+
"executable {executable_name:?} is in cache without \
98+
having been started"
99+
);
105100
}
106-
})?;
107-
108-
Ok(exit_status)
101+
Err(e)
102+
if matches!(
103+
e.raw_os_error(),
104+
Some(libc::ESRCH) | Some(libc::ECHILD)
105+
) =>
106+
{
107+
// killpg ESRCH (group already empty) or wait ECHILD (kernel
108+
// already reaped). Process is gone; evict and report
109+
// distinctly so callers can render stop idempotent without
110+
// collapsing this with "name not in cache".
111+
warn!(
112+
"executable {executable_name:?} already exited before \
113+
stop: {e}"
114+
);
115+
let _ = self.cache.remove(executable_name);
116+
Err(ExecutablesError::ExecutableAlreadyExited {
117+
executable_name: executable_name.clone(),
118+
})
119+
}
120+
Err(e) => Err(ExecutablesError::FailedToStopExecutable {
121+
executable_name: executable_name.clone(),
122+
source: e,
123+
}),
124+
}
109125
}
110126

111127
/// Stops all executables concurrently
112128
pub async fn broadcast_stop(&mut self) {
113129
let mut names = vec![];
114130
for exe in self.cache.values_mut() {
115-
let _ = exe.kill().await;
131+
if let Err(e) = exe.kill().await {
132+
warn!("broadcast_stop: failed to kill {:?}: {e}", exe.name);
133+
}
116134
names.push(exe.name.clone())
117135
}
118136

@@ -129,9 +147,16 @@ mod tests {
129147
use tokio::process::Command;
130148

131149
fn spec_for(name: &ExecutableName) -> ExecutableSpec {
150+
spec_with_command(name, "sleep 60")
151+
}
152+
153+
fn spec_with_command(
154+
name: &ExecutableName,
155+
sh_arg: &str,
156+
) -> ExecutableSpec {
132157
let mut command = Command::new("sh");
133158
let _ = command.arg("-c");
134-
let _ = command.arg("sleep 60");
159+
let _ = command.arg(sh_arg);
135160
ExecutableSpec {
136161
name: name.clone(),
137162
description: format!("test executable {name}"),
@@ -150,8 +175,10 @@ mod tests {
150175
let executable = executables
151176
.start(spec_for(&exe_name), None, None)
152177
.expect("start executable");
153-
let pid = executable.pid().expect("read pid");
154-
assert!(pid.is_some(), "expected spawned process to expose a pid");
178+
assert!(
179+
executable.pid().is_some(),
180+
"expected spawned process to expose a pid"
181+
);
155182

156183
let err = executables
157184
.start(spec_for(&exe_name), None, None)
@@ -168,4 +195,70 @@ mod tests {
168195
"expected graceful stop or SIGKILL, got status {status:?}"
169196
);
170197
}
198+
199+
/// Stopping a short-lived executable that has already finished running
200+
/// must still return Ok (the cache holds the Stopped state) and must
201+
/// evict the cache entry.
202+
#[tokio::test]
203+
async fn stop_after_natural_exit_returns_ok_and_evicts() {
204+
let mut executables = Executables::default();
205+
let exe_name = ExecutableName::new(format!(
206+
"unit-test-self-exit-{}",
207+
uuid::Uuid::new_v4()
208+
));
209+
210+
let pid = executables
211+
.start(spec_with_command(&exe_name, "true"), None, None)
212+
.expect("start executable")
213+
.pid()
214+
.expect("captured pid")
215+
.as_raw();
216+
217+
// Give the leader time to exit. It will sit as a zombie until
218+
// child.wait() is called inside stop(); we just need to ensure the
219+
// process has actually finished its work before we test stop().
220+
let deadline =
221+
std::time::Instant::now() + std::time::Duration::from_secs(5);
222+
while std::fs::metadata(format!("/proc/{pid}/cmdline"))
223+
.map(|_| true)
224+
.unwrap_or(false)
225+
&& std::time::Instant::now() < deadline
226+
{
227+
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
228+
}
229+
230+
let _ = executables
231+
.stop(&exe_name)
232+
.await
233+
.expect("stop after natural exit should be Ok");
234+
235+
// Cache must have been evicted, so a second stop reports
236+
// ExecutableNotFound (the cache-miss variant), distinct from
237+
// ExecutableAlreadyExited.
238+
let err = executables
239+
.stop(&exe_name)
240+
.await
241+
.expect_err("second stop should report ExecutableNotFound");
242+
assert!(
243+
matches!(err, ExecutablesError::ExecutableNotFound { .. }),
244+
"expected ExecutableNotFound after eviction, got {err:?}"
245+
);
246+
}
247+
248+
/// Stopping a name that was never inserted must return ExecutableNotFound,
249+
/// not the already-exited variant.
250+
#[tokio::test]
251+
async fn stop_unknown_name_returns_not_found() {
252+
let mut executables = Executables::default();
253+
let exe_name = ExecutableName::new("never-started".to_string());
254+
255+
let err = executables
256+
.stop(&exe_name)
257+
.await
258+
.expect_err("stop on unknown name should fail");
259+
assert!(
260+
matches!(err, ExecutablesError::ExecutableNotFound { .. }),
261+
"expected ExecutableNotFound, got {err:?}"
262+
);
263+
}
171264
}

auraed/src/vms/virtual_machines.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ use std::{collections::HashMap, net::Ipv4Addr};
1616

1717
use anyhow::anyhow;
1818
use net_util::MacAddr;
19-
use nix::libc;
2019
use tracing::error;
2120
use vmm_sys_util::{rand, signal::block_signal};
2221

@@ -39,10 +38,6 @@ impl Default for VirtualMachines {
3938
impl VirtualMachines {
4039
/// Create a new instance of the virtual machines cache.
4140
pub fn new() -> Self {
42-
unsafe {
43-
let _ = libc::signal(libc::SIGCHLD, libc::SIG_IGN);
44-
}
45-
4641
// Mask the signals handled by the Cloud Hyupervisor VMM so they only run on the dedicated signal handling thread
4742
for sig in &vmm::vm::Vm::HANDLED_SIGNALS {
4843
if let Err(e) = block_signal(*sig) {

0 commit comments

Comments
 (0)