Skip to content

Commit 98dff21

Browse files
committed
fix(test): use waitpid+subreaper for VM teardown
Replace the pidfd+epoll wait with waitpid(). conftest.py sets PR_SET_CHILD_SUBREAPER so orphaned descendants (Firecracker, after its jailer parent exits) reparent to the pytest session, letting us waitpid() them directly. This drops a chunk of fd-management complexity and fixes: - epoll instance leaked on every kill - pidfd leaked on exception paths in Microvm.kill() - urllib3 API session never closed (6 sockets per dead VM) - unbounded epoll.poll() could hang the runner indefinitely - ProcessLookupError on the Firecracker kill leaking the screen process: split the two os.kill() calls into a per-PID loop with its own try/except so one already-dead process can't skip the other's cleanup Signed-off-by: Jack Thomson <jackabt@amazon.com>
1 parent dcce4e6 commit 98dff21

3 files changed

Lines changed: 182 additions & 56 deletions

File tree

tests/conftest.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
https://docs.pytest.org/en/7.2.x/explanation/fixtures.html
2020
"""
2121

22+
import ctypes
2223
import inspect
2324
import json
2425
import os
@@ -53,6 +54,16 @@
5354
raise PermissionError("Test session needs to be run as root.")
5455

5556

57+
# Become a child subreaper so that orphaned descendants (Firecracker, after
58+
# its jailer parent exits) reparent to us instead of init. This lets us
59+
# waitpid() on the firecracker PID directly, avoiding the pidfd notification
60+
# race for multi-threaded processes on kernels older than 6.15.
61+
_PR_SET_CHILD_SUBREAPER = 36
62+
_libc = ctypes.CDLL("libc.so.6", use_errno=True)
63+
if _libc.prctl(_PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) != 0:
64+
raise OSError(ctypes.get_errno(), "prctl(PR_SET_CHILD_SUBREAPER) failed")
65+
66+
5667
METRICS = get_metrics_logger()
5768
PHASE_REPORT_KEY = pytest.StashKey[dict[str, pytest.CollectReport]]()
5869

tests/framework/microvm.py

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -337,30 +337,34 @@ def kill(self, might_be_dead=False):
337337
or might_be_dead
338338
), self.log_data
339339

340-
# pylint: disable=bare-except
341-
try:
342-
if self.firecracker_pid:
343-
os.kill(self.firecracker_pid, signal.SIGKILL)
344-
345-
if self.screen_pid:
346-
os.kill(self.screen_pid, signal.SIGKILL)
347-
except:
348-
if not might_be_dead:
349-
msg = (
350-
"Failed to kill Firecracker Process. Did it already die (or did the UFFD handler process die and take it down)?"
351-
if self.uffd_handler
352-
else "Failed to kill Firecracker Process. Did it already die?"
353-
)
354-
355-
self._dump_debug_information(msg)
356-
340+
# Kill Firecracker and the screen wrapper independently so that one
341+
# already being dead (ProcessLookupError) doesn't leak the other.
342+
for pid_to_kill in (self.firecracker_pid, self.screen_pid):
343+
if not pid_to_kill:
344+
continue
345+
try:
346+
os.kill(pid_to_kill, signal.SIGKILL)
347+
except ProcessLookupError:
348+
pass
349+
except OSError:
350+
if not might_be_dead:
351+
msg = (
352+
"Failed to kill Firecracker Process. Did it already die (or did the UFFD handler process die and take it down)?"
353+
if self.uffd_handler
354+
else "Failed to kill Firecracker Process. Did it already die?"
355+
)
356+
self._dump_debug_information(msg)
357+
raise
358+
359+
if self._spawned and self.firecracker_pid:
360+
try:
361+
utils.wait_process_termination(self.firecracker_pid)
362+
except TimeoutError:
363+
utils.dump_proc_state(self.firecracker_pid)
357364
raise
358365

359366
# if microvm was spawned then check if it gets killed
360367
if self._spawned:
361-
# Wait until the Firecracker process is actually dead
362-
utils.wait_process_termination(self.firecracker_pid)
363-
364368
# The following logic guards us against the case where `firecracker_pid` for some
365369
# reason is the wrong PID, e.g. this is a regression test for
366370
# https://github.com/firecracker-microvm/firecracker/pull/4442/commits/d63eb7a65ffaaae0409d15ed55d99ecbd29bc572
@@ -387,6 +391,9 @@ def kill(self, might_be_dead=False):
387391
if self.uffd_handler and self.uffd_handler.is_running():
388392
self.uffd_handler.kill()
389393

394+
if self.api:
395+
self.api.session.close()
396+
390397
# Mark the microVM as not spawned, so we avoid trying to kill twice.
391398
self._spawned = False
392399
self._killed = True

tests/framework/utils.py

Lines changed: 144 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,12 @@
33
"""Generic utility functions that are used in the framework."""
44

55
import base64
6-
import errno
76
import hashlib
87
import json
98
import logging
109
import os
1110
import platform
1211
import re
13-
import select
1412
import signal
1513
import subprocess
1614
import time
@@ -377,46 +375,156 @@ def run_guest_cmd(ssh_connection, cmd, expected, use_json=False):
377375
assert stdout == expected
378376

379377

380-
def get_process_pidfd(pid):
381-
"""Get a pidfd file descriptor for the process with PID `pid`
378+
def dump_proc_state(pid):
379+
"""Log diagnostic info for a process that failed to exit after SIGKILL."""
380+
log = logging.getLogger(__name__)
382381

383-
Will return a pid file descriptor for the process with PID `pid` if it is
384-
still alive. If the process has already exited we will receive either a
385-
`ProcessLookupError` exception or and an `OSError` exception with errno `EINVAL`.
386-
In these cases, we will return `None`.
387-
388-
Any other error while calling the system call, will raise an OSError
389-
exception.
390-
"""
382+
# Confirm we can still see this PID at all (catches namespace issues).
391383
try:
392-
pidfd = os.pidfd_open(pid)
384+
os.kill(pid, 0)
385+
log.error("Process %d kill -0: still visible", pid)
393386
except ProcessLookupError:
394-
return None
387+
log.error("Process %d kill -0: ESRCH (gone or invisible from sender)", pid)
388+
return
395389
except OSError as err:
396-
if err.errno == errno.EINVAL:
397-
return None
398-
399-
raise
400-
401-
return pidfd
390+
log.error("Process %d kill -0: errno=%d (%s)", pid, err.errno, err.strerror)
402391

403-
404-
def wait_process_termination(p_pid):
405-
"""Wait for a process to terminate.
406-
407-
Will return successfully if the process
408-
got indeed killed or raises an exception if the process
409-
is still alive after retrying several times.
392+
try:
393+
status = Path(f"/proc/{pid}/status").read_text(encoding="utf-8")
394+
log.error(
395+
"Process %d did not exit after SIGKILL. /proc/%d/status:\n%s",
396+
pid,
397+
pid,
398+
status,
399+
)
400+
except FileNotFoundError:
401+
log.error("Process %d exited between timeout and diagnostics collection", pid)
402+
return
403+
try:
404+
cmdline = Path(f"/proc/{pid}/cmdline").read_bytes().decode(errors="replace")
405+
log.error("Process %d cmdline: %s", pid, cmdline.replace("\x00", " "))
406+
except (FileNotFoundError, PermissionError):
407+
pass
408+
# /proc/PID/stat field 41 = exit_state (non-zero = task in exit path)
409+
try:
410+
stat = Path(f"/proc/{pid}/stat").read_text(encoding="utf-8").strip()
411+
# comm in stat is parenthesized and may contain spaces; split after last ')'
412+
rest = stat.rsplit(")", 1)[1].split()
413+
# field indexing per proc(5): pid(1) comm(2) state(3) ppid(4) ...
414+
# rest[0] is field 3 onwards. exit_state is field 41 → rest[38]
415+
if len(rest) > 38:
416+
log.error("Process %d exit_state (stat[41]): %s", pid, rest[38])
417+
except (FileNotFoundError, PermissionError, IndexError, ValueError):
418+
pass
419+
# /proc/PID/sched scheduling stats (voluntary_ctxt_switches etc.)
420+
try:
421+
sched = Path(f"/proc/{pid}/sched").read_text(encoding="utf-8")
422+
relevant = [
423+
line
424+
for line in sched.splitlines()
425+
if any(
426+
k in line for k in ("voluntary", "exec_runtime", "wait_sum", "switches")
427+
)
428+
]
429+
if relevant:
430+
log.error("Process %d sched:\n%s", pid, "\n".join(relevant))
431+
except (FileNotFoundError, PermissionError):
432+
pass
433+
# Per-thread state: identify which thread is stuck
434+
try:
435+
tids = sorted(int(t) for t in os.listdir(f"/proc/{pid}/task"))
436+
except FileNotFoundError:
437+
return
438+
for tid in tids:
439+
for fname in ("comm", "wchan", "syscall", "stack"):
440+
try:
441+
content = (
442+
Path(f"/proc/{pid}/task/{tid}/{fname}")
443+
.read_text(encoding="utf-8", errors="replace")
444+
.strip()
445+
)
446+
log.error("Process %d task %d %s:\n%s", pid, tid, fname, content)
447+
except (FileNotFoundError, PermissionError):
448+
pass
449+
try:
450+
tstatus = Path(f"/proc/{pid}/task/{tid}/status").read_text(encoding="utf-8")
451+
for line in tstatus.splitlines():
452+
if line.startswith(("State:", "SigBlk:", "SigPnd:", "ShdPnd:")):
453+
log.error("Process %d task %d %s", pid, tid, line.strip())
454+
except (FileNotFoundError, PermissionError):
455+
pass
456+
457+
# Tail of dmesg may show RCU stalls, hung_task, OOM, soft lockups.
458+
try:
459+
result = subprocess.run(
460+
["dmesg", "--ctime", "--time-format", "iso"],
461+
capture_output=True,
462+
text=True,
463+
timeout=5,
464+
check=False,
465+
)
466+
tail = "\n".join(result.stdout.splitlines()[-50:])
467+
if tail:
468+
log.error("dmesg tail (last 50 lines):\n%s", tail)
469+
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
470+
pass
471+
472+
# All processes parented to our test session (the subreaper) — reveals
473+
# any sibling processes (UFFD handlers, vhost-user backends, leftover FCs)
474+
# that might be keeping the stuck FC's resources alive.
475+
try:
476+
result = subprocess.run(
477+
["ps", "axo", "pid,ppid,pgid,sid,stat,wchan,comm"],
478+
capture_output=True,
479+
text=True,
480+
timeout=5,
481+
check=False,
482+
)
483+
own_pid = os.getpid()
484+
related = [result.stdout.splitlines()[0]]
485+
for line in result.stdout.splitlines()[1:]:
486+
fields = line.split(maxsplit=6)
487+
if len(fields) < 2:
488+
continue
489+
try:
490+
ppid = int(fields[1])
491+
except ValueError:
492+
continue
493+
if ppid in (own_pid, pid):
494+
related.append(line)
495+
if len(related) > 1:
496+
log.error(
497+
"Processes parented to test worker %d or stuck FC %d:\n%s",
498+
own_pid,
499+
pid,
500+
"\n".join(related),
501+
)
502+
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
503+
pass
504+
505+
506+
def wait_process_termination(p_pid, timeout=10.0):
507+
"""Wait for a process to terminate via waitpid().
508+
509+
Requires the test session to be a subreaper (set via PR_SET_CHILD_SUBREAPER
510+
in conftest.py) so orphaned descendants reparent to us. Polls with WNOHANG
511+
so we can enforce a timeout.
512+
513+
Returns successfully if the process exits within `timeout` seconds.
514+
Raises TimeoutError if it does not.
410515
"""
411-
pidfd = get_process_pidfd(p_pid)
412-
413-
# If pidfd is None the process has already terminated
414-
if pidfd is not None:
415-
epoll = select.epoll()
416-
epoll.register(pidfd, select.EPOLLIN)
417-
# This will return once the process exits
418-
epoll.poll()
419-
os.close(pidfd)
516+
deadline = time.time() + timeout
517+
while True:
518+
try:
519+
pid, _status = os.waitpid(p_pid, os.WNOHANG)
520+
except ChildProcessError:
521+
# Process is not our child (already reaped, or never reparented to us).
522+
return
523+
if pid == p_pid:
524+
return
525+
if time.time() >= deadline:
526+
raise TimeoutError(f"Process {p_pid} did not exit within {timeout}s")
527+
time.sleep(0.05)
420528

421529

422530
def get_firecracker_version_from_toml():

0 commit comments

Comments
 (0)