@@ -107,6 +107,19 @@ def _is_alive(pid: int) -> bool:
107107 return True
108108
109109
110+ def _wait_until_dead (pid : int , timeout_s : float = 5.0 ) -> None :
111+ """Poll until the kernel has reaped `pid`, then assert.
112+
113+ Tolerates the latency between SIGKILL delivery and the kernel actually
114+ tearing the process down — a bare `assert not _is_alive(pid)` immediately
115+ after a kill is racy.
116+ """
117+ deadline = time .monotonic () + timeout_s
118+ while time .monotonic () < deadline and _is_alive (pid ):
119+ time .sleep (0.05 )
120+ assert not _is_alive (pid )
121+
122+
110123@pytest .mark .skipif (sys .platform == "win32" , reason = "POSIX only" )
111124def test_try_kill_process_and_group_kills_pgroup_spares_new_session () -> None :
112125 """Covers the shutdown path used by KernelManagerImpl, IPCKernelManagerImpl,
@@ -129,11 +142,7 @@ def test_try_kill_process_and_group_kills_pgroup_spares_new_session() -> None:
129142 # wait() reaps the child so it doesn't linger as a zombie.
130143 assert leader .wait (timeout = 5 ) is not None
131144
132- deadline = time .monotonic () + 5.0
133- while time .monotonic () < deadline and _is_alive (child_pg_pid ):
134- time .sleep (0.05 )
135-
136- assert not _is_alive (child_pg_pid )
145+ _wait_until_dead (child_pg_pid )
137146 assert _is_alive (child_newpg_pid )
138147 finally :
139148 for pid in (leader .pid , child_pg_pid , child_newpg_pid ):
@@ -153,13 +162,17 @@ async def test_try_kill_process_and_group_sigkills_stubborn_child(
153162 """try_kill_process_and_group schedules a reap task that escalates to
154163 SIGKILL when the child ignores SIGTERM.
155164 """
156- # Skip the reaper's real 5s/1s waits so the test is fast.
165+ # Cap the reaper's real 5s/1s waits to 50ms so the test is fast, but
166+ # leave enough wall-clock time for the kernel to deliver SIGKILL and
167+ # for the reaper's own `poll()` to reap the zombie. Zeroing the delay
168+ # entirely lets the reaper exit before the child is reaped, after which
169+ # `_is_alive` (which sees zombies as alive) would spin to timeout.
157170 real_sleep = asyncio .sleep
158171
159- async def no_wait (_delay : float ) -> None :
160- await real_sleep (0 )
172+ async def short_sleep (_delay : float ) -> None :
173+ await real_sleep (0.05 )
161174
162- monkeypatch .setattr (asyncio , "sleep" , no_wait )
175+ monkeypatch .setattr (asyncio , "sleep" , short_sleep )
163176
164177 script = (
165178 "import signal, time\n "
@@ -174,7 +187,7 @@ async def no_wait(_delay: float) -> None:
174187 try_kill_process_and_group (cast (ProcessLike , proc ))
175188 # Drain the reap task(s) scheduled on the current loop.
176189 await asyncio .gather (* list (_REAP_TASKS ), return_exceptions = True )
177- assert not _is_alive (proc .pid )
190+ _wait_until_dead (proc .pid )
178191 finally :
179192 with contextlib .suppress (ProcessLookupError ):
180193 os .killpg (pgid , signal .SIGKILL )
0 commit comments