Skip to content

Commit c0df102

Browse files
akshaykaCopilot
andauthored
test: fix flaky kernel process-group test (#9354)
The end-to-end test added in #9257 polled disk for a pid file with a 2s deadline, which was tight enough to flake on cold macOS CI runners where Python-on-spawn + marimo re-import alone can exceed it. Split into two tests that together cover strictly more surface area: - a direct test of `try_kill_process_and_group` using a bash process tree (no Python spawn, no disk polling) - a mocked unit test asserting `close_kernel` routes through the helper Runs in ~0.3s instead of ~4s and is deterministic across repeated runs. <!-- This is an auto-generated description by cubic. --> --- ## Summary by cubic Replaces a flaky end-to-end kernel process-group test with two fast, deterministic tests. Fixes macOS CI flakes and reduces runtime from ~4s to ~0.3s. - **Bug Fixes** - Added a direct test of `try_kill_process_and_group` using `bash` in a new session; confirms grandchild termination with robust polling and cleanup. - Added a unit test asserting `KernelManagerImpl.close_kernel` routes through the helper and closes queues; avoids Python spawn and disk polling. <sup>Written for commit cb70e57. Summary will update on new commits.</sup> <!-- End of auto-generated description by cubic. --> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 3b9995a commit c0df102

1 file changed

Lines changed: 66 additions & 63 deletions

File tree

Lines changed: 66 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,22 @@
11
# Copyright 2026 Marimo. All rights reserved.
22
from __future__ import annotations
33

4-
import inspect
4+
import multiprocessing
55
import os
6+
import signal
7+
import subprocess
68
import sys
79
import time
8-
from typing import TYPE_CHECKING
10+
from unittest.mock import Mock
911

1012
import pytest
1113

12-
if TYPE_CHECKING:
13-
from pathlib import Path
14-
1514
from marimo._ast.app_config import _AppConfig
1615
from marimo._config.manager import get_default_config_manager
17-
from marimo._runtime.commands import (
18-
AppMetadata,
19-
CreateNotebookCommand,
20-
ExecuteCellCommand,
21-
UpdateUIElementCommand,
22-
)
23-
from marimo._session.managers import KernelManagerImpl, QueueManagerImpl
16+
from marimo._runtime.commands import AppMetadata
17+
from marimo._session.managers import KernelManagerImpl
2418
from marimo._session.model import SessionMode
19+
from marimo._utils.subprocess import try_kill_process_and_group
2520

2621

2722
def _is_alive(pid: int) -> bool:
@@ -33,13 +28,60 @@ def _is_alive(pid: int) -> bool:
3328

3429

3530
@pytest.mark.skipif(sys.platform == "win32", reason="POSIX only")
36-
def test_close_kernel_kills_user_spawned_subprocess(tmp_path: Path) -> None:
37-
"""A subprocess spawned by kernel-executed user code must be killed when
38-
the kernel is closed."""
39-
pid_file = tmp_path / "child.pid"
31+
def test_try_kill_process_and_group_kills_grandchild() -> None:
32+
"""SIGTERM must propagate to the full process group, not just the leader.
33+
34+
Spawns `bash` in its own session (setsid) so it becomes a new process
35+
group leader, then forks a grandchild `sleep 60`. Verifies that
36+
`try_kill_process_and_group` on the bash parent also kills the
37+
grandchild — the property PR #9257 introduced.
38+
"""
39+
p = subprocess.Popen(
40+
["bash", "-c", "sleep 60 & echo $!; exec 1>/dev/null; wait"],
41+
start_new_session=True,
42+
stdout=subprocess.PIPE,
43+
)
44+
assert p.stdout is not None
45+
grandchild_pid = int(p.stdout.readline().strip())
46+
try:
47+
try_kill_process_and_group(p)
48+
p.wait(timeout=5)
49+
50+
# The grandchild is reparented to init once bash exits; `kill(pid, 0)`
51+
# can briefly return success during the OS teardown window, so poll
52+
# with a generous ceiling we never actually approach in practice.
53+
deadline = time.monotonic() + 5.0
54+
while time.monotonic() < deadline and _is_alive(grandchild_pid):
55+
time.sleep(0.001)
56+
assert not _is_alive(grandchild_pid), (
57+
f"grandchild {grandchild_pid} survived process-group kill"
58+
)
59+
finally:
60+
if p.poll() is None:
61+
p.kill()
62+
p.wait(timeout=5)
63+
if _is_alive(grandchild_pid):
64+
try:
65+
os.kill(grandchild_pid, signal.SIGKILL)
66+
except ProcessLookupError:
67+
pass
68+
4069

41-
queue_manager = QueueManagerImpl(use_multiprocessing=True)
42-
kernel_manager = KernelManagerImpl(
70+
def test_close_kernel_calls_try_kill_process_and_group(
71+
monkeypatch: pytest.MonkeyPatch,
72+
) -> None:
73+
"""KernelManagerImpl.close_kernel must route shutdown through
74+
try_kill_process_and_group so user-spawned subprocesses are reaped."""
75+
captured: list[object] = []
76+
monkeypatch.setattr(
77+
"marimo._session.managers.kernel.try_kill_process_and_group",
78+
captured.append,
79+
)
80+
81+
queue_manager = Mock()
82+
queue_manager.win32_interrupt_queue = None
83+
84+
manager = KernelManagerImpl(
4385
queue_manager=queue_manager,
4486
mode=SessionMode.EDIT,
4587
configs={},
@@ -55,50 +97,11 @@ def test_close_kernel_kills_user_spawned_subprocess(tmp_path: Path) -> None:
5597
redirect_console_to_browser=False,
5698
)
5799

58-
kernel_manager.start_kernel()
59-
try:
60-
assert kernel_manager.is_alive()
61-
62-
queue_manager.control_queue.put(
63-
CreateNotebookCommand(
64-
execution_requests=(
65-
ExecuteCellCommand(
66-
cell_id="1",
67-
code=inspect.cleandoc(
68-
f"""
69-
import subprocess
70-
_proc = subprocess.Popen(["sleep", "60"])
71-
with open("{pid_file}", "w") as _f:
72-
_f.write(str(_proc.pid))
73-
"""
74-
),
75-
),
76-
),
77-
cell_ids=("1",),
78-
set_ui_element_value_request=UpdateUIElementCommand(
79-
object_ids=[], values=[]
80-
),
81-
auto_run=True,
82-
)
83-
)
100+
fake_task = Mock(spec=multiprocessing.Process)
101+
fake_task.is_alive.return_value = False
102+
manager.kernel_task = fake_task
84103

85-
deadline = time.monotonic() + 2
86-
while time.monotonic() < deadline and not pid_file.exists():
87-
time.sleep(0.05)
88-
assert pid_file.exists(), "kernel never spawned the subprocess"
89-
child_pid = int(pid_file.read_text())
90-
assert _is_alive(child_pid)
91-
finally:
92-
kernel_manager.close_kernel()
104+
manager.close_kernel()
93105

94-
deadline = time.monotonic() + 2.0
95-
while time.monotonic() < deadline and _is_alive(child_pid):
96-
time.sleep(0.05)
97-
try:
98-
assert not _is_alive(child_pid)
99-
finally:
100-
if _is_alive(child_pid):
101-
try:
102-
os.kill(child_pid, 9)
103-
except ProcessLookupError:
104-
pass
106+
assert captured == [fake_task]
107+
queue_manager.close_queues.assert_called_once()

0 commit comments

Comments
 (0)