Skip to content

Commit 1630b0b

Browse files
committed
Add test to verify behaviour with xdist, change behaviour to match test
1 parent b1b25e0 commit 1630b0b

4 files changed

Lines changed: 138 additions & 18 deletions

File tree

conftest.py

Lines changed: 46 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,59 @@
11
import faulthandler
2+
import os
3+
import threading
24

35
import pytest
46

7+
DEFAULT_TIMEOUT = 300 # 5 minutes
58

6-
def pytest_runtest_setup(item: pytest.Item) -> None:
7-
"""Set faulthandler alarm as a backup timeout mechanism.
9+
_timeout_timer: threading.Timer | None = None
810

9-
This fires even if the process is stuck in C code (e.g., gRPC core).
10-
Set to pytest-timeout value + 30s so pytest-timeout handles it first.
1111

12-
Uses exit=False to avoid killing xdist worker processes — a killed worker
13-
causes 'node down: Not properly terminated' and loses the stack trace output.
14-
With exit=False, faulthandler dumps tracebacks to stderr (relayed by xdist)
15-
without terminating the process, letting pytest-timeout handle the interruption.
16-
"""
12+
def _get_timeout(item: pytest.Item) -> float:
1713
marker = item.get_closest_marker("timeout")
1814
if marker and marker.args:
19-
test_timeout = marker.args[0]
20-
else:
21-
test_timeout = item.config.getini("timeout") or 300
15+
return float(marker.args[0])
16+
return float(DEFAULT_TIMEOUT)
2217

23-
if test_timeout and float(test_timeout) > 0:
24-
faulthandler.dump_traceback_later(float(test_timeout) + 30, exit=False)
18+
19+
def pytest_runtest_setup(item: pytest.Item) -> None:
20+
"""Start a watchdog timer that dumps all thread stack traces on timeout.
21+
22+
Unlike pytest-timeout, this does NOT raise KeyboardInterrupt (which crashes
23+
xdist workers and corrupts asyncio event loops). Instead it:
24+
1. Writes the test name + all thread tracebacks directly to fd 2 (stderr).
25+
With --capture=sys in pytest.ini, fd 2 is the real stderr (not captured),
26+
so the output goes directly to the CI log even under xdist.
27+
2. Calls os._exit(1) to terminate the worker process.
28+
29+
xdist will report 'node down: Not properly terminated' which is expected —
30+
the diagnostic output will already be in the CI logs above that message.
31+
"""
32+
global _timeout_timer
33+
timeout = _get_timeout(item)
34+
if timeout <= 0:
35+
return
36+
37+
def _on_timeout() -> None:
38+
banner = "=" * 70
39+
os.write(2, f"\n\n{banner}\n".encode())
40+
os.write(2, f"TIMEOUT: {item.nodeid} exceeded {timeout}s\n".encode())
41+
os.write(2, f"{banner}\n\n".encode())
42+
# faulthandler accepts a file object or a raw fd; wrap a dup of fd 2 so closing the wrapper leaves the real stderr open
43+
with os.fdopen(os.dup(2), "w") as f:
44+
faulthandler.dump_traceback(file=f)
45+
f.flush()
46+
os.write(2, f"\n{banner}\n\n".encode())
47+
os._exit(1)
48+
49+
_timeout_timer = threading.Timer(timeout, _on_timeout)
50+
_timeout_timer.daemon = True
51+
_timeout_timer.start()
2552

2653

2754
def pytest_runtest_teardown(item: pytest.Item, nextitem: pytest.Item | None) -> None:
28-
"""Cancel the faulthandler alarm after each test."""
29-
faulthandler.cancel_dump_traceback_later()
55+
"""Cancel the watchdog timer after each test completes."""
56+
global _timeout_timer
57+
if _timeout_timer is not None:
58+
_timeout_timer.cancel()
59+
_timeout_timer = None

pytest.ini

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
[pytest]
2-
addopts = -m 'not profiling' --benchmark-skip -l --timeout=300 --timeout_method=signal
2+
addopts = -m 'not profiling' --benchmark-skip -l --capture=sys --max-worker-restart=3
33
markers =
44
profiling: marks tests that can be profiled
5+
timeout: marks tests with a custom timeout in seconds (default: 300)
56
asyncio_default_fixture_loop_scope = function

requirements-test.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ pytest-cov==6.2.1
33
pytest-asyncio==1.3.0
44
pytest-benchmark==5.1.0
55
pytest-profiling==1.8.1
6-
pytest-timeout==2.3.1
76
coverage==7.10.7
87
pytest-xdist==3.7.0
98
werkzeug==3.1.6

test/test_timeout.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
"""Tests for the custom per-test timeout mechanism in conftest.py.
2+
3+
Each test runs pytest in a subprocess because the timeout mechanism calls os._exit(1), which kills whichever process hits the timeout.
4+
"""
5+
6+
import subprocess
7+
import sys
8+
import textwrap
9+
from pathlib import Path
10+
11+
PROJECT_ROOT = Path(__file__).parent.parent
12+
13+
14+
def _run_pytest(tmp_path: Path, test_code: str, *extra_args: str) -> subprocess.CompletedProcess:
15+
"""Run pytest in a subprocess with a copy of our timeout conftest."""
16+
(tmp_path / "conftest.py").write_text((PROJECT_ROOT / "conftest.py").read_text())
17+
(tmp_path / "pytest.ini").write_text(
18+
"[pytest]\naddopts = --capture=sys --max-worker-restart=0\nmarkers =\n timeout: custom timeout\n"
19+
)
20+
(tmp_path / "test_it.py").write_text(textwrap.dedent(test_code))
21+
return subprocess.run(
22+
[
23+
sys.executable,
24+
"-m",
25+
"pytest",
26+
"-v",
27+
"-n",
28+
"auto",
29+
"--dist",
30+
"loadgroup",
31+
"test_it.py",
32+
*extra_args,
33+
],
34+
capture_output=True,
35+
text=True,
36+
timeout=60,
37+
cwd=str(tmp_path),
38+
)
39+
40+
41+
def test_timeout_prints_test_name_and_stacktrace(tmp_path: Path) -> None:
42+
result = _run_pytest(
43+
tmp_path,
44+
"""\
45+
import time
46+
import pytest
47+
48+
@pytest.mark.timeout(2)
49+
def test_hangs():
50+
time.sleep(999)
51+
""",
52+
)
53+
assert result.returncode != 0
54+
assert "TIMEOUT: test_it.py::test_hangs exceeded 2.0s" in result.stderr
55+
assert "test_hangs" in result.stderr
56+
57+
58+
def test_fast_test_not_killed(tmp_path: Path) -> None:
59+
result = _run_pytest(
60+
tmp_path,
61+
"""\
62+
import pytest
63+
64+
@pytest.mark.timeout(10)
65+
def test_fast():
66+
assert True
67+
""",
68+
)
69+
assert result.returncode == 0
70+
assert "TIMEOUT" not in result.stderr
71+
72+
73+
def test_timeout_with_passing_and_hanging_test(tmp_path: Path) -> None:
74+
result = _run_pytest(
75+
tmp_path,
76+
"""\
77+
import time
78+
import pytest
79+
80+
@pytest.mark.timeout(2)
81+
def test_hangs_in_worker():
82+
time.sleep(999)
83+
84+
def test_passes():
85+
assert True
86+
""",
87+
)
88+
assert result.returncode != 0
89+
assert "TIMEOUT: test_it.py::test_hangs_in_worker exceeded 2.0s" in result.stderr
90+
assert "test_hangs_in_worker" in result.stderr

0 commit comments

Comments
 (0)