Skip to content

Commit e099430

Browse files
committed
ui-smoke: fail fast and explain when the GUI process dies
The driver polled NML for up to 60s after a GUI crash, then blamed whatever stage timed out (e.g. homing); a dead task keeps serving its last stat buffer. Watch the launcher PID and fail in ~1s pointing at the crash. Enable PYTHONFAULTHANDLER for a Python traceback on fatal signals.
1 parent 0ad17aa commit e099430

2 files changed

Lines changed: 72 additions & 19 deletions

File tree

tests/ui-smoke/_lib/drive.py

Lines changed: 67 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import argparse
1515
import linuxcnc
16+
import os
1617
import sys
1718
import time
1819

@@ -32,6 +33,38 @@
3233
STATE_STABILITY_S = 0.5
3334
STATE_RETRY_BUDGET = 6
3435

36+
# linuxcnc launcher PID, written to linuxcnc.pid by the launcher and read
37+
# once at startup. The driver watches it so a GUI crash, which tears
38+
# linuxcnc down, fails the test in ~1s with a clear message instead of
39+
# waiting out a long NML poll. A dead task keeps serving its last stat
40+
# buffer, so process liveness is the only reliable crash signal.
41+
_WATCH_PID = None
42+
43+
44+
class LauncherGone(Exception):
    """Raised by _watchdog() when the watched linuxcnc launcher PID no
    longer exists (the GUI crashed or task died). main() catches it,
    writes a UI_SMOKE_FAIL message to stderr and returns 1."""
46+
47+
48+
def _read_pid(path):
    """Return the PID stored in the file at *path*, or None if unusable.

    Args:
        path: Path of a text file expected to hold a single decimal PID,
            optionally surrounded by whitespace.

    Returns:
        The PID as a positive int, or None when the file cannot be
        opened or read, does not parse as an integer, or holds a value
        that is zero or negative.
    """
    try:
        with open(path) as f:
            pid = int(f.read().strip())
    except (OSError, ValueError):
        # Missing, unreadable, empty or non-numeric file: PID unknown.
        return None
    # kill(2) gives 0 and negative PIDs a special meaning (process groups,
    # "every process"), so they can never identify one launcher process.
    # Treat them as unknown rather than probing the wrong target.
    return pid if pid > 0 else None
54+
55+
56+
def _watchdog():
    """Raise LauncherGone if the watched launcher PID no longer exists.

    Reads the module-level _WATCH_PID. When it is None (PID unknown, e.g.
    the pid file was not written yet) the launcher counts as alive, so
    the check never false-fails.

    Raises:
        LauncherGone: os.kill() reported that no process has that PID.
    """
    pid = _WATCH_PID
    if pid is None:
        return
    try:
        # Signal 0 delivers nothing; the call only performs the existence
        # and permission checks for the target PID.
        os.kill(pid, 0)
    except ProcessLookupError:
        # The lookup failure is how the crash is detected, not a second
        # error, so suppress the implicit exception chaining.
        raise LauncherGone(f"launcher PID {pid} no longer exists") from None
    except PermissionError:
        # The process exists but may not be signalled by us: still alive.
        pass
67+
3568

3669
def connect_and_wait_ready(timeout):
3770
"""Wait until linuxcnc.stat().poll() returns without error and
@@ -47,6 +80,7 @@ def connect_and_wait_ready(timeout):
4780
deadline = time.monotonic() + timeout
4881
last_err = None
4982
while time.monotonic() < deadline:
83+
_watchdog()
5084
try:
5185
stat = linuxcnc.stat()
5286
stat.poll()
@@ -70,6 +104,7 @@ def wait_until_quiet(stat, predicate, timeout):
70104
must not happen."""
71105
deadline = time.monotonic() + timeout
72106
while time.monotonic() < deadline:
107+
_watchdog()
73108
stat.poll()
74109
if predicate(stat):
75110
return True
@@ -195,6 +230,7 @@ def wait_program_started(stat, timeout):
195230
IDLE; we then read stat.position at (0,0,0)."""
196231
deadline = time.monotonic() + timeout
197232
while time.monotonic() < deadline:
233+
_watchdog()
198234
stat.poll()
199235
if stat.interp_state != linuxcnc.INTERP_IDLE:
200236
return True
@@ -214,6 +250,7 @@ def wait_program_idle(stat, timeout):
214250
deadline = time.monotonic() + timeout
215251
consecutive = 0
216252
while time.monotonic() < deadline:
253+
_watchdog()
217254
stat.poll()
218255
idle = (
219256
stat.interp_state == linuxcnc.INTERP_IDLE
@@ -311,30 +348,41 @@ def main():
311348
if args.run_program and args.expect_delta_mm is None:
312349
ap.error("--run-program requires --expect-delta-mm DX,DY,DZ")
313350

314-
cmd, stat = connect_and_wait_ready(CONNECT_TIMEOUT_S)
315-
if cmd is None:
316-
return 1
317-
318-
# Give the GUI process enough time to finish constructing itself
319-
# (load .ui files, compile resources.py if needed, etc.) and
320-
# settle. If the GUI was going to crash on startup it has crashed
321-
# by now.
322-
time.sleep(SETTLE_S)
351+
global _WATCH_PID
352+
_WATCH_PID = _read_pid("linuxcnc.pid")
323353

324-
# Re-check task is still alive; a GUI crash may have torn linuxcnc
325-
# down via Cleanup.
326354
try:
327-
stat.poll()
328-
except linuxcnc.error as e:
329-
sys.stderr.write(f"UI_SMOKE_FAIL: task disappeared after GUI startup: {e}\n")
330-
return 1
355+
cmd, stat = connect_and_wait_ready(CONNECT_TIMEOUT_S)
356+
if cmd is None:
357+
return 1
358+
359+
# Give the GUI process enough time to finish constructing itself
360+
# (load .ui files, compile resources.py if needed, etc.) and
361+
# settle. If the GUI was going to crash on startup it has crashed
362+
# by now.
363+
time.sleep(SETTLE_S)
364+
_watchdog()
331365

332-
if args.run_program:
333-
if not run_program(cmd, stat,
334-
args.run_program, args.expect_delta_mm,
335-
args.tol, args.run_timeout):
366+
# Re-check task is still alive; a GUI crash may have torn linuxcnc
367+
# down via Cleanup.
368+
try:
369+
stat.poll()
370+
except linuxcnc.error as e:
371+
sys.stderr.write(f"UI_SMOKE_FAIL: task disappeared after GUI startup: {e}\n")
336372
return 1
337373

374+
if args.run_program:
375+
if not run_program(cmd, stat,
376+
args.run_program, args.expect_delta_mm,
377+
args.tol, args.run_timeout):
378+
return 1
379+
except LauncherGone:
380+
sys.stderr.write(
381+
"UI_SMOKE_FAIL: linuxcnc exited before the driver finished; "
382+
"the GUI crashed or task died. See linuxcnc.out / linuxcnc.err "
383+
"above for the backtrace.\n")
384+
return 1
385+
338386
print("UI_SMOKE_OK")
339387
return 0
340388

tests/ui-smoke/_lib/launch-env.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,8 @@ export CANBERRA_DRIVER=null
2424
export GST_PLUGIN_FEATURE_RANK="pulsesink:NONE,alsasink:NONE,osssink:NONE,oss4sink:NONE,jackaudiosink:NONE,pipewiresink:NONE,openalsink:NONE"
2525
export PULSE_SERVER=/dev/null
2626
export SDL_AUDIODRIVER=dummy
27+
28+
# Dump a Python traceback on a fatal signal. For a pure-Python crash this
29+
# names the line; for a C/C++ crash (Qt, dbus, GL) it shows the Python
30+
# frame that called in. The native side is captured by crashdump.sh.
31+
export PYTHONFAULTHANDLER=1

0 commit comments

Comments
 (0)