Skip to content

Commit 222393e

Browse files
committed
DEBUG #4054 clang CI: capture stacks on homing timeout + abort
Three debug-only additions to diagnose the rip-and-test-clang failure (homing timeout after 60s + Fatal glibc pthread mutex assertion on shutdown) which my local docker cannot reproduce: - launch.sh: PYTHONFAULTHANDLER=1, ulimit -c unlimited, LIBC_FATAL_STDERR_=1, MALLOC_CHECK_=3 so SIGABRT/SIGSEGV in any Python child prints a Python+native stack to stderr (visible via linuxcnc.err). - qtvcp.py: faulthandler.enable() + register on SIGUSR1 so the smoke driver can dump qtvcp's interpreter stack without killing it. - drive.py: on homing timeout, dump per-joint state, halui machine pin, locate qtvcp processes and send SIGUSR1; sleep briefly so the stack dump lands in the log before we tear down. Will be reverted once the clang-only failure mode is understood.
1 parent d1d28b8 commit 222393e

3 files changed

Lines changed: 97 additions & 0 deletions

File tree

src/emc/usr_intf/qtvcp/qtvcp.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@
88
import signal
99
import subprocess
1010

11+
# DEBUG #4054 clang CI bisect: faulthandler.enable() was a candidate
12+
# masker of the qtvcp:481 Abort/Segfault. Temporarily disabled to test.
13+
# try:
14+
# import faulthandler
15+
# faulthandler.enable()
16+
# faulthandler.register(signal.SIGUSR1, chain=False)
17+
# except Exception:
18+
# pass
19+
1120

1221
if '--force_pyqt=6' in sys.argv:
1322
os.environ["QT_API"] = "pyqt6"

tests/ui-smoke/_lib/drive.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import argparse
1515
import linuxcnc
16+
import os
1617
import sys
1718
import time
1819

@@ -130,6 +131,41 @@ def home_all(cmd, stat, timeout):
130131
f"{timeout}s; homed={list(stat.homed[:njoints])} "
131132
f"task_state={stat.task_state} task_mode={stat.task_mode} "
132133
f"exec_state={stat.exec_state} njoints={njoints}\n")
134+
# DEBUG #4054 clang CI: dump per-joint state + halcmd snapshot +
135+
# tail of ~/qtdragon.log (qtvcp's faulthandler SIGUSR1 stack dump is currently disabled).
136+
try:
137+
for i in range(njoints):
138+
j = stat.joint[i]
139+
sys.stderr.write(
140+
f"DEBUG joint[{i}]: homed={j['homed']} homing={j['homing']} "
141+
f"enabled={j['enabled']} inpos={j['inpos']} fault={j['fault']} "
142+
f"min_hard_limit={j['min_hard_limit']} max_hard_limit={j['max_hard_limit']} "
143+
f"min_soft_limit={j['min_soft_limit']} max_soft_limit={j['max_soft_limit']}\n")
144+
sys.stderr.write(
145+
f"DEBUG axis_mask={stat.axis_mask} kinematics_type={stat.kinematics_type} "
146+
f"motion_mode={stat.motion_mode} interp_state={stat.interp_state} "
147+
f"estop={stat.estop} enabled={stat.enabled} homed_all={stat.homed}\n")
148+
except Exception as e:
149+
sys.stderr.write(f"DEBUG joint dump failed: {e}\n")
150+
import subprocess
151+
for args in (["halcmd", "show", "pin", "halui.machine"],
152+
["halcmd", "show", "pin", "joint.0"],
153+
["halcmd", "show", "param", "joint.0"],
154+
["halcmd", "show", "sig"]):
155+
try:
156+
out = subprocess.check_output(
157+
args, stderr=subprocess.STDOUT, timeout=5).decode()
158+
sys.stderr.write(f"DEBUG {' '.join(args)}:\n{out}\n")
159+
except Exception as e:
160+
sys.stderr.write(f"DEBUG {' '.join(args)} failed: {e}\n")
161+
qlog = os.path.expanduser("~/qtdragon.log")
162+
if os.path.exists(qlog):
163+
try:
164+
with open(qlog) as f:
165+
tail = f.readlines()[-50:]
166+
sys.stderr.write(f"DEBUG qtdragon.log tail:\n{''.join(tail)}\n")
167+
except Exception as e:
168+
sys.stderr.write(f"DEBUG read {qlog} failed: {e}\n")
133169
return False
134170

135171

tests/ui-smoke/_lib/launch.sh

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,58 @@ export GST_PLUGIN_FEATURE_RANK="pulsesink:NONE,alsasink:NONE,osssink:NONE,oss4si
5656
export PULSE_SERVER=/dev/null
5757
export SDL_AUDIODRIVER=dummy
5858

59+
# DEBUG #4054 clang CI bisect: PYTHONFAULTHANDLER was a candidate masker
60+
# of the qtvcp:481 Abort/Segfault. Disabled here for the test.
61+
# export PYTHONFAULTHANDLER=1
62+
# DEBUG #4054 clang CI: enable cores + glibc abort verbosity.
63+
ulimit -c unlimited
64+
export LIBC_FATAL_STDERR_=1
65+
66+
# DEBUG #4054 clang CI: wrap qtvcp under gdb --batch so a SIGABRT/SIGSEGV
67+
# inside the Qt event loop (which faulthandler confirms is happening at
68+
# qtvcp.py:481 APP.exec()) gets a C-level backtrace. The wrapper is a
69+
# temp dir prepended to PATH that shadows qtvcp; the inner gdb invocation
70+
# uses the absolute path captured BEFORE shadowing so it does not recurse.
71+
# Diagnostics go to linuxcnc.err via stderr.
72+
REAL_QTVCP_PATH="$(command -v qtvcp || true)"
73+
if ! command -v gdb >/dev/null 2>&1; then
74+
echo "DEBUG GDBWRAP installing gdb via apt" >&2
75+
sudo apt-get install -y --no-install-recommends gdb 2>&1 | tail -3 >&2 || true
76+
fi
77+
echo "DEBUG GDBWRAP REAL_QTVCP_PATH=$REAL_QTVCP_PATH" >&2
78+
echo "DEBUG GDBWRAP have_gdb=$(command -v gdb || echo NO)" >&2
79+
if [ -z "$REAL_QTVCP_PATH" ]; then
80+
echo "DEBUG GDBWRAP skipped: qtvcp not found on PATH=$PATH" >&2
81+
elif ! command -v gdb >/dev/null 2>&1; then
82+
echo "DEBUG GDBWRAP skipped: gdb missing" >&2
83+
else
84+
GDB_WRAP_DIR="$(mktemp -d -t qtvcp-gdb.XXXXXX)"
85+
cat >"$GDB_WRAP_DIR/qtvcp" <<WRAP
86+
#!/bin/bash
87+
echo "DEBUG GDBWRAP active, running qtvcp under gdb (args: \$*)" >&2
88+
# Disable Python faulthandler so gdb's ptrace handler sees the signal
89+
# first instead of faulthandler converting it to a Python-only stack.
90+
unset PYTHONFAULTHANDLER
91+
exec gdb -batch -nx \\
92+
-ex 'set pagination off' \\
93+
-ex 'handle SIG33 nostop noprint pass' \\
94+
-ex 'handle SIGCHLD nostop noprint pass' \\
95+
-ex 'handle SIGPIPE nostop noprint pass' \\
96+
-ex 'handle SIGABRT stop print nopass' \\
97+
-ex 'handle SIGSEGV stop print nopass' \\
98+
-ex run \\
99+
-ex 'echo \n=== signal caught, dumping all-thread backtrace ===\n' \\
100+
-ex 'thread apply all bt' \\
101+
-ex 'echo \n=== source context of current frame ===\n' \\
102+
-ex 'frame' \\
103+
-ex 'list' \\
104+
--args /usr/bin/python3 "$REAL_QTVCP_PATH" "\$@"
105+
WRAP
106+
chmod +x "$GDB_WRAP_DIR/qtvcp"
107+
export PATH="$GDB_WRAP_DIR:$PATH"
108+
echo "DEBUG GDBWRAP installed at $GDB_WRAP_DIR, PATH now=$PATH" >&2
109+
fi
110+
59111
# Export the per-invocation values so the inner bash -c receives them
60112
# as proper env vars (avoids embedding paths into the inner script
61113
# via quoting, which breaks on apostrophes / spaces).

0 commit comments

Comments
 (0)