11#!/usr/bin/env python3
2- # Minimal UI smoke driver: confirm linuxcnc task came up and the GUI
3- # did not crash. The smoke layer answers Bertho's "does it start"
4- # question only; functional behaviour (home, run a file, verify
5- # position) belongs in tests/ui-functional/ (Phase 2).
2+ # UI smoke driver.
3+ #
4+ # Default mode (Phase 1): confirm linuxcnc task came up and the GUI did
5+ # not crash. The driver only proves the GUI started and NML is reachable.
6+ #
7+ # --run-program mode (Phase 2): also estop-reset, machine-on, home,
8+ # program_open + auto(RUN), wait for sustained INTERP_IDLE, and assert
9+ # (stat.position_after - stat.position_after_home) equals --expect-delta-mm
10+ # converted to machine units via stat.linear_units. Snapshot-and-delta
11+ # sidesteps per-sim HOME offsets; mm-input + linear_units conversion
12+ # sidesteps per-sim LINEAR_UNITS (axis and touchy sims are inch).
613
14+ import argparse
715import linuxcnc
816import sys
917import time
1018
# Budget handed to connect_and_wait_ready() for the NML status channel
# to become readable.
CONNECT_TIMEOUT_S = 60.0
# NOTE(review): the use of SETTLE_S is outside the visible part of this
# file -- confirm its meaning against main() before changing it.
SETTLE_S = 3.0
# Number of consecutive idle polls wait_program_idle() requires before
# it declares the program finished.
SETTLE_POLLS = 5
# Sleep between two stat.poll() calls in the polling loops.
POLL_INTERVAL_S = 0.01
# Per-attempt wait timeout for ensure_state / ensure_mode. The state
# normally lands well under 1s; profiling showed nothing benefits from
# more than 3s here, and shorter timeouts trim wall time when a retry
# is needed (notably gmoccapy reverting task_mode AUTO -> MANUAL).
ENSURE_ATTEMPT_TIMEOUT_S = 3.0
# After the desired task_state / task_mode is reached, re-check after
# this long. Some GUIs (notably gmoccapy and qtdragon) run their own
# startup commands that can revert a state we just set; the post-reach
# stability check catches that.
STATE_STABILITY_S = 0.5
# Maximum number of set-and-verify attempts made by ensure_state /
# ensure_mode before they give up.
STATE_RETRY_BUDGET = 6
1334
1435
1536def connect_and_wait_ready (timeout ):
1637 """Wait until linuxcnc.stat().poll() returns without error and
1738 reports a non-negative echo_serial_number. The NML status buffer
1839 can be 'invalid err=3' for the first ~30s while linuxcncsvr is
1940 still initialising; recreate the stat object on every iteration so
20- a stale invalid buffer does not stick after linuxcncsvr is ready."""
41+ a stale invalid buffer does not stick after linuxcncsvr is ready.
42+
43+ Catch the full Exception hierarchy: in early startup stat.poll()
44+ can raise SystemError ('error return without exception set') when
45+ the underlying C function reports failure without setting a Python
46+ exception. Treat that the same as linuxcnc.error and retry."""
2147 deadline = time .monotonic () + timeout
2248 last_err = None
2349 while time .monotonic () < deadline :
@@ -26,7 +52,7 @@ def connect_and_wait_ready(timeout):
2652 stat .poll ()
2753 if stat .echo_serial_number >= 0 :
2854 return linuxcnc .command (), stat
29- except linuxcnc . error as e :
55+ except Exception as e :
3056 last_err = e
3157 time .sleep (0.5 )
3258 sys .stderr .write (
@@ -35,7 +61,256 @@ def connect_and_wait_ready(timeout):
3561 return None , None
3662
3763
def wait_until_quiet(stat, predicate, timeout):
    """Poll stat until predicate(stat) is true.

    stat: object with a poll() method (a linuxcnc.stat instance).
    predicate: callable taking stat, returning a truthy value once the
        awaited condition holds.
    timeout: budget in seconds.

    Returns True on success, False on timeout. Never writes
    UI_SMOKE_FAIL: caller decides whether a timeout here is fatal (and
    writes its own UI_SMOKE_FAIL line) or is part of a retry that may
    still succeed. checkresult.sh greps for any '^UI_SMOKE_FAIL' line,
    so spurious emissions during retries must not happen.

    The status is always polled at least once, and once more after the
    final sleep, so a zero/expired timeout still observes the current
    state and a condition that became true during the last sleep is not
    misreported as a timeout."""
    deadline = time.monotonic() + timeout
    while True:
        stat.poll()
        if predicate(stat):
            return True
        # Check the deadline only after polling: the poll above is the
        # freshest view we can get before giving up.
        if time.monotonic() >= deadline:
            return False
        time.sleep(POLL_INTERVAL_S)
78+
79+
def wait_until(stat, predicate, timeout, label):
    """Like wait_until_quiet but emits UI_SMOKE_FAIL on timeout.

    Use only when timeout is fatal at the call site (no retry above).

    stat, predicate, timeout: forwarded unchanged to wait_until_quiet.
    label: human-readable name of the awaited condition, used in the
        failure line.

    Returns True when the predicate held, False after writing the
    failure line to stderr."""
    if wait_until_quiet(stat, predicate, timeout):
        return True
    # The line must end exactly at the newline: trailing characters
    # would become leading whitespace of the next stderr line and hide
    # a following UI_SMOKE_FAIL from the '^UI_SMOKE_FAIL' grep.
    sys.stderr.write(
        f"UI_SMOKE_FAIL: timeout waiting for {label} after {timeout} s\n")
    return False
87+
88+
def home_all(cmd, stat, timeout):
    """Home every joint and report whether all of them ended up homed.

    Uses cmd.home(-1), which respects HOME_SEQUENCE if configured.
    Caller must have already ensured task_state is ON via ensure_state;
    otherwise the home command is rejected with 'cannot be executed
    until the machine is out of E-stop and turned on'. The switch to
    manual mode goes through ensure_mode so a GUI that reverts the mode
    mid-sequence (gmoccapy) is detected and retried.

    timeout: seconds allowed for all joints to report homed.

    Returns True when every joint is homed, False otherwise (the
    failing helper has already written the UI_SMOKE_FAIL line)."""
    if not ensure_mode(cmd, stat, linuxcnc.MODE_MANUAL, "MODE_MANUAL"):
        return False

    # Teleop is switched off for the homing command and back on after.
    cmd.teleop_enable(0)
    cmd.wait_complete()

    # Read the joint count once, from a fresh status snapshot.
    stat.poll()
    joint_count = stat.joints

    def every_joint_homed(s):
        for joint in range(joint_count):
            if not s.homed[joint]:
                return False
        return True

    cmd.home(-1)
    homed = wait_until(stat, every_joint_homed, timeout, "all joints homed")
    if homed:
        cmd.teleop_enable(1)
        cmd.wait_complete()
    return homed
111+
112+
def wait_state(stat, target_state, timeout, label):
    """Block until stat.task_state equals target_state, or time out.

    wait_complete on a state-change command only proves task ack'd the
    NML message, not that the underlying state machine has
    transitioned, so polling task_state is the only deterministic
    signal.

    Returns True once the state is observed; on timeout the shared
    wait_until helper writes a UI_SMOKE_FAIL line naming `label` and
    False is returned."""
    def reached(s):
        return s.task_state == target_state

    return wait_until(stat, reached, timeout, label)
122+
123+
def ensure_state(cmd, stat, target_state, label):
    """Drive task_state to target_state and make sure it sticks.

    Issues cmd.state(target_state), waits for stat.task_state to reach
    target_state, then verifies it is still there after
    STATE_STABILITY_S. If the GUI reverts it (e.g. gmoccapy re-issues
    its own ESTOP on startup), retries up to STATE_RETRY_BUDGET times.

    label: name of the target state, used in WARN / failure lines.

    Returns True on stable success. Returns False once the retry budget
    is exhausted, after writing a single UI_SMOKE_FAIL line to stderr;
    per-attempt problems are reported as WARN lines only."""
    for attempt in range(1, STATE_RETRY_BUDGET + 1):
        cmd.state(target_state)
        cmd.wait_complete()
        # Quiet wait: a miss here is retried, so it must not produce a
        # UI_SMOKE_FAIL line.
        if not wait_until_quiet(
                stat, lambda s: s.task_state == target_state,
                ENSURE_ATTEMPT_TIMEOUT_S):
            sys.stderr.write(
                f"WARN: {label} not reached on attempt {attempt} , retrying\n")
            continue
        # Give the GUI a chance to revert the state, then look again.
        time.sleep(STATE_STABILITY_S)
        stat.poll()
        if stat.task_state == target_state:
            return True
        sys.stderr.write(
            f"WARN: {label} reverted to task_state={stat.task_state} "
            f"after attempt {attempt} , retrying\n")
    # Every stderr line ends exactly at its newline so this line starts
    # at column 0 and matches the '^UI_SMOKE_FAIL' grep.
    sys.stderr.write(
        f"UI_SMOKE_FAIL: {label} did not hold stable across "
        f"{STATE_RETRY_BUDGET} attempts\n")
    return False
150+
151+
def ensure_mode(cmd, stat, target_mode, label):
    """Drive task_mode to target_mode and make sure it sticks.

    Issues cmd.mode(target_mode), waits for stat.task_mode to reach
    target_mode, then verifies it is still there after
    STATE_STABILITY_S. If something reverts the mode in the meantime,
    retries up to STATE_RETRY_BUDGET times.

    label: name of the target mode, used in WARN / failure lines.

    Returns True on stable success. Returns False once the retry budget
    is exhausted, after writing a single UI_SMOKE_FAIL line to stderr;
    per-attempt problems are reported as WARN lines only."""
    for attempt in range(1, STATE_RETRY_BUDGET + 1):
        cmd.mode(target_mode)
        cmd.wait_complete()
        # Quiet wait: a miss here is retried, so it must not produce a
        # UI_SMOKE_FAIL line.
        if not wait_until_quiet(
                stat, lambda s: s.task_mode == target_mode,
                ENSURE_ATTEMPT_TIMEOUT_S):
            sys.stderr.write(
                f"WARN: {label} not reached on attempt {attempt} , retrying\n")
            continue
        # Give the GUI a chance to revert the mode, then look again.
        time.sleep(STATE_STABILITY_S)
        stat.poll()
        if stat.task_mode == target_mode:
            return True
        sys.stderr.write(
            f"WARN: {label} reverted to task_mode={stat.task_mode} "
            f"after attempt {attempt} , retrying\n")
    # Every stderr line ends exactly at its newline so this line starts
    # at column 0 and matches the '^UI_SMOKE_FAIL' grep.
    sys.stderr.write(
        f"UI_SMOKE_FAIL: {label} did not hold stable across "
        f"{STATE_RETRY_BUDGET} attempts\n")
    return False
174+
175+
# Seconds run_program() allows for interp_state to leave INTERP_IDLE
# after auto(AUTO_RUN) has been issued.
PROGRAM_START_TIMEOUT_S = 5.0
177+
178+
def snapshot(stat):
    """Render the status fields useful for Phase 2 debugging as a
    single line of space-separated name=value pairs.

    Best effort only: no poll is done here, the caller is expected to
    have just polled. The file name is rendered with repr() and the
    result ends with a single space."""
    fields = (
        ("task_state", stat.task_state),
        ("task_mode", stat.task_mode),
        ("interp_state", stat.interp_state),
        ("exec_state", stat.exec_state),
        ("motion_type", stat.motion_type),
        ("queue", stat.queue),
        ("queued_mdi_commands", stat.queued_mdi_commands),
    )
    summary = " ".join(f"{name}={value}" for name, value in fields)
    return f"{summary} file={stat.file!r} "
188+
189+
def wait_program_started(stat, timeout):
    """Wait until interp_state leaves INTERP_IDLE, i.e. the program has
    actually begun executing.

    Without this guard, a short program can finish before
    wait_program_idle gets its first poll, and the settle-window then
    mistakes the pre-start IDLE for the post-end IDLE; we then read
    stat.position at (0,0,0).

    timeout: seconds allowed for the interpreter to leave idle.

    Returns True once a non-idle interp_state is seen. On timeout,
    writes a UI_SMOKE_FAIL line with a state snapshot to stderr and
    returns False."""
    # Reuse the shared polling helper instead of a private copy of the
    # same loop; it stays silent on timeout so the message below is the
    # only failure line.
    if wait_until_quiet(
            stat, lambda s: s.interp_state != linuxcnc.INTERP_IDLE,
            timeout):
        return True
    # Refresh once more so the snapshot reflects the latest status.
    stat.poll()
    sys.stderr.write(
        f"UI_SMOKE_FAIL: program did not start within {timeout} s "
        f"(interp_state stayed INTERP_IDLE) state: {snapshot(stat)} \n")
    return False
207+
208+
def wait_program_idle(stat, timeout):
    """Wait until the program has finished and motion has drained.

    "Finished" means interp_state == INTERP_IDLE and stat.queue == 0 on
    SETTLE_POLLS consecutive polls; any non-idle poll restarts the
    count. Caller must have already proven the program started via
    wait_program_started; otherwise this returns immediately on the
    pre-start IDLE.

    timeout: seconds allowed for the program to complete.

    Returns True once the idle streak is long enough. On timeout,
    writes a UI_SMOKE_FAIL line to stderr and returns False."""
    deadline = time.monotonic() + timeout
    idle_streak = 0
    while time.monotonic() < deadline:
        stat.poll()
        if stat.interp_state == linuxcnc.INTERP_IDLE and stat.queue == 0:
            idle_streak += 1
            if idle_streak >= SETTLE_POLLS:
                return True
        else:
            idle_streak = 0
        time.sleep(POLL_INTERVAL_S)
    # End the line exactly at the newline so later stderr lines keep
    # starting at column 0 (the checker greps for '^UI_SMOKE_FAIL').
    sys.stderr.write(
        f"UI_SMOKE_FAIL: program did not reach idle within {timeout} s\n")
    return False
231+
232+
def run_program(cmd, stat, ngc_path, expect_delta_mm, tol, run_timeout):
    """Estop reset, machine on, home, snapshot position, load + run ngc,
    verify (final - start) delta matches expect_delta_mm converted to
    machine units.

    ngc_path: g-code file handed to cmd.program_open().
    expect_delta_mm: expected [dx, dy, dz] in millimetres.
    tol: allowed absolute error per axis, in machine units.
    run_timeout: seconds allowed for the program to run to completion.

    Returns True when every step succeeded and the XYZ delta is within
    tol on all three axes; False otherwise. Every failure path writes a
    UI_SMOKE_FAIL line to stderr (here or in the helper that failed)."""
    if not ensure_state(cmd, stat, linuxcnc.STATE_ESTOP_RESET,
                        "STATE_ESTOP_RESET"):
        return False
    if not ensure_state(cmd, stat, linuxcnc.STATE_ON, "STATE_ON"):
        return False

    if not home_all(cmd, stat, timeout=60.0):
        return False

    if not ensure_mode(cmd, stat, linuxcnc.MODE_AUTO, "MODE_AUTO"):
        return False

    # Snapshot start position AFTER homing + AFTER mode transition. The
    # GUI might re-issue mode commands during its own startup; doing the
    # snapshot last means we record the position right before AUTO_RUN.
    stat.poll()
    start_pos = stat.position[:3]

    cmd.program_open(ngc_path)
    cmd.wait_complete()
    # No wait_complete after auto(AUTO_RUN, 0): wait_complete blocks
    # until the operation finishes, which for AUTO_RUN means the whole
    # program completes. That would race wait_program_started; by the
    # time we polled, interp would already be back at INTERP_IDLE.
    cmd.auto(linuxcnc.AUTO_RUN, 0)

    if not wait_program_started(stat, PROGRAM_START_TIMEOUT_S):
        return False
    if not wait_program_idle(stat, run_timeout):
        return False

    # stat.linear_units: machine units per mm. mm machine -> 1.0;
    # inch machine -> 1/25.4 = 0.03937. Multiplying the expected mm
    # delta by linear_units gives the expected delta in machine units,
    # which is what stat.position reports.
    units_per_mm = stat.linear_units
    expect_machine = [d * units_per_mm for d in expect_delta_mm]
    final_pos = stat.position[:3]
    actual_delta = [final_pos[i] - start_pos[i] for i in range(3)]
    err = [abs(actual_delta[i] - expect_machine[i]) for i in range(3)]
    if any(e > tol for e in err):
        # End the line exactly at the newline so later stderr lines keep
        # starting at column 0 (the checker greps for '^UI_SMOKE_FAIL').
        sys.stderr.write(
            f"UI_SMOKE_FAIL: delta mismatch "
            f"expect_mm={expect_delta_mm} units_per_mm={units_per_mm} "
            f"expect_machine={expect_machine} "
            f"start={start_pos} final={final_pos} "
            f"actual_delta={actual_delta} err={err} tol={tol} \n")
        return False
    return True
286+
287+
def parse_xyz(s):
    """argparse `type=` callback: parse 'x,y,z' into a list of three
    finite floats.

    s: the raw command-line string, e.g. "10,0,-2.5".

    Returns [x, y, z] as floats.

    Raises argparse.ArgumentTypeError when the string does not hold
    exactly three comma-separated numbers, or when any of them is NaN
    or infinite. Non-finite values are rejected because a NaN expected
    delta makes every `error > tol` comparison false, which would let
    the position check pass without checking anything."""
    # Local import: math is not among this file's top-level imports.
    import math

    try:
        parts = [float(p) for p in s.split(",")]
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"expected x,y,z (three comma-separated floats), got {s!r}"
        ) from None
    if len(parts) != 3:
        raise argparse.ArgumentTypeError("expected x,y,z (three comma-separated floats)")
    if not all(math.isfinite(p) for p in parts):
        raise argparse.ArgumentTypeError(
            f"expected finite x,y,z values, got {s!r}")
    return parts
293+
294+
38295def main ():
296+ ap = argparse .ArgumentParser ()
297+ ap .add_argument ("--run-program" , metavar = "NGC" ,
298+ help = "g-code file to load and run (enables Phase 2 mode)" )
299+ ap .add_argument ("--expect-delta-mm" , type = parse_xyz , metavar = "DX,DY,DZ" ,
300+ help = "expected XYZ delta in mm from post-home position "
301+ "(required with --run-program). Driver converts to "
302+ "machine units via stat.linear_units so the same "
303+ "value works on inch and mm sims." )
304+ ap .add_argument ("--tol" , type = float , default = 1e-4 ,
305+ help = "position tolerance per axis in machine units "
306+ "(default: 1e-4)" )
307+ ap .add_argument ("--run-timeout" , type = float , default = 60.0 ,
308+ help = "program-completion timeout in seconds (default: 60)" )
309+ args = ap .parse_args ()
310+
311+ if args .run_program and args .expect_delta_mm is None :
312+ ap .error ("--run-program requires --expect-delta-mm DX,DY,DZ" )
313+
39314 cmd , stat = connect_and_wait_ready (CONNECT_TIMEOUT_S )
40315 if cmd is None :
41316 return 1
@@ -54,6 +329,12 @@ def main():
54329 sys .stderr .write (f"UI_SMOKE_FAIL: task disappeared after GUI startup: { e } \n " )
55330 return 1
56331
332+ if args .run_program :
333+ if not run_program (cmd , stat ,
334+ args .run_program , args .expect_delta_mm ,
335+ args .tol , args .run_timeout ):
336+ return 1
337+
57338 print ("UI_SMOKE_OK" )
58339 return 0
59340
0 commit comments