@@ -63,9 +63,37 @@ static struct {
6363 int skip_bootstrap ; /* Set to skip bootstrap waiting */
6464} sm ;
6565
66+ /* Shutdown trace: set SHUTDOWN_DEBUG to 1 in sm.h to enable */
67+ #define shutdbg (fmt , args ...) do { \
68+ if (SHUTDOWN_DEBUG && (runlevel == 0 || runlevel == 6)) \
69+ cprintf("shutdown[%c]: " fmt "\n", \
70+ sm_rl2ch(runlevel), ##args); \
71+ } while (0)
72+
6673static uev_t console_watcher ;
6774static int console_fd = -1 ;
6875
76+ /*
77+ * Shutdown watchdog: if graceful service stop takes too long,
78+ * force do_shutdown() directly from the event loop callback.
79+ * Default timeout is 30 seconds, adjustable at build time.
80+ */
81+ #ifndef SHUTDOWN_WDT_TIMEOUT
82+ #define SHUTDOWN_WDT_TIMEOUT 30
83+ #endif
84+ static uev_t shutdown_wdt ;
85+
86+ static void shutdown_wdt_cb (uev_t * w , void * arg , int events )
87+ {
88+ (void )w ;
89+ (void )arg ;
90+ (void )events ;
91+
92+ logit (LOG_CONSOLE | LOG_CRIT , "Timed out (%d sec) waiting for services to stop, forcing shutdown." ,
93+ SHUTDOWN_WDT_TIMEOUT );
94+ do_shutdown (halt );
95+ }
96+
6997/*
7098 * Console input callback - handles Ctrl-C during bootstrap wait
7199 */
@@ -350,6 +378,7 @@ void sm_step(void)
350378
351379 /* reload ? */
352380 if (sm .reload ) {
381+ shutdbg ("RUNNING got reload request!" );
353382 sm .reload = 0 ;
354383 sm .state = SM_RELOAD_CHANGE_STATE ;
355384 }
@@ -362,9 +391,15 @@ void sm_step(void)
362391
363392 /* Restore terse mode and run hooks before shutdown */
364393 if (runlevel == 0 || runlevel == 6 ) {
394+ shutdbg ("entering CHANGE state, starting %d sec watchdog" ,
395+ SHUTDOWN_WDT_TIMEOUT );
396+ uev_timer_init (ctx , & shutdown_wdt , shutdown_wdt_cb ,
397+ NULL , SHUTDOWN_WDT_TIMEOUT * 1000 , 0 );
365398 api_exit ();
366399 log_exit ();
400+ shutdbg ("running HOOK_SHUTDOWN ..." );
367401 plugin_run_hooks (HOOK_SHUTDOWN );
402+ shutdbg ("HOOK_SHUTDOWN done" );
368403 }
369404
370405 dbg ("Setting new runlevel --> %c <-- previous %c" , sm_rl2ch (runlevel ), sm_rl2ch (prevlevel ));
@@ -384,12 +419,14 @@ void sm_step(void)
384419 }
385420
386421 /* Reset once flag of runtasks */
422+ shutdbg ("runtask_clean + step_all ..." );
387423 service_runtask_clean ();
388424
389425 dbg ("Stopping services not allowed in new runlevel ..." );
390426 sm .in_reload = 1 ;
391427 service_step_all (SVC_TYPE_ANY );
392428
429+ shutdbg ("CHANGE done, entering WAIT" );
393430 sm .state = SM_RUNLEVEL_WAIT_STATE ;
394431 break ;
395432
@@ -400,10 +437,13 @@ void sm_step(void)
400437 */
401438 svc = svc_stop_completed ();
402439 if (svc ) {
440+ shutdbg ("WAIT, collecting %s[%d] ..." , svc_ident (svc , NULL , 0 ), svc -> pid );
403441 dbg ("Waiting to collect %s, cmd %s(%d) ..." , svc_ident (svc , NULL , 0 ), svc -> cmd , svc -> pid );
404442 break ;
405443 }
406444
445+ shutdbg ("WAIT done, all services stopped" );
446+
407447 /* Prev runlevel services stopped, call hooks before starting new runlevel ... */
408448 dbg ("All services have been stopped, calling runlevel change hooks ..." );
409449 plugin_run_hooks (HOOK_RUNLEVEL_CHANGE ); /* Reconfigure HW/VLANs/etc here */
@@ -416,6 +456,7 @@ void sm_step(void)
416456 sm .in_reload = 0 ;
417457 service_step_all (SVC_TYPE_ANY );
418458
459+ shutdbg ("entering CLEAN" );
419460 sm .state = SM_RUNLEVEL_CLEAN_STATE ;
420461 break ;
421462
@@ -427,11 +468,15 @@ void sm_step(void)
427468 */
428469 svc = svc_clean_completed ();
429470 if (svc ) {
471+ shutdbg ("CLEAN, waiting for %s[%d] (%s) ..." ,
472+ svc_ident (svc , NULL , 0 ), svc -> pid , svc_status (svc ));
430473 dbg ("Waiting to collect post/cleanup script for %s, cmd %s(%d) ..." ,
431474 svc_ident (svc , NULL , 0 ), svc -> cmd , svc -> pid );
432475 break ;
433476 }
434477
478+ shutdbg ("CLEAN done, all post/cleanup scripts collected" );
479+
435480 /* Cleanup stale services */
436481 svc_clean_dynamic (service_unregister );
437482
@@ -447,9 +492,15 @@ void sm_step(void)
447492 * Tannhäuser Gate. All those .. moments .. will be lost in time, like
448493 * tears ... in ... rain."
449494 */
450- if (runlevel == 0 || runlevel == 6 )
495+ if (runlevel == 0 || runlevel == 6 ) {
496+ shutdbg ("calling do_shutdown()" );
497+ uev_timer_stop (& shutdown_wdt );
451498 do_shutdown (halt );
499+ shutdbg ("do_shutdown() returned to SM!" );
500+ }
452501
502+ shutdbg ("entering RUNNING (reload=%d, newlevel=%d)" ,
503+ sm .reload , sm .newlevel );
453504 sm .state = SM_RUNNING_STATE ;
454505 break ;
455506
0 commit comments