Skip to content

Commit 6670e47

Browse files
committed
fix(rpc): more robust shutdown section with PID logging
1 parent a37eee0 commit 6670e47

5 files changed

Lines changed: 359 additions & 27 deletions

File tree

bec_widgets/applications/companion_app.py

Lines changed: 64 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
import signal
77
import sys
8+
import traceback
89
from contextlib import redirect_stderr, redirect_stdout
910

1011
import darkdetect
@@ -63,6 +64,7 @@ def __init__(self, args):
6364
self.app: QApplication | None = None
6465
self.launcher_window: LaunchWindow | None = None
6566
self.dispatcher: BECDispatcher | None = None
67+
self._shutdown_started = False
6668

6769
def start(self):
6870
"""
@@ -123,17 +125,8 @@ def _run(self):
123125
self.app.aboutToQuit.connect(self.shutdown)
124126
self.app.setQuitOnLastWindowClosed(True)
125127

126-
def sigint_handler(*args):
127-
# display message, for people to let it terminate gracefully
128-
print("Caught SIGINT, exiting")
129-
# Widgets should be all closed.
130-
with RPCRegister.delayed_broadcast():
131-
for widget in QApplication.instance().topLevelWidgets(): # type: ignore
132-
widget.close()
133-
self.shutdown()
134-
135-
signal.signal(signal.SIGINT, sigint_handler)
136-
signal.signal(signal.SIGTERM, sigint_handler)
128+
signal.signal(signal.SIGINT, self.request_shutdown)
129+
signal.signal(signal.SIGTERM, self.request_shutdown)
137130

138131
sys.exit(self.app.exec())
139132

@@ -150,16 +143,67 @@ def setup_bec_icon(self):
150143
)
151144
self.app.setWindowIcon(icon)
152145

146+
def request_shutdown(self, signum=None, _frame=None):
147+
"""
148+
Request Qt application shutdown from an RPC call or OS signal.
149+
150+
Cleanup itself is handled by ``shutdown()``, which is connected to
151+
``QApplication.aboutToQuit``. Calling it directly here would run BEC/RPC
152+
teardown before Qt has processed the widget close events.
153+
"""
154+
signal_name = signal.Signals(signum).name if signum is not None else "shutdown"
155+
pid = os.getpid()
156+
if self.app is None:
157+
logger.info(f"Caught {signal_name}, shutting down GUI server pid={pid} without app")
158+
self.shutdown()
159+
return
160+
161+
widgets = [
162+
f"{widget.__class__.__name__}(objectName={widget.objectName()!r})"
163+
for widget in self.app.topLevelWidgets()
164+
]
165+
logger.info(
166+
f"Caught {signal_name}, requesting GUI server shutdown pid={pid} "
167+
f"top_level_widgets={widgets}"
168+
)
169+
with RPCRegister.delayed_broadcast():
170+
for widget in self.app.topLevelWidgets():
171+
widget.close()
172+
self.app.quit()
173+
174+
@staticmethod
175+
def _run_shutdown_step(step: str, callback):
176+
try:
177+
callback()
178+
except Exception as exc:
179+
logger.error(
180+
f"GUIServer shutdown step failed pid={os.getpid()} step={step}: {exc}\n"
181+
f"{traceback.format_exc()}"
182+
)
183+
153184
def shutdown(self):
154-
logger.info("Shutdown GUIServer", repr(self))
155-
if self.launcher_window and shiboken6.isValid(self.launcher_window):
156-
self.launcher_window.close()
157-
self.launcher_window.deleteLater()
158-
if pylsp_server.is_running():
159-
pylsp_server.stop()
160-
if self.dispatcher:
161-
self.dispatcher.stop_cli_server()
162-
self.dispatcher.disconnect_all()
185+
if self._shutdown_started:
186+
return
187+
self._shutdown_started = True
188+
logger.info(f"Shutdown GUIServer pid={os.getpid()} {repr(self)}")
189+
190+
def close_launcher_window():
191+
if self.launcher_window and shiboken6.isValid(self.launcher_window):
192+
self.launcher_window.close()
193+
self.launcher_window.deleteLater()
194+
195+
def stop_pylsp_server():
196+
if pylsp_server.is_running():
197+
pylsp_server.stop()
198+
199+
def stop_dispatcher():
200+
if self.dispatcher:
201+
self.dispatcher.stop_cli_server()
202+
self.dispatcher.disconnect_all()
203+
204+
self._run_shutdown_step("close_launcher_window", close_launcher_window)
205+
self._run_shutdown_step("stop_pylsp_server", stop_pylsp_server)
206+
self._run_shutdown_step("stop_dispatcher", stop_dispatcher)
163207

164208

165209
def main():

bec_widgets/cli/client_utils.py

Lines changed: 151 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import json
66
import os
77
import select
8+
import signal
89
import subprocess
910
import threading
1011
import time
@@ -33,6 +34,9 @@
3334
logger = bec_logger.logger
3435

3536
IGNORE_WIDGETS = ["LaunchWindow"]
37+
PROCESS_TERMINATION_TIMEOUT = 10
38+
PROCESS_OUTPUT_THREAD_JOIN_TIMEOUT = 2
39+
GRACEFUL_SERVER_SHUTDOWN_TIMEOUT = 5
3640

3741
RegistryState: TypeAlias = dict[
3842
Literal["gui_id", "name", "widget_class", "config", "__rpc__", "container_proxy"],
@@ -75,6 +79,123 @@ def _get_output(process, logger) -> None:
7579
logger.error(f"Error reading process output: {str(e)}")
7680

7781

82+
def _process_group_id(process) -> int | None:
83+
pid = getattr(process, "pid", None)
84+
if os.name != "posix" or not isinstance(pid, int):
85+
return None
86+
try:
87+
return os.getpgid(pid)
88+
except ProcessLookupError:
89+
return None
90+
91+
92+
def _process_details(process) -> str:
93+
args = getattr(process, "args", None)
94+
if isinstance(args, list):
95+
command = " ".join(str(arg) for arg in args)
96+
else:
97+
command = str(args)
98+
return (
99+
f"pid={getattr(process, 'pid', None)} pgid={_process_group_id(process)} command={command}"
100+
)
101+
102+
103+
def _process_group_snapshot(process) -> str:
104+
pgid = _process_group_id(process)
105+
if pgid is None:
106+
return "Process group snapshot unavailable: process group no longer exists"
107+
try:
108+
result = subprocess.run(
109+
["ps", "-o", "pid,ppid,pgid,stat,command", "-g", str(pgid)],
110+
check=False,
111+
capture_output=True,
112+
text=True,
113+
timeout=2,
114+
)
115+
except Exception as exc:
116+
return f"Process group snapshot unavailable: {exc}"
117+
output = result.stdout.strip()
118+
if not output:
119+
return f"Process group snapshot empty for pgid={pgid}"
120+
return output
121+
122+
123+
def _terminate_plot_process(process, logger, timeout: float = PROCESS_TERMINATION_TIMEOUT) -> None:
124+
if process.poll() is not None:
125+
return
126+
127+
process_details = _process_details(process)
128+
try:
129+
pgid = _process_group_id(process)
130+
if pgid is not None:
131+
logger.info(f"Terminating GUI process group {process_details}")
132+
os.killpg(pgid, signal.SIGTERM)
133+
else:
134+
logger.info(f"Terminating GUI process {process_details}")
135+
process.terminate()
136+
except ProcessLookupError:
137+
process.wait(timeout=timeout)
138+
return
139+
except Exception as exc:
140+
logger.warning(
141+
f"Failed to terminate GUI process group: {exc}; terminating process only. "
142+
f"{process_details}"
143+
)
144+
process.terminate()
145+
146+
try:
147+
process.wait(timeout=timeout)
148+
return
149+
except subprocess.TimeoutExpired:
150+
logger.warning(
151+
f"GUI process did not stop within {timeout}s; killing it. "
152+
f"{process_details}\n{_process_group_snapshot(process)}"
153+
)
154+
155+
try:
156+
pgid = _process_group_id(process)
157+
if pgid is not None:
158+
os.killpg(pgid, signal.SIGKILL)
159+
else:
160+
process.kill()
161+
except ProcessLookupError as e:
162+
logger.error(f"Failed to kill GUI process group: {e}")
163+
process.wait(timeout=timeout)
164+
return
165+
process.wait(timeout=timeout)
166+
167+
168+
def _wait_for_process_exit(process, timeout: float) -> bool:
169+
try:
170+
process.wait(timeout=timeout)
171+
except subprocess.TimeoutExpired:
172+
return False
173+
return True
174+
175+
176+
def _join_process_output_thread(process, thread: threading.Thread | None, logger) -> None:
177+
if thread is None:
178+
return
179+
thread.join(timeout=PROCESS_OUTPUT_THREAD_JOIN_TIMEOUT)
180+
if not thread.is_alive():
181+
return
182+
183+
for stream in (process.stdout, process.stderr):
184+
if stream is None:
185+
continue
186+
try:
187+
stream.close()
188+
except OSError as e:
189+
logger.error(f"Failed to close stream {str(e)}")
190+
pass
191+
thread.join(timeout=PROCESS_OUTPUT_THREAD_JOIN_TIMEOUT)
192+
if thread.is_alive():
193+
logger.warning(
194+
"GUI process output reader thread did not stop after process shutdown. "
195+
f"{_process_details(process)}"
196+
)
197+
198+
78199
def _start_plot_process(
79200
gui_id: str,
80201
gui_class_id: str,
@@ -465,11 +586,13 @@ def kill_server(self) -> None:
465586

466587
if self._process:
467588
logger.success("Stopping GUI...")
468-
self._process.terminate()
469-
if self._process_output_processing_thread:
470-
self._process_output_processing_thread.join()
471-
self._process.wait()
589+
if not self._request_server_shutdown():
590+
_terminate_plot_process(self._process, logger)
591+
_join_process_output_thread(
592+
self._process, self._process_output_processing_thread, logger
593+
)
472594
self._process = None
595+
self._process_output_processing_thread = None
473596

474597
# Unregister the registry state
475598
self._client.connector.unregister(
@@ -488,6 +611,30 @@ def close(self):
488611
#### Private methods ####
489612
#########################
490613

614+
def _request_server_shutdown(self) -> bool:
615+
if self._process is None or self._process.poll() is not None:
616+
return True
617+
process_details = _process_details(self._process)
618+
logger.info(f"Requesting graceful GUI shutdown {process_details}")
619+
try:
620+
self.launcher._run_rpc( # pylint: disable=protected-access
621+
"system.shutdown", wait_for_rpc_response=False
622+
)
623+
except Exception as exc:
624+
logger.warning(
625+
f"Could not request graceful GUI shutdown via RPC: {exc}. " f"{process_details}"
626+
)
627+
return False
628+
if _wait_for_process_exit(self._process, GRACEFUL_SERVER_SHUTDOWN_TIMEOUT):
629+
logger.info(f"GUI server exited after graceful shutdown {process_details}")
630+
return True
631+
logger.warning(
632+
"GUI server did not exit after graceful shutdown request; "
633+
f"falling back to process termination. {process_details}\n"
634+
f"{_process_group_snapshot(self._process)}"
635+
)
636+
return False
637+
491638
def _check_if_server_is_alive(self):
492639
"""Checks if the process is alive"""
493640
if self._process is None:

bec_widgets/utils/rpc_server.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from bec_lib.logger import bec_logger
1313
from bec_lib.utils.import_utils import lazy_import
1414
from qtpy.QtCore import Qt, QTimer
15-
from qtpy.QtWidgets import QWidget
15+
from qtpy.QtWidgets import QApplication, QWidget
1616
from redis.exceptions import RedisError
1717

1818
from bec_widgets.utils.bec_connector import BECConnector
@@ -290,10 +290,23 @@ def _resolve_rpc_target(self, obj, method: str) -> tuple[object, object]:
290290
def run_system_rpc(self, method: str, args: list, kwargs: dict):
291291
if method == "system.launch_dock_area":
292292
return self._launch_dock_area(*args, **kwargs)
293+
if method == "system.shutdown":
294+
return self._shutdown_gui_server()
293295
if method == "system.list_capabilities":
294-
return {"system.launch_dock_area": True}
296+
return {"system.launch_dock_area": True, "system.shutdown": True}
295297
raise ValueError(f"Unknown system RPC method: {method}")
296298

299+
@staticmethod
300+
def _shutdown_gui_server() -> None:
301+
app = QApplication.instance()
302+
if app is None:
303+
return
304+
gui_server = getattr(app, "gui_server", None)
305+
if gui_server is not None and hasattr(gui_server, "request_shutdown"):
306+
QTimer.singleShot(0, gui_server.request_shutdown)
307+
return
308+
QTimer.singleShot(0, app.quit)
309+
297310
@staticmethod
298311
def _launch_dock_area(
299312
name: str | None = None,
@@ -468,8 +481,9 @@ def _serialize_bec_connector(self, connector: BECConnector, wait=False) -> dict:
468481
container_proxy = parent.gui_id
469482
else:
470483
container_proxy = None
471-
except Exception:
484+
except Exception as e:
472485
container_proxy = None
486+
logger.error(f"Error while serializing RPC result: {e}")
473487

474488
if wait and not self.rpc_register.object_is_registered(connector):
475489
raise RegistryNotReadyError(f"Connector {connector} not registered yet")

0 commit comments

Comments
 (0)