Skip to content

Commit 64d9f25

Browse files
committed
fix(rpc): client/server rpc handshake for shutdown
1 parent 674048d commit 64d9f25

4 files changed

Lines changed: 67 additions & 15 deletions

File tree

bec_widgets/cli/client_utils.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
PROCESS_TERMINATION_TIMEOUT = 10
3838
PROCESS_OUTPUT_THREAD_JOIN_TIMEOUT = 2
3939
PROCESS_OUTPUT_SELECT_TIMEOUT = 0.2
40+
GRACEFUL_SERVER_SHUTDOWN_RPC_TIMEOUT = 3
4041
GRACEFUL_SERVER_SHUTDOWN_TIMEOUT = 5
4142
OUTPUT_READER_STOP_EVENT_ATTR = "_bec_output_reader_stop_event"
4243

@@ -141,19 +142,18 @@ def _terminate_plot_process(process, logger, timeout: float = PROCESS_TERMINATIO
141142
process.wait(timeout=timeout)
142143
return
143144
except Exception as exc:
144-
logger.warning(
145-
f"Failed to terminate GUI process group: {exc}; terminating process only. "
146-
f"{process_details}"
147-
)
145+
logger.warning("Failed to terminate GUI process group; terminating process only.")
146+
logger.info(f"GUI process termination failure details: {exc}. {process_details}")
148147
process.terminate()
149148

150149
try:
151150
process.wait(timeout=timeout)
152151
return
153152
except subprocess.TimeoutExpired:
154-
logger.warning(
155-
f"GUI process did not stop within {timeout}s; killing it. "
156-
f"{process_details}\n{_process_group_snapshot(process)}"
153+
logger.warning(f"GUI process did not stop within {timeout}s; killing it.")
154+
logger.info(
155+
f"GUI process force-kill details: {process_details}\n"
156+
f"{_process_group_snapshot(process)}"
157157
)
158158

159159
try:
@@ -196,10 +196,8 @@ def _join_process_output_thread(process, thread: threading.Thread | None, logger
196196
logger.error(f"Failed to close stream {str(e)}")
197197
thread.join(timeout=PROCESS_OUTPUT_THREAD_JOIN_TIMEOUT)
198198
if thread.is_alive():
199-
logger.warning(
200-
"GUI process output reader thread did not stop after process shutdown. "
201-
f"{_process_details(process)}"
202-
)
199+
logger.warning("GUI process output reader thread did not stop after process shutdown.")
200+
logger.info(f"GUI process output reader thread details: {_process_details(process)}")
203201

204202

205203
def _start_plot_process(
@@ -630,19 +628,26 @@ def _request_server_shutdown(self) -> bool:
630628
logger.info(f"Requesting graceful GUI shutdown {process_details}")
631629
try:
632630
self.launcher._run_rpc( # pylint: disable=protected-access
633-
"system.shutdown", wait_for_rpc_response=False
631+
"system.shutdown",
632+
wait_for_rpc_response=True,
633+
timeout=GRACEFUL_SERVER_SHUTDOWN_RPC_TIMEOUT,
634634
)
635635
except Exception as exc:
636636
logger.warning(
637-
f"Could not request graceful GUI shutdown via RPC: {exc}. " f"{process_details}"
637+
"Could not confirm graceful GUI shutdown via RPC; "
638+
"falling back to process termination."
638639
)
640+
logger.info(f"Graceful GUI shutdown RPC failure details: {exc}. {process_details}")
639641
return False
640642
if _wait_for_process_exit(self._process, GRACEFUL_SERVER_SHUTDOWN_TIMEOUT):
641643
logger.info(f"GUI server exited after graceful shutdown {process_details}")
642644
return True
643645
logger.warning(
644646
"GUI server did not exit after graceful shutdown request; "
645-
f"falling back to process termination. {process_details}\n"
647+
"falling back to process termination."
648+
)
649+
logger.info(
650+
f"Graceful GUI shutdown timeout details: {process_details}\n"
646651
f"{_process_group_snapshot(self._process)}"
647652
)
648653
return False

bec_widgets/utils/rpc_server.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,34 @@ def on_rpc_update(self, msg: dict, metadata: dict):
143143
f"receive_latency_s={format_elapsed(receive_latency)}"
144144
)
145145
logger.debug(f"Received RPC instruction: {msg}, metadata: {metadata}")
146+
147+
# Shutdown must acknowledge before teardown starts. The generic RPC path
148+
# below publishes successful responses through QTimer.singleShot(0);
149+
# for system.shutdown that would race with the queued app quit and
150+
# dispatcher shutdown scheduled by _shutdown_gui_server().
151+
if method == "system.shutdown":
152+
execution_start = time.perf_counter()
153+
try:
154+
self.run_system_rpc(method, args, kwargs)
155+
except Exception:
156+
execution_duration = time.perf_counter() - execution_start
157+
content = traceback.format_exc()
158+
logger.error(
159+
"GUI RPC server shutdown request failed "
160+
f"request_id={request_id} method={method} gui_id={self.gui_id} "
161+
f"execution_duration_s={execution_duration:.3f}\n{content}"
162+
)
163+
self.send_response(request_id, False, {"error": content})
164+
else:
165+
execution_duration = time.perf_counter() - execution_start
166+
logger.info(
167+
"GUI RPC server acknowledged shutdown request "
168+
f"request_id={request_id} method={method} gui_id={self.gui_id} "
169+
f"execution_duration_s={execution_duration:.3f}"
170+
)
171+
self.send_response(request_id, True, {"result": None})
172+
return
173+
146174
execution_start = time.perf_counter()
147175
with rpc_exception_hook(functools.partial(self.send_response, request_id, False)):
148176
try:

tests/unit_tests/test_client_utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from bec_widgets.cli.client import BECDockArea
99
from bec_widgets.cli.client_utils import (
10+
GRACEFUL_SERVER_SHUTDOWN_RPC_TIMEOUT,
1011
OUTPUT_READER_STOP_EVENT_ATTR,
1112
BECGuiClient,
1213
_join_process_output_thread,
@@ -318,7 +319,9 @@ def test_client_utils_kill_server_requests_graceful_shutdown_before_signal():
318319
launcher_prop.return_value = launcher
319320
gui.kill_server()
320321

321-
launcher._run_rpc.assert_called_once_with("system.shutdown", wait_for_rpc_response=False)
322+
launcher._run_rpc.assert_called_once_with(
323+
"system.shutdown", wait_for_rpc_response=True, timeout=GRACEFUL_SERVER_SHUTDOWN_RPC_TIMEOUT
324+
)
322325
process.wait.assert_called_once_with(timeout=5)
323326
killpg.assert_not_called()
324327
assert gui._process is None

tests/unit_tests/test_rpc_server.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,22 @@ def test_rpc_server_system_shutdown_requests_gui_server_shutdown(rpc_server, qap
106106
del qapp.gui_server
107107

108108

109+
def test_on_rpc_update_system_shutdown_sends_response_before_return(rpc_server):
110+
order = []
111+
rpc_server.run_system_rpc = MagicMock(side_effect=lambda *_args: order.append("shutdown"))
112+
rpc_server.send_response = MagicMock(side_effect=lambda *_args: order.append("response"))
113+
rpc_server.serialize_result_and_send = MagicMock()
114+
115+
rpc_server.on_rpc_update(
116+
{"action": "system.shutdown", "parameter": {"args": [], "kwargs": {}}},
117+
{"request_id": "shutdown-request", "sent_at": 1.0, "deadline": 10.0, "timeout": 2},
118+
)
119+
120+
assert order == ["shutdown", "response"]
121+
rpc_server.send_response.assert_called_once_with("shutdown-request", True, {"result": None})
122+
rpc_server.serialize_result_and_send.assert_not_called()
123+
124+
109125
def test_singleshot_rpc_repeat_raises_on_repeated_singleshot(rpc_server):
110126
"""
111127
Test that a singleshot RPC method raises an error when called multiple times.

0 commit comments

Comments
 (0)