Skip to content

Commit af96288

Browse files
committed
Fixed restart behavior on the same instance; hardened shutdown race handling
1 parent b5363e3 commit af96288

1 file changed

Lines changed: 38 additions & 11 deletions

File tree

feature_integration_tests/test_cases/daemon_helpers.py

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
import time
2626
from collections.abc import Generator
2727
from pathlib import Path
28-
from typing import Any
28+
from typing import Any, TextIO
2929

3030
import pytest
3131
from testing_utils import BazelTools
@@ -155,6 +155,7 @@ def __init__(self, daemon_binary: Path, config_file: Path, working_dir: Path):
155155
self.working_dir = working_dir
156156
self.process: subprocess.Popen | None = None
157157
self.log_file: Path | None = None
158+
self._log_fd: TextIO | None = None
158159

159160
def start(self, startup_timeout: float = 2.0) -> None:
160161
"""
@@ -165,19 +166,23 @@ def start(self, startup_timeout: float = 2.0) -> None:
165166
startup_timeout : float
166167
Time to wait after starting the daemon (seconds).
167168
"""
169+
# If a stale process reference exists from a previous run, clear it.
170+
if self.process is not None and self.process.poll() is not None:
171+
self.process = None
172+
168173
if self.process is not None:
169174
raise RuntimeError("Daemon already started")
170175

171176
# Create log file
172177
self.log_file = self.working_dir / "launch_manager.log"
173-
log_fd = open(self.log_file, "w")
178+
self._log_fd = open(self.log_file, "w")
174179

175180
# Start daemon process
176181
cmd = [str(self.daemon_binary), str(self.config_file)]
177182
self.process = subprocess.Popen(
178183
cmd,
179184
cwd=self.working_dir,
180-
stdout=log_fd,
185+
stdout=self._log_fd,
181186
stderr=subprocess.STDOUT,
182187
text=True,
183188
)
@@ -187,9 +192,12 @@ def start(self, startup_timeout: float = 2.0) -> None:
187192

188193
# Check if daemon is still running
189194
if self.process.poll() is not None:
195+
return_code = self.process.returncode
190196
log_content = self.log_file.read_text() if self.log_file.exists() else "No logs available"
197+
self._close_log_fd()
198+
self.process = None
191199
raise RuntimeError(
192-
f"Launch Manager daemon failed to start. Exit code: {self.process.returncode}\nLogs:\n{log_content}"
200+
f"Launch Manager daemon failed to start. Exit code: {return_code}\nLogs:\n{log_content}"
193201
)
194202

195203
def stop(self, shutdown_timeout: float = 5.0) -> None:
@@ -202,17 +210,36 @@ def stop(self, shutdown_timeout: float = 5.0) -> None:
202210
Maximum time to wait for graceful shutdown (seconds).
203211
"""
204212
if self.process is None:
213+
self._close_log_fd()
205214
return
206215

207-
# Send SIGTERM for graceful shutdown
208-
self.process.send_signal(signal.SIGTERM)
216+
try:
217+
# Send SIGTERM for graceful shutdown if still running.
218+
if self.process.poll() is None:
219+
try:
220+
self.process.send_signal(signal.SIGTERM)
221+
except ProcessLookupError:
222+
pass
223+
224+
try:
225+
self.process.wait(timeout=shutdown_timeout)
226+
except subprocess.TimeoutExpired:
227+
# Force kill if graceful shutdown fails.
228+
self.process.kill()
229+
self.process.wait()
230+
finally:
231+
self.process = None
232+
self._close_log_fd()
233+
234+
def _close_log_fd(self) -> None:
235+
"""Close daemon log file descriptor if it is open."""
236+
if self._log_fd is None:
237+
return
209238

210239
try:
211-
self.process.wait(timeout=shutdown_timeout)
212-
except subprocess.TimeoutExpired:
213-
# Force kill if graceful shutdown fails
214-
self.process.kill()
215-
self.process.wait()
240+
self._log_fd.close()
241+
finally:
242+
self._log_fd = None
216243

217244
def __enter__(self):
218245
self.start()

0 commit comments

Comments
 (0)