Skip to content

Commit 50f7c9d

Browse files
committed
fix: make sure daemons are brought down cleanly
1 parent 57e7a20 commit 50f7c9d

3 files changed

Lines changed: 89 additions & 36 deletions

File tree

src/cocoindex_code/cli.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -467,26 +467,26 @@ def daemon_restart() -> None:
467467
@daemon_app.command("stop")
468468
def daemon_stop() -> None:
469469
"""Stop the daemon."""
470-
from .client import stop_daemon
470+
from .client import is_daemon_running, stop_daemon
471471
from .daemon import daemon_pid_path
472472

473473
pid_path = daemon_pid_path()
474-
if not pid_path.exists():
474+
if not pid_path.exists() and not is_daemon_running():
475475
_typer.echo("Daemon is not running.")
476476
return
477477

478478
stop_daemon()
479479

480-
# Wait for process to exit
480+
# Wait for process to exit (check both pid file and socket)
481481
import time
482482

483483
deadline = time.monotonic() + 5.0
484484
while time.monotonic() < deadline:
485-
if not pid_path.exists():
485+
if not pid_path.exists() and not is_daemon_running():
486486
break
487487
time.sleep(0.1)
488488

489-
if pid_path.exists():
489+
if pid_path.exists() or is_daemon_running():
490490
_typer.echo("Warning: daemon may not have stopped cleanly.", err=True)
491491
else:
492492
_typer.echo("Daemon stopped.")

src/cocoindex_code/client.py

Lines changed: 72 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -196,12 +196,36 @@ def _find_ccc_executable() -> str | None:
196196
return None
197197

198198

199+
def _pid_alive(pid: int) -> bool:
    """Return True if *pid* refers to a process that is still running.

    The check never affects the target process.

    * Non-positive PIDs always yield ``False``.  ``kill(0, ...)`` and
      ``kill(-1, ...)`` address process groups / every process rather than
      a single process, so treating them as "alive" would let a corrupt PID
      file make callers signal unrelated processes.
    * On POSIX, signal ``0`` performs the existence/permission check
      without delivering a signal.
    * On Windows, ``os.kill`` with any signal other than ``CTRL_C_EVENT`` /
      ``CTRL_BREAK_EVENT`` calls ``TerminateProcess``, so ``os.kill(pid, 0)``
      would *kill* the process being probed.  The process is queried through
      the Win32 API instead.
    """
    if pid <= 0:
        return False

    if sys.platform == "win32":
        # Imported lazily: ``ctypes.WinDLL`` only exists on Windows.
        import ctypes
        from ctypes import wintypes

        process_query_limited_information = 0x1000
        error_access_denied = 5
        still_active = 259  # exit code reported while a process is running

        kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
        kernel32.OpenProcess.argtypes = (
            wintypes.DWORD,
            wintypes.BOOL,
            wintypes.DWORD,
        )
        kernel32.OpenProcess.restype = wintypes.HANDLE
        kernel32.GetExitCodeProcess.argtypes = (
            wintypes.HANDLE,
            ctypes.POINTER(wintypes.DWORD),
        )
        kernel32.GetExitCodeProcess.restype = wintypes.BOOL
        kernel32.CloseHandle.argtypes = (wintypes.HANDLE,)
        kernel32.CloseHandle.restype = wintypes.BOOL

        handle = kernel32.OpenProcess(
            process_query_limited_information, False, pid
        )
        if not handle:
            # Access denied means the process exists but we may not query
            # it; any other failure means there is no such process.
            return ctypes.get_last_error() == error_access_denied
        try:
            exit_code = wintypes.DWORD()
            if not kernel32.GetExitCodeProcess(handle, ctypes.byref(exit_code)):
                # The handle opened, so the process object exists; stay
                # conservative and report it as alive.
                return True
            return exit_code.value == still_active
        finally:
            kernel32.CloseHandle(handle)

    try:
        os.kill(pid, 0)  # signal 0: check existence without killing
    except ProcessLookupError:
        return False
    except PermissionError:
        return True  # process exists but we can't signal it
    except OverflowError:
        return False  # value does not fit in pid_t, so no such process
    return True
208+
209+
199210
def stop_daemon() -> None:
200211
"""Stop the daemon gracefully.
201212
202-
Sends a StopRequest, waits for the process to exit, falls back to SIGTERM.
213+
Sends a StopRequest, waits for the process to exit, falls back to
214+
SIGTERM → SIGKILL. Only removes the PID file after confirming that
215+
the specific PID is no longer alive.
203216
"""
204-
# Step 1: try sending StopRequest
217+
pid_path = daemon_pid_path()
218+
219+
# Read the PID early so we can track the actual process.
220+
pid: int | None = None
221+
try:
222+
pid = int(pid_path.read_text().strip())
223+
if pid == os.getpid():
224+
pid = None # safety: never kill ourselves
225+
except (FileNotFoundError, ValueError):
226+
pass
227+
228+
# Step 1: try sending StopRequest via socket
205229
try:
206230
client = DaemonClient.connect()
207231
client.handshake()
@@ -211,49 +235,71 @@ def stop_daemon() -> None:
211235
pass
212236

213237
# Step 2: wait for process to exit (up to 5s)
214-
pid_path = daemon_pid_path()
215-
deadline = time.monotonic() + 5.0
216-
while time.monotonic() < deadline and pid_path.exists():
217-
time.sleep(0.1)
218-
219-
if not pid_path.exists():
220-
return # Clean exit
238+
if pid is not None:
239+
deadline = time.monotonic() + 5.0
240+
while time.monotonic() < deadline and _pid_alive(pid):
241+
time.sleep(0.1)
242+
if not _pid_alive(pid):
243+
_cleanup_stale_files(pid_path, pid)
244+
return
221245

222246
# Step 3: if still running, try SIGTERM
223-
if pid_path.exists():
247+
if pid is not None and _pid_alive(pid):
224248
try:
225-
pid = int(pid_path.read_text().strip())
226-
if pid != os.getpid():
227-
os.kill(pid, signal.SIGTERM)
228-
except (ValueError, ProcessLookupError, PermissionError):
249+
os.kill(pid, signal.SIGTERM)
250+
except (ProcessLookupError, PermissionError):
229251
pass
230252

231-
# Wait a bit more
232253
deadline = time.monotonic() + 2.0
233-
while time.monotonic() < deadline and pid_path.exists():
254+
while time.monotonic() < deadline and _pid_alive(pid):
234255
time.sleep(0.1)
235256

236-
# Step 4: if still running, escalate to SIGKILL (Unix only;
257+
if not _pid_alive(pid):
258+
_cleanup_stale_files(pid_path, pid)
259+
return
260+
261+
# Step 4: escalate to SIGKILL (Unix only;
237262
# on Windows SIGTERM already calls TerminateProcess)
238-
if sys.platform != "win32" and pid_path.exists():
263+
if sys.platform != "win32" and pid is not None and _pid_alive(pid):
239264
try:
240-
pid = int(pid_path.read_text().strip())
241-
if pid != os.getpid():
242-
os.kill(pid, signal.SIGKILL)
243-
except (ValueError, ProcessLookupError, PermissionError):
265+
os.kill(pid, signal.SIGKILL)
266+
except (ProcessLookupError, PermissionError):
244267
pass
245268

269+
# SIGKILL is async; give the kernel a moment to reap
270+
deadline = time.monotonic() + 1.0
271+
while time.monotonic() < deadline and _pid_alive(pid):
272+
time.sleep(0.1)
273+
246274
# Step 5: clean up stale files
275+
_cleanup_stale_files(pid_path, pid)
276+
277+
278+
def _cleanup_stale_files(pid_path: Path, pid: int | None) -> None:
279+
"""Remove socket and PID file after the daemon has exited.
280+
281+
Only removes the PID file when *pid* matches what is on disk, to
282+
avoid accidentally deleting a newer daemon's PID file.
283+
"""
247284
if sys.platform != "win32":
248285
sock = daemon_socket_path()
249286
try:
250287
Path(sock).unlink(missing_ok=True)
251288
except Exception:
252289
pass
253-
try:
254-
pid_path.unlink(missing_ok=True)
255-
except Exception:
256-
pass
290+
if pid is not None:
291+
try:
292+
stored = pid_path.read_text().strip()
293+
if stored == str(pid):
294+
pid_path.unlink(missing_ok=True)
295+
except (FileNotFoundError, ValueError):
296+
pass
297+
else:
298+
# No PID known — cautiously remove if file exists
299+
try:
300+
pid_path.unlink(missing_ok=True)
301+
except Exception:
302+
pass
257303

258304

259305
def _wait_for_daemon(timeout: float = 10.0) -> None:

src/cocoindex_code/daemon.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -447,17 +447,24 @@ def run_daemon() -> None:
447447
try:
448448
asyncio.run(_async_daemon_main(embedder))
449449
finally:
450-
# Clean up PID file and socket (named pipes on Windows clean up automatically)
451-
try:
452-
pid_path.unlink(missing_ok=True)
453-
except Exception:
454-
pass
450+
# Clean up socket first, then PID file last.
451+
# The PID file is the authoritative "daemon is alive" indicator, so it
452+
# must be the very last thing removed to avoid races where a client
453+
# sees the PID gone but the socket (or process) is still lingering.
455454
if sys.platform != "win32":
456455
sock = daemon_socket_path()
457456
try:
458457
Path(sock).unlink(missing_ok=True)
459458
except Exception:
460459
pass
460+
# Only remove the PID file if it still contains *our* PID.
461+
# A new daemon may have already overwritten it during a restart race.
462+
try:
463+
stored = pid_path.read_text().strip()
464+
if stored == str(os.getpid()):
465+
pid_path.unlink(missing_ok=True)
466+
except Exception:
467+
pass
461468
logger.info("Daemon stopped")
462469

463470

0 commit comments

Comments
 (0)