Skip to content

Commit edd7f4d

Browse files
committed
fix: make sure daemons are brought down cleanly
1 parent 7d9a0dc commit edd7f4d

3 files changed

Lines changed: 91 additions & 47 deletions

File tree

src/cocoindex_code/cli.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -467,26 +467,26 @@ def daemon_restart() -> None:
467467
@daemon_app.command("stop")
def daemon_stop() -> None:
    """Stop the daemon and report whether it went down.

    Prints "Daemon is not running." and returns when there is neither a PID
    file nor a running daemon.  Otherwise calls ``stop_daemon()`` and then
    polls for up to five seconds until both indicators are clear, printing
    either a success message or a warning on stderr.
    """
    import time

    from .client import is_daemon_running, stop_daemon
    from .daemon import daemon_pid_path

    pid_file = daemon_pid_path()

    def daemon_is_gone() -> bool:
        # The PID file is checked first; is_daemon_running() is only
        # consulted once the file has disappeared.
        return not pid_file.exists() and not is_daemon_running()

    if daemon_is_gone():
        _typer.echo("Daemon is not running.")
        return

    stop_daemon()

    # Give the daemon a grace period to release both the PID file and
    # whatever is_daemon_running() checks.
    give_up_at = time.monotonic() + 5.0
    while time.monotonic() < give_up_at and not daemon_is_gone():
        time.sleep(0.1)

    if daemon_is_gone():
        _typer.echo("Daemon stopped.")
    else:
        _typer.echo("Warning: daemon may not have stopped cleanly.", err=True)

src/cocoindex_code/client.py

Lines changed: 74 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -205,12 +205,36 @@ def _find_ccc_executable() -> str | None:
205205
return None
206206

207207

208+
def _pid_alive(pid: int) -> bool:
    """Return True if *pid* refers to a process that is still running.

    The check sends signal 0 via ``os.kill``, which on POSIX performs the
    existence/permission check without delivering a signal.

    Args:
        pid: Process ID to probe.  Values ``<= 0`` are rejected up front
            because ``os.kill`` interprets them as process-group targets
            rather than a single process.

    Returns:
        ``True`` when the process exists (including when it exists but may
        not be signalled by this user); ``False`` when it does not exist,
        when *pid* is not a usable process ID, or when the OS reports any
        other error for the probe.
    """
    if pid <= 0:
        # A corrupt PID file must never make callers signal a whole group.
        return False
    try:
        os.kill(pid, 0)  # signal 0: check existence without killing
    except ProcessLookupError:
        return False
    except PermissionError:
        return True  # process exists but we can't signal it
    except (OSError, OverflowError):
        # Any other OS-level failure, or a PID too large for the platform's
        # pid type, is treated as "no such process".
        return False
    # NOTE(review): per the Python docs, on Windows os.kill() with a signal
    # other than CTRL_C_EVENT/CTRL_BREAK_EVENT calls TerminateProcess, so this
    # probe is not passive there - confirm that is acceptable for callers.
    return True
217+
218+
208219
def stop_daemon() -> None:
209220
"""Stop the daemon gracefully.
210221
211-
Sends a StopRequest, waits for the process to exit, falls back to SIGTERM.
222+
Sends a StopRequest, waits for the process to exit, falls back to
223+
SIGTERM → SIGKILL. Only removes the PID file after confirming that
224+
the specific PID is no longer alive.
212225
"""
213-
# Step 1: try sending StopRequest
226+
pid_path = daemon_pid_path()
227+
228+
# Read the PID early so we can track the actual process.
229+
pid: int | None = None
230+
try:
231+
pid = int(pid_path.read_text().strip())
232+
if pid == os.getpid():
233+
pid = None # safety: never kill ourselves
234+
except (FileNotFoundError, ValueError):
235+
pass
236+
237+
# Step 1: try sending StopRequest via socket
214238
try:
215239
client = DaemonClient.connect()
216240
client.handshake()
@@ -220,65 +244,78 @@ def stop_daemon() -> None:
220244
pass
221245

222246
# Step 2: wait for process to exit (up to 5s)
223-
pid_path = daemon_pid_path()
224-
deadline = time.monotonic() + 5.0
225-
while time.monotonic() < deadline and pid_path.exists():
226-
time.sleep(0.1)
227-
228-
if not pid_path.exists():
229-
return # Clean exit
247+
if pid is not None:
248+
deadline = time.monotonic() + 5.0
249+
while time.monotonic() < deadline and _pid_alive(pid):
250+
time.sleep(0.1)
251+
if not _pid_alive(pid):
252+
_cleanup_stale_files(pid_path, pid)
253+
return
230254

231255
# Step 3: if still running, try SIGTERM
232-
pid: int | None = None
233-
if pid_path.exists():
256+
if pid is not None and _pid_alive(pid):
234257
try:
235-
pid = int(pid_path.read_text().strip())
236-
if pid != os.getpid():
237-
os.kill(pid, signal.SIGTERM)
238-
else:
239-
pid = None
240-
except (ValueError, ProcessLookupError, PermissionError):
258+
os.kill(pid, signal.SIGTERM)
259+
except (ProcessLookupError, PermissionError):
241260
pass
242261

243-
# Wait a bit more
244262
deadline = time.monotonic() + 2.0
245-
while time.monotonic() < deadline and pid_path.exists():
263+
while time.monotonic() < deadline and _pid_alive(pid):
246264
time.sleep(0.1)
247265

248-
# Step 4: if still running, escalate to SIGKILL (Unix only;
266+
if not _pid_alive(pid):
267+
_cleanup_stale_files(pid_path, pid)
268+
return
269+
270+
# Step 4: escalate to SIGKILL (Unix only;
249271
# on Windows SIGTERM already calls TerminateProcess)
250-
if sys.platform != "win32" and pid_path.exists():
272+
if sys.platform != "win32" and pid is not None and _pid_alive(pid):
251273
try:
252-
pid = int(pid_path.read_text().strip())
253-
if pid != os.getpid():
254-
os.kill(pid, signal.SIGKILL)
255-
else:
256-
pid = None
257-
except (ValueError, ProcessLookupError, PermissionError):
274+
os.kill(pid, signal.SIGKILL)
275+
except (ProcessLookupError, PermissionError):
258276
pass
259277

278+
# SIGKILL is async; give the kernel a moment to reap
279+
deadline = time.monotonic() + 1.0
280+
while time.monotonic() < deadline and _pid_alive(pid):
281+
time.sleep(0.1)
282+
260283
# Step 4b: on Windows, wait for the process to fully exit after TerminateProcess
261284
# so that named pipe handles are released before starting a new daemon.
262285
if sys.platform == "win32" and pid is not None:
263286
deadline = time.monotonic() + 3.0
264-
while time.monotonic() < deadline:
265-
try:
266-
os.kill(pid, 0) # Check if process still exists
267-
time.sleep(0.1)
268-
except (ProcessLookupError, PermissionError, OSError):
269-
break # Process has exited
287+
while time.monotonic() < deadline and _pid_alive(pid):
288+
time.sleep(0.1)
270289

271290
# Step 5: clean up stale files
291+
_cleanup_stale_files(pid_path, pid)
292+
293+
294+
def _cleanup_stale_files(pid_path: Path, pid: int | None) -> None:
    """Remove the daemon's socket and PID file on a best-effort basis.

    Args:
        pid_path: Location of the daemon PID file.
        pid: PID of the daemon that was stopped, or ``None`` when no PID
            could be determined.

    When *pid* is given, the PID file is removed only if the number stored
    in it equals *pid*, so a newer daemon's PID file is left alone.  When
    *pid* is ``None`` the PID file is removed unconditionally.  Filesystem
    errors are swallowed in every branch: cleanup must never turn a
    successful stop into a failure.
    """
    if sys.platform != "win32":
        # The socket file is only cleaned up on non-Windows platforms.
        sock = daemon_socket_path()
        try:
            Path(sock).unlink(missing_ok=True)
        except Exception:
            pass

    if pid is None:
        # No PID known - cautiously remove if file exists
        try:
            pid_path.unlink(missing_ok=True)
        except Exception:
            pass
        return

    try:
        # Compare numerically so formatting differences in the file
        # (e.g. leading zeros) cannot cause a spurious mismatch.
        if int(pid_path.read_text().strip()) == pid:
            pid_path.unlink(missing_ok=True)
    except (OSError, ValueError):
        # Missing/unreadable file, unparsable content, or failed unlink.
        pass
282319

283320

284321
def _wait_for_daemon(timeout: float = 30.0) -> None:

src/cocoindex_code/daemon.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -447,17 +447,24 @@ def run_daemon() -> None:
447447
try:
448448
asyncio.run(_async_daemon_main(embedder))
449449
finally:
450-
# Clean up PID file and socket (named pipes on Windows clean up automatically)
451-
try:
452-
pid_path.unlink(missing_ok=True)
453-
except Exception:
454-
pass
450+
# Clean up socket first, then PID file last.
451+
# The PID file is the authoritative "daemon is alive" indicator, so it
452+
# must be the very last thing removed to avoid races where a client
453+
# sees the PID gone but the socket (or process) is still lingering.
455454
if sys.platform != "win32":
456455
sock = daemon_socket_path()
457456
try:
458457
Path(sock).unlink(missing_ok=True)
459458
except Exception:
460459
pass
460+
# Only remove the PID file if it still contains *our* PID.
461+
# A new daemon may have already overwritten it during a restart race.
462+
try:
463+
stored = pid_path.read_text().strip()
464+
if stored == str(os.getpid()):
465+
pid_path.unlink(missing_ok=True)
466+
except Exception:
467+
pass
461468
logger.info("Daemon stopped")
462469

463470

0 commit comments

Comments
 (0)