Skip to content

Commit f15bd1b

Browse files
Frank Guo and claude committed
Fix daemon: non-blocking spawn, skip in test binaries
ensureDaemon was blocking for 10s polling for the daemon socket, causing CI integration tests to time out. Redesigned: connectDaemon only tries an existing socket; spawnDaemon fires-and-forgets a background daemon for future calls. NewClient falls back to in-process immediately when no daemon is running. Also skips spawning from test binaries (*.test). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b3ef5cd commit f15bd1b

2 files changed

Lines changed: 36 additions & 53 deletions

File tree

cmd/rekal/cli/nomic/client.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,27 @@ type Client struct {
1212
}
1313

1414
// NewClient creates a Client that tries the daemon first for fast embedding.
15-
// If the daemon is unavailable, it falls back to loading the model in-process.
15+
// If no daemon is running, it falls back to loading the model in-process and
16+
// spawns a daemon in the background for future invocations.
1617
// gitRoot is the git repository root (used to locate .rekal/nomic/).
1718
func NewClient(gitRoot string) (*Client, error) {
1819
if !Supported() {
1920
return nil, ErrNotSupported
2021
}
2122

22-
// Try daemon first.
23-
dc, err := ensureDaemon(gitRoot)
23+
// Try connecting to a running daemon.
24+
dc, err := connectDaemon(gitRoot)
2425
if err == nil {
2526
return &Client{daemon: dc}, nil
2627
}
2728

28-
// Fall back to in-process.
29+
// No daemon running — fall back to in-process and spawn one for next time.
2930
cacheDir := filepath.Join(gitRoot, ".rekal", "nomic")
3031
embedder, err := NewEmbedder(cacheDir)
3132
if err != nil {
3233
return nil, err
3334
}
35+
spawnDaemon(gitRoot)
3436
return &Client{embedder: embedder}, nil
3537
}
3638

cmd/rekal/cli/nomic/daemon.go

Lines changed: 30 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
const (
2121
idleTimeout = 5 * time.Minute
2222
dialTimeout = 2 * time.Second
23-
startPoll = 10 * time.Second
2423
)
2524

2625
// daemonRequest is the JSON wire format for client→daemon messages.
@@ -281,73 +280,55 @@ func (c *daemonClient) Close() {
281280
c.conn.Close() //nolint:errcheck
282281
}
283282

284-
// ensureDaemon connects to an existing daemon or spawns a new one.
285-
// Returns a connected daemonClient, or an error if the daemon cannot be reached.
286-
func ensureDaemon(gitRoot string) (*daemonClient, error) {
283+
// connectDaemon tries to connect to a running daemon.
284+
// Returns a connected daemonClient, or an error if no daemon is reachable.
285+
// Does NOT spawn a new daemon — use spawnDaemon for that.
286+
func connectDaemon(gitRoot string) (*daemonClient, error) {
287287
sock := socketPath(gitRoot)
288288

289-
// Try connecting to existing daemon.
290-
if conn, err := net.DialTimeout("unix", sock, dialTimeout); err == nil {
291-
dc := &daemonClient{conn: conn}
292-
if err := dc.ping(); err == nil {
293-
return dc, nil
294-
}
289+
conn, err := net.DialTimeout("unix", sock, dialTimeout)
290+
if err != nil {
291+
return nil, fmt.Errorf("nomic: no daemon running")
292+
}
293+
dc := &daemonClient{conn: conn}
294+
if err := dc.ping(); err != nil {
295295
dc.Close()
296+
return nil, fmt.Errorf("nomic: daemon not responding: %w", err)
296297
}
298+
return dc, nil
299+
}
300+
301+
// spawnDaemon launches a daemon process in the background.
302+
// It does not wait for the daemon to become ready — callers should
303+
// fall back to in-process embedding and benefit from the daemon on
304+
// subsequent invocations.
305+
func spawnDaemon(gitRoot string) {
306+
sock := socketPath(gitRoot)
297307

298-
// Stale socket/pid — clean up.
308+
// Clean up stale socket/pid.
299309
os.Remove(sock) //nolint:errcheck
300310
os.Remove(pidPath(gitRoot)) //nolint:errcheck
301311

302-
// Spawn daemon process.
303312
exe, err := os.Executable()
304313
if err != nil {
305-
return nil, fmt.Errorf("nomic: resolve executable: %w", err)
314+
return
315+
}
316+
317+
// Don't spawn from test binaries — they can't serve the daemon command.
318+
if strings.HasSuffix(exe, ".test") || strings.Contains(exe, "/_test/") {
319+
return
306320
}
307321

308322
cmd := exec.Command(exe, "_nomic-daemon", "--git-root", gitRoot)
309323
cmd.Stdout = nil
310324
cmd.Stderr = nil
311325
cmd.Stdin = nil
312-
// Detach from parent process group.
313326
setSysProcAttr(cmd)
314327
if err := cmd.Start(); err != nil {
315-
return nil, fmt.Errorf("nomic: start daemon: %w", err)
328+
return
316329
}
317-
318-
// Wait in background so we can detect early exit.
319-
waitCh := make(chan error, 1)
320-
go func() { waitCh <- cmd.Wait() }()
321-
322-
// Poll for socket readiness.
323-
deadline := time.Now().Add(startPoll)
324-
for time.Now().Before(deadline) {
325-
// Check if daemon already exited (failed to start).
326-
select {
327-
case err := <-waitCh:
328-
if err != nil {
329-
return nil, fmt.Errorf("nomic: daemon exited: %w", err)
330-
}
331-
return nil, fmt.Errorf("nomic: daemon exited unexpectedly")
332-
default:
333-
}
334-
335-
time.Sleep(100 * time.Millisecond)
336-
conn, err := net.DialTimeout("unix", sock, dialTimeout)
337-
if err != nil {
338-
continue
339-
}
340-
dc := &daemonClient{conn: conn}
341-
if err := dc.ping(); err == nil {
342-
return dc, nil
343-
}
344-
dc.Close()
345-
}
346-
347-
// Timed out — kill the daemon process to avoid leaked goroutines.
348-
cmd.Process.Kill() //nolint:errcheck
349-
<-waitCh
350-
return nil, fmt.Errorf("nomic: daemon did not start within %v", startPoll)
330+
// Fully detach — don't wait on the child.
331+
go cmd.Wait() //nolint:errcheck
351332
}
352333

353334
// NewDaemonCmd returns the hidden _nomic-daemon cobra command.

0 commit comments

Comments
 (0)