Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions backend/internal/httpd/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"net/http"
"os"
"sync"
"syscall"
"time"

"github.com/aoagents/agent-orchestrator/backend/internal/config"
Expand All @@ -30,14 +31,32 @@ type Server struct {
}

// NewWithDeps constructs a Server with API dependencies supplied by the daemon
// and binds the listener immediately so a port conflict fails fast — before any
// running.json is written. The caller owns the returned Server's lifecycle via
// Run. termMgr may be nil, in which case the /mux terminal surface is not mounted.
// and binds the listener immediately, before any running.json is written. The
// caller owns the returned Server's lifecycle via Run. termMgr may be nil, in
// which case the /mux terminal surface is not mounted.
//
// If the configured port is already held, it falls back to an OS-assigned
// ephemeral port rather than failing. A genuine peer AO daemon is ruled out
// upstream (the running.json + /healthz check in daemon.Run), so a conflict here
// means a non-AO process owns the port; exiting would only leave the desktop
// supervisor stuck on "daemon not ready". The actual bound port is logged
// ("daemon listening") and written to running.json, both of which the supervisor
// reads, so the fallback propagates to the renderer with no UI changes.
func NewWithDeps(cfg config.Config, log *slog.Logger, termMgr *terminal.Manager, deps APIDeps) (*Server, error) {
log = loggerOrDefault(log)
ln, err := net.Listen("tcp", cfg.Addr())
if err != nil {
return nil, fmt.Errorf("bind %s (is a daemon already running?): %w", cfg.Addr(), err)
if !errors.Is(err, syscall.EADDRINUSE) {
return nil, fmt.Errorf("bind %s: %w", cfg.Addr(), err)
}
// Configured port is taken by a non-AO process: retry on an ephemeral port.
fallback, ferr := net.Listen("tcp", net.JoinHostPort(cfg.Host, "0"))
if ferr != nil {
return nil, fmt.Errorf("bind %s (in use) and ephemeral fallback: %w", cfg.Addr(), ferr)
}
log.Warn("configured port in use; bound an ephemeral port instead",
"configured", cfg.Addr(), "bound", fallback.Addr().String())
ln = fallback
}

srv := &Server{
Expand Down
23 changes: 17 additions & 6 deletions backend/internal/httpd/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,10 @@ func waitForHealth(t *testing.T, base string) {
t.Fatal("server did not become healthy within timeout")
}

// TestNewFailsOnPortConflict confirms a second bind of the same port fails
// fast rather than silently sharing it.
func TestNewFailsOnPortConflict(t *testing.T) {
// TestNewFallsBackOnPortConflict confirms that when the configured port is
// already held, the constructor binds an ephemeral port instead of failing, so
// the desktop supervisor never gets stuck on "daemon not ready".
func TestNewFallsBackOnPortConflict(t *testing.T) {
cfg := config.Config{Host: "127.0.0.1", Port: 0, RunFilePath: filepath.Join(t.TempDir(), "r.json")}

first, err := NewWithDeps(cfg, discardLogger(), nil, APIDeps{})
Expand All @@ -204,9 +205,19 @@ func TestNewFailsOnPortConflict(t *testing.T) {
}
defer first.listen.Close()

// Re-bind the exact port the first server took.
// Request the exact port the first server took; the second server should
// fall back to a different, ephemeral port rather than error out.
conflict := config.Config{Host: "127.0.0.1", Port: first.boundPort(), RunFilePath: cfg.RunFilePath}
if _, err := NewWithDeps(conflict, discardLogger(), nil, APIDeps{}); err == nil {
t.Fatal("New on an already-bound port = nil error, want bind failure")
second, err := NewWithDeps(conflict, discardLogger(), nil, APIDeps{})
if err != nil {
t.Fatalf("New on an already-bound port = %v, want ephemeral fallback", err)
}
defer second.listen.Close()

if second.boundPort() == first.boundPort() {
t.Fatalf("second server bound the same port %d; want a fallback port", second.boundPort())
}
if second.boundPort() == 0 {
t.Fatal("second server bound port 0; want a real fallback port")
}
}
12 changes: 12 additions & 0 deletions frontend/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,18 @@ app.on("before-quit", () => {
}
});

// Last-resort teardown. before-quit covers the normal quit path, but app.exit()
// and some shutdown routes skip it, which would orphan the detached daemon and
// leave it holding the port for the next launch. The Node 'exit' event fires
// synchronously on those paths too, so the daemon's process group is always
// signalled when the supervisor goes away. (A hard SIGKILL/crash still can't run
// JS; the daemon's port-conflict fallback covers the orphan that such an exit leaves behind.)
process.on("exit", () => {
  // No daemon handle is held, so there is nothing to signal.
  if (!daemonProcess) {
    return;
  }
  killDaemon(daemonProcess);
});

app.on("window-all-closed", () => {
if (process.platform !== "darwin") {
app.quit();
Expand Down
Loading