Skip to content

Commit 3771d6b

Browse files
author
Baudbot
committed
fix: remove bridge launch from start.sh — single owner (startup-cleanup.sh)
The bridge was launched in two places:
  1. start.sh — as a background subshell before pi starts
  2. startup-cleanup.sh — in a tmux session after control-agent is live

This caused recurring issues:
  - Port 7890 conflicts (both try to bind)
  - Orphaned supervisor loops surviving restarts
  - Bridge can't find pi socket (start.sh launches before UUID is known)
  - PID file tracking is unreliable across restarts

Fix: start.sh now only CLEANS UP stale bridge processes. The actual bridge launch is solely owned by startup-cleanup.sh, which:
  - Runs after pi starts (UUID is known)
  - Sets PI_SESSION_ID correctly
  - Uses a tmux session with restart loop
  - Does full port/process cleanup before launching

This eliminates the dual-owner race condition.
1 parent e004988 commit 3771d6b

1 file changed

Lines changed: 30 additions & 48 deletions

File tree

start.sh

Lines changed: 30 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,8 @@ set -euo pipefail
1414
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
1515
# shellcheck source=bin/lib/runtime-node.sh
1616
source "$SCRIPT_DIR/bin/lib/runtime-node.sh"
17-
# shellcheck source=bin/lib/bridge-restart-policy.sh
18-
source "$SCRIPT_DIR/bin/lib/bridge-restart-policy.sh"
17+
# bridge-restart-policy.sh no longer needed — bridge is started by
18+
# startup-cleanup.sh, not start.sh (see PR #XXX)
1919
cd ~
2020

2121
NODE_BIN_DIR="$(bb_resolve_runtime_node_bin_dir "$HOME")"
@@ -84,53 +84,35 @@ if [ -d "$SOCKET_DIR" ]; then
8484
done
8585
fi
8686

87-
# Start Slack bridge in the background (before pi, so it's ready for messages).
88-
# Broker pull mode has priority when SLACK_BROKER_* keys are configured.
89-
# Otherwise fallback to direct Slack Socket Mode.
90-
BRIDGE_SCRIPT=""
91-
if [ -n "${SLACK_BROKER_URL:-}" ] \
92-
&& [ -n "${SLACK_BROKER_WORKSPACE_ID:-}" ] \
93-
&& [ -n "${SLACK_BROKER_SERVER_PRIVATE_KEY:-}" ] \
94-
&& [ -n "${SLACK_BROKER_SERVER_PUBLIC_KEY:-}" ] \
95-
&& [ -n "${SLACK_BROKER_SERVER_SIGNING_PRIVATE_KEY:-}" ] \
96-
&& [ -n "${SLACK_BROKER_PUBLIC_KEY:-}" ] \
97-
&& [ -n "${SLACK_BROKER_SIGNING_PUBLIC_KEY:-}" ]; then
98-
BRIDGE_SCRIPT="broker-bridge.mjs"
99-
elif [ -n "${SLACK_BOT_TOKEN:-}" ] && [ -n "${SLACK_APP_TOKEN:-}" ]; then
100-
BRIDGE_SCRIPT="bridge.mjs"
101-
fi
102-
103-
if [ -n "$BRIDGE_SCRIPT" ]; then
104-
RELEASE_BRIDGE="/opt/baudbot/current/slack-bridge"
105-
BRIDGE_LOG_DIR="$HOME/.pi/agent/logs"
106-
BRIDGE_LOG_FILE="$BRIDGE_LOG_DIR/slack-bridge.log"
107-
BRIDGE_STATUS_FILE="$HOME/.pi/agent/slack-bridge-supervisor.json"
108-
BRIDGE_PID_FILE="$HOME/.pi/agent/slack-bridge.pid"
109-
110-
mkdir -p "$BRIDGE_LOG_DIR"
111-
112-
# Stop any previous bridge process tracked by pid file.
113-
if [ -f "$BRIDGE_PID_FILE" ]; then
114-
old_pid="$(cat "$BRIDGE_PID_FILE" 2>/dev/null || true)"
115-
if [ -n "$old_pid" ] && kill -0 "$old_pid" 2>/dev/null; then
116-
kill "$old_pid" 2>/dev/null || true
117-
sleep 1
118-
kill -9 "$old_pid" 2>/dev/null || true
119-
fi
120-
rm -f "$BRIDGE_PID_FILE"
87+
# ── Slack bridge cleanup (bridge is started by startup-cleanup.sh) ──
88+
# The bridge needs the control-agent's session UUID (PI_SESSION_ID) to deliver
89+
# messages to the correct socket. That UUID isn't known until pi starts and
90+
# registers its socket. So we DON'T start the bridge here — the control-agent's
91+
# startup-cleanup.sh handles it after the session is live.
92+
#
93+
# We DO kill any stale bridge processes from previous runs to avoid port
94+
# conflicts when startup-cleanup.sh launches a fresh one.
95+
BRIDGE_PID_FILE="$HOME/.pi/agent/slack-bridge.pid"
96+
if [ -f "$BRIDGE_PID_FILE" ]; then
97+
old_pid="$(cat "$BRIDGE_PID_FILE" 2>/dev/null || true)"
98+
if [ -n "$old_pid" ] && kill -0 "$old_pid" 2>/dev/null; then
99+
echo "Stopping stale bridge supervisor (PID $old_pid)..."
100+
kill "$old_pid" 2>/dev/null || true
101+
sleep 1
102+
kill -9 "$old_pid" 2>/dev/null || true
121103
fi
122-
123-
echo "Starting Slack bridge ($BRIDGE_SCRIPT)... logs: $BRIDGE_LOG_FILE"
124-
(
125-
export PATH="$HOME/.varlock/bin:$NODE_BIN_DIR:$PATH"
126-
cd "$RELEASE_BRIDGE"
127-
bb_bridge_supervise "$BRIDGE_LOG_FILE" "$BRIDGE_STATUS_FILE" "$BRIDGE_SCRIPT" \
128-
varlock run --path ~/.config/ -- node "$BRIDGE_SCRIPT"
129-
) &
130-
# Intentionally track the supervisor subshell PID (not per-restart node child PID)
131-
# so a single kill stops the entire bridge restart loop.
132-
echo $! > "$BRIDGE_PID_FILE"
133-
chmod 600 "$BRIDGE_PID_FILE"
104+
rm -f "$BRIDGE_PID_FILE"
105+
fi
106+
# Kill the tmux session too (startup-cleanup.sh uses this)
107+
tmux kill-session -t slack-bridge 2>/dev/null || true
108+
# Force-release port 7890 in case anything survived
109+
PORT_PIDS="$(lsof -ti :7890 2>/dev/null || true)"
110+
if [ -n "$PORT_PIDS" ]; then
111+
echo "Releasing port 7890 (PIDs: $PORT_PIDS)..."
112+
echo "$PORT_PIDS" | xargs kill 2>/dev/null || true
113+
sleep 1
114+
PORT_PIDS="$(lsof -ti :7890 2>/dev/null || true)"
115+
[ -n "$PORT_PIDS" ] && echo "$PORT_PIDS" | xargs kill -9 2>/dev/null || true
134116
fi
135117

136118
# Set session name (read by auto-name.ts extension)

0 commit comments

Comments
 (0)