Skip to content

Commit 29677d4

Browse files
authored
ops: show broker connection in baudbot status (#101)
1 parent ea5ffdf commit 29677d4

4 files changed

Lines changed: 296 additions & 20 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ sudo baudbot deploy
7070
# start the service
7171
sudo baudbot start
7272

73-
# check health
73+
# check health (includes deployed version + broker connection/health status)
7474
sudo baudbot status
7575
sudo baudbot doctor
7676
```

bin/baudbot

Lines changed: 131 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ usage() {
103103
echo " start Start the agent (systemd, or --direct for foreground)"
104104
echo " stop Stop the agent"
105105
echo " restart Restart the agent"
106-
echo " status Show agent status + deployed version"
106+
echo " status Show agent status + deployed version + broker connection"
107107
echo " logs Tail agent logs"
108108
echo " attach Attach to control-agent by default; supports --pi/--tmux"
109109
echo " sessions List agent tmux and pi sessions (name → id)"
@@ -326,6 +326,134 @@ print_deployed_version() {
326326
echo -e "${BOLD}deployed version:${RESET} $line"
327327
}
328328

329+
#######################################
# Report whether Slack broker mode is configured for an agent user.
# Broker mode counts as configured when the agent's .env file is readable
# and defines both SLACK_BROKER_URL and SLACK_BROKER_WORKSPACE_ID with a
# value whose first character is not whitespace.
# Arguments: $1 - agent user name (defaults to baudbot_agent)
# Returns:   0 when both settings are present, 1 otherwise
#######################################
broker_mode_configured() {
  local agent_name="${1:-baudbot_agent}"
  local dotenv="/home/${agent_name}/.config/.env"

  # Without a readable env file there is nothing to inspect.
  if [ ! -r "$dotenv" ]; then
    return 1
  fi

  # Both keys must be present; either one missing means "not configured".
  if grep -Eq '^SLACK_BROKER_URL=[^[:space:]].*$' "$dotenv" \
    && grep -Eq '^SLACK_BROKER_WORKSPACE_ID=[^[:space:]].*$' "$dotenv"; then
    return 0
  fi

  return 1
}
335+
336+
#######################################
# Print the Slack broker connection state and per-component health.
#
# Emits a "broker connection:" line and, when available, a "broker health:"
# line summarising the poll/inbound/ack/outbound components recorded in the
# bridge health file.
#
# This is a purely informational helper: it always returns 0 so that a
# missing, half-written or malformed health file (or a missing python3)
# can never abort the surrounding `status` command or mask its exit code.
#
# Globals:   BAUDBOT_AGENT_USER (read, defaults to baudbot_agent)
#            BOLD, RESET (read)
# Arguments: none
# Outputs:   status lines on stdout
# Returns:   0 always
#######################################
print_broker_connection_status() {
  local agent_user="${BAUDBOT_AGENT_USER:-baudbot_agent}"
  local health_file="/home/$agent_user/.pi/agent/broker-health.json"
  local health_summary=""
  local connection_state=""
  local components_line=""
  local bridge_running=1

  if ! broker_mode_configured "$agent_user"; then
    echo -e "${BOLD}broker connection:${RESET} not configured"
    return 0
  fi

  # The bridge tmux session belongs to the agent user: root has to query it
  # through sudo, the agent user can ask directly, and anybody else cannot
  # see the session at all.
  if [ "$(id -u)" -eq 0 ]; then
    sudo -u "$agent_user" tmux has-session -t slack-bridge 2>/dev/null || bridge_running=0
  elif [ "$(id -un)" = "$agent_user" ]; then
    tmux has-session -t slack-bridge 2>/dev/null || bridge_running=0
  else
    echo -e "${BOLD}broker connection:${RESET} configured (run with sudo for runtime status)"
    return 0
  fi

  if [ "$bridge_running" -eq 0 ]; then
    echo -e "${BOLD}broker connection:${RESET} disconnected (bridge tmux session not running)"
    return 0
  fi

  if [ ! -r "$health_file" ]; then
    echo -e "${BOLD}broker connection:${RESET} starting"
    echo -e "${BOLD}broker health:${RESET} unavailable (waiting for bridge health file)"
    return 0
  fi

  # Summarise the health file as two lines: connection state, then the
  # per-component states. stderr is intentionally discarded and a failure is
  # mapped to an empty summary: a traceback is noise in status output, and
  # the failure is reported below as "unknown" / "unavailable" instead.
  health_summary="$(python3 - "$health_file" 2>/dev/null <<'PY'
import json
import sys
from datetime import datetime, timezone

path = sys.argv[1]
with open(path, 'r', encoding='utf-8') as f:
    h = json.load(f)

# Tolerate a health file whose top level is not an object.
if not isinstance(h, dict):
    h = {}


def section(name):
    """Return the named sub-object, or {} when it is missing or not an object."""
    value = h.get(name)
    return value if isinstance(value, dict) else {}


def parse_iso(s):
    """Parse an ISO-8601 timestamp into an aware datetime; None if unusable."""
    if not s:
        return None
    try:
        # datetime.fromisoformat() only accepts a trailing 'Z' on newer Pythons.
        if s.endswith('Z'):
            s = s[:-1] + '+00:00'
        dt = datetime.fromisoformat(s)
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt
    except Exception:
        return None


def age_seconds(ts):
    """Seconds elapsed since timestamp ts, or None when ts cannot be parsed."""
    dt = parse_iso(ts)
    if not dt:
        return None
    return (datetime.now(timezone.utc) - dt).total_seconds()


def status(ok_ts, err_ts):
    """'error' if the latest event is an error, 'ok' if a success, else 'unknown'."""
    ok_dt = parse_iso(ok_ts)
    err_dt = parse_iso(err_ts)
    if err_dt and (not ok_dt or err_dt >= ok_dt):
        return 'error'
    if ok_dt:
        return 'ok'
    return 'unknown'


poll = section('poll')
inbound = section('inbound')
ack = section('ack')
outbound = section('outbound')

poll_age = age_seconds(poll.get('last_ok_at'))
try:
    poll_failures = int(poll.get('consecutive_failures') or 0)
except (TypeError, ValueError):
    poll_failures = 0
poll_state = status(poll.get('last_ok_at'), poll.get('last_error_at'))

if poll_state == 'error' and poll_failures > 0:
    connection = 'reconnecting'
elif poll_age is not None and poll_age <= 120:
    connection = 'connected'
elif poll_age is not None:
    connection = 'stale'
else:
    connection = 'starting'

inbound_state = status(inbound.get('last_process_ok_at'), inbound.get('last_process_error_at'))
ack_state = status(ack.get('last_ok_at'), ack.get('last_error_at'))
outbound_state = status(outbound.get('last_ok_at'), outbound.get('last_error_at'))

print(connection)
print(f'poll={poll_state} inbound={inbound_state} ack={ack_state} outbound={outbound_state}')
PY
  )" || health_summary=""

  connection_state="$(printf '%s\n' "$health_summary" | sed -n '1p')"
  components_line="$(printf '%s\n' "$health_summary" | sed -n '2p')"

  case "$connection_state" in
    connected | reconnecting | starting)
      echo -e "${BOLD}broker connection:${RESET} $connection_state"
      ;;
    stale)
      echo -e "${BOLD}broker connection:${RESET} stale (no recent successful poll)"
      ;;
    *)
      echo -e "${BOLD}broker connection:${RESET} unknown"
      ;;
  esac

  # Use `if` rather than `[ ... ] && echo`: the short-circuit form would make
  # the function return 1 whenever the components line is empty.
  if [ -n "$components_line" ]; then
    echo -e "${BOLD}broker health:${RESET} $components_line"
  elif [ -z "$health_summary" ]; then
    echo -e "${BOLD}broker health:${RESET} unavailable (failed to read bridge health file)"
  fi

  return 0
}
456+
329457
pi_control_dir() {
330458
local agent_user="${1:-baudbot_agent}"
331459
echo "/home/$agent_user/.pi/session-control"
@@ -473,6 +601,7 @@ case "${1:-}" in
473601
systemctl status baudbot "$@" || status_rc=$?
474602
echo ""
475603
print_deployed_version
604+
print_broker_connection_status
476605
exit "$status_rc"
477606
else
478607
# Fallback: check if baudbot_agent has pi running
@@ -484,6 +613,7 @@ case "${1:-}" in
484613
fi
485614
echo ""
486615
print_deployed_version
616+
print_broker_connection_status
487617
fi
488618
;;
489619

docs/operations.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ sudo baudbot start
1010
sudo baudbot stop
1111
sudo baudbot restart
1212

13-
# Status and logs
13+
# Status and logs (status includes deployed version + broker connection/health state)
1414
sudo baudbot status
1515
sudo baudbot logs
1616

0 commit comments

Comments
 (0)