Skip to content

Commit 75374b4

Browse files
committed
ci: add inference smoke test on every PR
Add an end-to-end inference smoke test that verifies the control-agent can complete a real LLM turn via session-control RPC. - bin/ci/smoke-agent-inference.sh: sends a prompt via Unix socket RPC, subscribes to turn_end, and validates that the response contains the expected token. - Uses anthropic/claude-haiku (cheap) via the new BAUDBOT_MODEL env override. - CI_ANTHROPIC_API_KEY is injected into the agent's .env for provider auth. - start.sh: respect the BAUDBOT_MODEL override before auto-detecting. - .env.schema: add BAUDBOT_MODEL. - integration.yml: pass CI_ANTHROPIC_API_KEY to every droplet run. - bin/ci/droplet.sh run: accept optional KEY=VALUE env var forwarding. - Wired into setup-ubuntu.sh and setup-arch.sh after the runtime smoke checks.
1 parent 7fc9762 commit 75374b4

7 files changed

Lines changed: 238 additions & 7 deletions

File tree

.env.schema

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ GEMINI_API_KEY=
3131
# @docs(https://opencode.ai)
3232
OPENCODE_ZEN_API_KEY=
3333

34+
# Override auto-detected model (e.g. anthropic/claude-haiku for CI)
35+
# @sensitive=false @type=string
36+
BAUDBOT_MODEL=
37+
3438
# ── Slack ────────────────────────────────────────────────────────────────────
3539

3640
# Slack bot OAuth token (required for direct Socket Mode, optional in broker mode)

.github/workflows/integration.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ jobs:
115115
bash bin/ci/droplet.sh run \
116116
"${{ steps.droplet.outputs.DROPLET_IP }}" \
117117
~/.ssh/ci_key \
118-
"${{ matrix.setup_script }}"
118+
"${{ matrix.setup_script }}" \
119+
"CI_ANTHROPIC_API_KEY=${{ secrets.CI_ANTHROPIC_API_KEY }}"
119120
120121
- name: Cleanup
121122
if: always()

bin/ci/droplet.sh

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -184,12 +184,20 @@ cmd_wait_ssh() {
184184

185185
# ── run <ip> <ssh_private_key_file> <script> [KEY=VALUE...] ───────────────────
# Streams a local script to `bash -s` on root@<ip> over SSH.
#
# Arguments:
#   $1  - droplet IP address
#   $2  - path to the SSH private key file
#   $3  - path to the local script to execute remotely
#   $4+ - optional KEY=VALUE pairs exported in the remote session before the
#         script body runs
# Returns: the exit status of the ssh pipeline, or 2 when an env argument is
#          not a well-formed KEY=VALUE assignment.
cmd_run() {
  local ip="${1:?Usage: droplet.sh run <ip> <ssh_private_key_file> <script> [env_vars...]}"
  local key_file="${2:?}"
  local script="${3:?}"
  shift 3

  # Build the export prelude up front so a malformed argument is rejected
  # before any connection is opened or any part of the script is sent.
  local assignment="" name="" value=""
  local -a exports=()
  for assignment in "$@"; do
    if [[ "$assignment" != *=* ]]; then
      printf 'droplet.sh run: expected KEY=VALUE, got an argument without "="\n' >&2
      return 2
    fi
    name="${assignment%%=*}"
    value="${assignment#*=}"
    if [[ ! "$name" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      # Only the name is reported; the value may be a secret.
      printf 'droplet.sh run: invalid environment variable name: %s\n' "$name" >&2
      return 2
    fi
    # %q shell-quotes the value so spaces and metacharacters reach the remote
    # bash as data instead of being word-split or executed.
    exports+=("$(printf 'export %s=%q' "$name" "$value")")
  done

  {
    if (( ${#exports[@]} > 0 )); then
      printf '%s\n' "${exports[@]}"
    fi
    cat -- "$script"
  } | ssh -o StrictHostKeyChecking=no -o BatchMode=yes \
    -i "$key_file" "root@$ip" bash -s
}
194202

195203
# ── list ──────────────────────────────────────────────────────────────────────

bin/ci/setup-arch.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ bash /home/baudbot_admin/baudbot/bin/ci/smoke-cli.sh
7070
echo "=== Running runtime smoke checks ==="
7171
bash /home/baudbot_admin/baudbot/bin/ci/smoke-agent-runtime.sh
7272

73+
echo "=== Running inference smoke check ==="
74+
bash /home/baudbot_admin/baudbot/bin/ci/smoke-agent-inference.sh
75+
7376
echo "=== Installing test dependencies ==="
7477
export PATH="/home/baudbot_agent/opt/node/bin:$PATH"
7578
cd /home/baudbot_admin/baudbot

bin/ci/setup-ubuntu.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ bash /home/baudbot_admin/baudbot/bin/ci/smoke-cli.sh
108108
echo "=== Running runtime smoke checks ==="
109109
bash /home/baudbot_admin/baudbot/bin/ci/smoke-agent-runtime.sh
110110

111+
echo "=== Running inference smoke check ==="
112+
bash /home/baudbot_admin/baudbot/bin/ci/smoke-agent-inference.sh
113+
111114
echo "=== Installing test dependencies ==="
112115
export PATH="/home/baudbot_agent/opt/node/bin:$PATH"
113116
cd /home/baudbot_admin/baudbot

bin/ci/smoke-agent-inference.sh

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
#!/usr/bin/env bash
2+
# Inference smoke-test for baudbot.
3+
#
4+
# Verifies that the control-agent can complete at least one real LLM turn
5+
# end-to-end via session-control RPC.
6+
#
7+
# Requires CI_ANTHROPIC_API_KEY in the environment (injected into the
8+
# agent's .env before starting baudbot).
9+
#
10+
# Expects baudbot to be already installed and stoppable via `sudo baudbot`.
11+
12+
set -Eeuo pipefail

# Account the agent runs under, and the paths derived from its home directory.
AGENT_USER="baudbot_agent"
AGENT_HOME="/home/${AGENT_USER}"
AGENT_ENV="${AGENT_HOME}/.config/.env"
CONTROL_DIR="${AGENT_HOME}/.pi/session-control"
CONTROL_ALIAS="${CONTROL_DIR}/control-agent.alias"

# Time budgets (seconds) for socket readiness and for the inference turn.
START_TIMEOUT_SECONDS=60
INFERENCE_TIMEOUT_SECONDS=120

# Marker string the assistant is asked to echo back.
EXPECTED_TOKEN="CI_INFERENCE_OK"

readonly AGENT_USER AGENT_HOME AGENT_ENV CONTROL_DIR CONTROL_ALIAS
readonly START_TIMEOUT_SECONDS INFERENCE_TIMEOUT_SECONDS EXPECTED_TOKEN

# Set to 1 once baudbot has been started, so the EXIT trap knows to stop it.
started=0
24+
25+
# Print a message to stdout, prefixed with the smoke-test tag.
# Arguments: $@ - message words, joined with single spaces.
log() {
  local message="$*"
  printf '[inference-smoke] %s\n' "$message"
}
28+
29+
# EXIT handler: stop baudbot if this script started it, then exit with the
# status the script was already exiting with.
cleanup() {
  local status=$?
  if (( started == 1 )); then
    log "cleanup: stopping baudbot"
    # Best effort: a failing stop must not replace the original exit status.
    sudo baudbot stop >/dev/null 2>&1 || true
  fi
  exit "$status"
}
trap cleanup EXIT
38+
39+
# Poll once per second until the control-agent alias symlink resolves to a
# Unix socket, or START_TIMEOUT_SECONDS elapses.
# Globals:  CONTROL_ALIAS, CONTROL_DIR, START_TIMEOUT_SECONDS (read)
# Outputs:  the resolved socket path on stdout
# Returns:  0 once the socket exists, 1 on timeout
wait_for_control_socket() {
  local give_up_at=$((SECONDS + START_TIMEOUT_SECONDS))
  local resolved=""

  until (( SECONDS >= give_up_at )); do
    if [[ -L "$CONTROL_ALIAS" ]]; then
      resolved="$(readlink -- "$CONTROL_ALIAS" 2>/dev/null || true)"
      case "$resolved" in
        "" | /*)
          # Empty (readlink failed) or already absolute: use as-is.
          ;;
        *)
          # Relative link targets are resolved against the control directory.
          resolved="${CONTROL_DIR}/${resolved}"
          ;;
      esac
      if [[ -n "$resolved" && -S "$resolved" ]]; then
        printf '%s\n' "$resolved"
        return 0
      fi
    fi
    sleep 1
  done

  return 1
}
61+
62+
# Print `baudbot status` output (stdout and stderr) between marker lines.
# A failing status command is ignored so diagnostics never abort the script.
dump_diagnostics() {
  log "--- diagnostics ---"
  if ! sudo baudbot status 2>&1; then
    : # status failure is deliberately ignored
  fi
  log "--- end diagnostics ---"
}
67+
68+
# Send a message via session-control RPC and wait for turn_end.
# Prints the assistant response content on success, exits non-zero on failure.
#
# Arguments:
#   $1 - path to the control-agent Unix socket
#   $2 - message text to send
#   $3 - overall time budget in seconds for connect + send + turn_end
# Globals: AGENT_USER (read) - the Python client runs as this user via sudo.
rpc_send_wait_turn_end() {
  local socket_path="$1"
  local message="$2"
  local timeout_seconds="$3"

  sudo -u "$AGENT_USER" python3 - "$socket_path" "$message" "$timeout_seconds" <<'PY'
import json
import socket
import sys
import time

sock_path = sys.argv[1]
message = sys.argv[2]
timeout_seconds = int(sys.argv[3])

send_cmd = {"type": "send", "message": message, "mode": "steer"}
subscribe_cmd = {"type": "subscribe", "event": "turn_end"}


def fail(reason):
    """Report a failure on stderr and exit non-zero."""
    print(reason, file=sys.stderr)
    sys.exit(1)


# One deadline for the whole exchange. A per-recv timeout alone would let
# unrelated traffic on the socket keep the wait alive past the budget.
deadline = time.monotonic() + timeout_seconds

client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
try:
    client.settimeout(timeout_seconds)
    client.connect(sock_path)

    # Send both commands
    client.sendall((json.dumps(send_cmd) + "\n").encode("utf-8"))
    client.sendall((json.dumps(subscribe_cmd) + "\n").encode("utf-8"))

    buf = b""
    send_response = None

    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise socket.timeout()
        client.settimeout(remaining)

        chunk = client.recv(8192)
        if not chunk:
            fail("connection closed before turn_end")
        buf += chunk

        # The protocol is newline-delimited JSON; process every complete line.
        while b"\n" in buf:
            line, buf = buf.split(b"\n", 1)
            line = line.strip()
            if not line:
                continue

            try:
                msg = json.loads(line.decode("utf-8", errors="replace"))
            except json.JSONDecodeError:
                # Skip lines that are not valid JSON.
                continue

            if msg.get("type") == "response":
                if msg.get("command", "") == "send":
                    if not msg.get("success", False):
                        fail(f"send failed: {msg.get('error', 'unknown')}")
                    send_response = msg
                # Ignore subscribe response
                continue

            if msg.get("type") == "event" and msg.get("event") == "turn_end":
                if send_response is None:
                    fail("received turn_end before send response")
                # `or {}` also covers explicit JSON nulls for data/message.
                data = msg.get("data") or {}
                assistant_msg = data.get("message") or {}
                content = assistant_msg.get("content", "")
                if not content:
                    fail("turn completed but no assistant content")
                print(content)
                sys.exit(0)
except socket.timeout:
    fail("timeout waiting for inference response")
except OSError as exc:
    # Connect/send/recv failures (missing socket, refused connection, ...).
    fail(f"socket error: {exc}")
finally:
    client.close()
PY
}
149+
150+
readonly CI_MODEL="anthropic/claude-haiku"

# Set KEY=VALUE in a dotenv-style file: every existing `KEY=` line is
# replaced, and the assignment is appended when no such line exists.
# The value is written literally with shell builtins only, so characters such
# as `|`, `&` or `\` are preserved and the value never appears in the argv of
# an external process.
# Arguments: $1 - file path, $2 - variable name, $3 - value
_upsert_env_var() {
  local file="$1" key="$2" value="$3"
  local line=""
  local replaced=0
  local -a lines=()

  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" == "${key}="* ]]; then
      lines+=("${key}=${value}")
      replaced=1
    else
      lines+=("$line")
    fi
  done < "$file"

  if (( replaced == 0 )); then
    lines+=("${key}=${value}")
  fi

  # Rewriting through a redirect keeps the existing file (and so its owner
  # and mode) instead of replacing it with a new one.
  printf '%s\n' "${lines[@]}" > "$file"
}

# Write the CI Anthropic API key and the cheap CI model override into the
# agent's .env file.
# Globals:  CI_ANTHROPIC_API_KEY (read), AGENT_ENV (read), CI_MODEL (read)
# Returns:  1 when the API key is unset/empty or the .env file is missing.
inject_ci_config() {
  if [[ -z "${CI_ANTHROPIC_API_KEY:-}" ]]; then
    log "ERROR: CI_ANTHROPIC_API_KEY is not set"
    return 1
  fi
  if [[ ! -f "$AGENT_ENV" ]]; then
    log "ERROR: agent env file not found: ${AGENT_ENV}"
    return 1
  fi
  log "injecting CI API key and model override into agent .env"
  _upsert_env_var "$AGENT_ENV" "ANTHROPIC_API_KEY" "$CI_ANTHROPIC_API_KEY"
  # Use a cheap model for the smoke test — no need to burn Sonnet/Opus tokens.
  _upsert_env_var "$AGENT_ENV" "BAUDBOT_MODEL" "$CI_MODEL"
}
166+
167+
# Entry point: inject CI config, start baudbot, wait for the control socket,
# run one inference turn, check the reply contains EXPECTED_TOKEN, then stop
# baudbot.
# Returns: 0 when the smoke test passes; 1 when the socket never appears, the
#          RPC fails, or the reply lacks the expected token.
main() {
  inject_ci_config

  log "starting baudbot"
  # Arm cleanup before launching: if `baudbot start` fails part-way, the EXIT
  # trap still issues a best-effort stop instead of leaving it running.
  started=1
  sudo baudbot start

  log "waiting for control-agent socket"
  local socket_path=""
  if ! socket_path="$(wait_for_control_socket)"; then
    log "control-agent socket did not become ready within ${START_TIMEOUT_SECONDS}s"
    dump_diagnostics
    return 1
  fi
  log "control socket ready: ${socket_path}"

  log "sending inference prompt (timeout ${INFERENCE_TIMEOUT_SECONDS}s)"
  local response=""
  if ! response="$(rpc_send_wait_turn_end "$socket_path" \
    "Reply with exactly: ${EXPECTED_TOKEN}" \
    "$INFERENCE_TIMEOUT_SECONDS")"; then
    log "inference failed"
    dump_diagnostics
    return 1
  fi

  # Validate response contains expected token
  if [[ "$response" == *"$EXPECTED_TOKEN"* ]]; then
    log "inference response contains expected token"
  else
    log "unexpected response (missing '${EXPECTED_TOKEN}'):"
    # Only the first 500 characters are logged to keep CI output bounded.
    log "  ${response:0:500}"
    dump_diagnostics
    return 1
  fi

  log "stopping baudbot"
  sudo baudbot stop
  # Stop succeeded, so the EXIT trap has nothing left to clean up.
  started=0

  log "inference smoke passed"
}
209+
210+
main "$@"

start.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,10 @@ fi
136136
# Set session name (read by auto-name.ts extension)
137137
export PI_SESSION_NAME="control-agent"
138138

139-
# Pick model based on available API keys (first match wins)
140-
if [ -n "${ANTHROPIC_API_KEY:-}" ]; then
139+
# Pick model: explicit override or auto-detect from API keys (first match wins)
140+
if [ -n "${BAUDBOT_MODEL:-}" ]; then
141+
MODEL="$BAUDBOT_MODEL"
142+
elif [ -n "${ANTHROPIC_API_KEY:-}" ]; then
141143
MODEL="anthropic/claude-opus-4-6"
142144
elif [ -n "${OPENAI_API_KEY:-}" ]; then
143145
MODEL="openai/gpt-5.2-codex"

0 commit comments

Comments
 (0)