Skip to content

Commit d8e7cda

Browse files
committed
feat(sanity): add cold-start sanity check script for 27B turbo3-base model
This commit introduces a new script, `sanity-27b-turbo3-base.sh`, which performs a cold-start sanity check for the 27B turbo3-base model. The script reports generation throughput (tokens per second) so that it can be compared by hand against the historical baseline of ~18.4 TPS, helping to distinguish thermal throttling from a code regression. It starts the server, polls its health endpoint until it is ready, issues a warmup request, and then measures performance over multiple runs with a predefined prompt. The script aims to ensure the model's operational integrity and performance consistency.
1 parent 8bc7a3c commit d8e7cda

1 file changed

Lines changed: 54 additions & 0 deletions

File tree

scripts/sanity-27b-turbo3-base.sh

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env bash
# Cold-start sanity check: 27B turbo3-base short n=128 x3 (no NextN).
# Prints per-run generation throughput so it can be compared by hand against
# the historical baseline of ~18.4 TPS to detect thermal vs code regression.
#
# Environment overrides:
#   PORT           TCP port the server listens on            (default 18080)
#   N              n_predict for each measured run            (default 128)
#   RUNS           number of measured runs                    (default 3)
#   MAIN           path to the GGUF model passed to -m
#   READY_TIMEOUT  seconds to wait for /health to succeed     (default 60)
#
# Output:
#   stdout: "warmup: <tps>|<predicted_n>" and " run <i>: <tps>|<predicted_n>"
#   stderr: info/warn/error diagnostics
#
# Exit status: 0 when every measured run returned timings; 1 when a
# prerequisite is missing, the server never became healthy, or a run failed.
#
# NOTE: -e is intentionally not set; the warmup is best-effort and every
# critical step below checks its own status explicitly.
set -uo pipefail

# Print an error to stderr and abort (the EXIT trap still runs cleanup).
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

SRV_PID=""
SRV_LOG=""

# Stop the server started by this script: TERM first, then KILL after a
# one-second grace period, then sweep any remaining llama-server processes.
# Safe to call more than once; does nothing before the server is started.
cleanup() {
  [[ -n "$SRV_PID" ]] || return 0
  kill "$SRV_PID" 2>/dev/null || true
  sleep 1
  kill -9 "$SRV_PID" 2>/dev/null || true
  # Reap the child so bash does not report it as a killed job.
  wait "$SRV_PID" 2>/dev/null || true
  pkill -9 -f llama-server 2>/dev/null || true
  SRV_PID=""
}
trap cleanup EXIT
# Turn signals into a normal exit so the EXIT trap tears the server down.
trap 'exit 130' INT
trap 'exit 143' TERM

ROOT="$(cd "$(dirname "$0")/.." && pwd)" || die "cannot resolve repository root from $0"
PORT="${PORT:-18080}"
HP="127.0.0.1:${PORT}"
N="${N:-128}"
RUNS="${RUNS:-3}"
MAIN="${MAIN:-$ROOT/.scratch/Qwen3.6-27B-UD-Q4_K_XL/Qwen3.6-27B-UD-Q4_K_XL.gguf}"
READY_TIMEOUT="${READY_TIMEOUT:-60}"
SERVER_BIN="$ROOT/build/bin/llama-server"

PROMPT='Write a detailed 300-word essay about the history of artificial intelligence, including early pioneers like Alan Turing and John McCarthy, key milestones such as the Dartmouth Conference and the development of expert systems, and future predictions about AGI and superintelligence.'

# Validate everything up front, before any running server is killed.
for tool in curl jq; do
  command -v "$tool" >/dev/null 2>&1 || die "required tool not found: $tool"
done
[[ -x "$SERVER_BIN" ]] || die "server binary not found or not executable: $SERVER_BIN"
[[ -r "$MAIN" ]] || die "model file not readable: $MAIN"
[[ "$N" =~ ^-?[0-9]+$ ]] || die "N must be an integer, got: $N"
[[ "$RUNS" =~ ^(0|[1-9][0-9]*)$ ]] || die "RUNS must be a non-negative integer, got: $RUNS"
[[ "$READY_TIMEOUT" =~ ^[1-9][0-9]*$ ]] || die "READY_TIMEOUT must be a positive integer, got: $READY_TIMEOUT"

# POST the fixed prompt to /completion with the given n_predict and write the
# raw JSON response to stdout. Returns non-zero when the payload cannot be
# built or the HTTP request fails.
completion() {
  local n_predict=$1
  local payload
  payload=$(jq -n --arg p "$PROMPT" --argjson n "$n_predict" \
    '{prompt:$p,n_predict:$n,temperature:0,cache_prompt:false}') || return
  curl -fsS -X POST "http://${HP}/completion" \
    -H 'Content-Type: application/json' \
    -d "$payload"
}

# Deliberately aggressive: a cold-start measurement needs any previous
# llama-server instance gone before the new one is launched.
pkill -9 -f llama-server 2>/dev/null || true
sleep 1

SRV_LOG=$(mktemp -t sanity-srv.XXXX.log) || die "mktemp failed"
# NOTE(review): -np looks like the short form of --parallel; both are kept
# exactly as in the original invocation — confirm before dropping one.
"$SERVER_BIN" \
  -m "$MAIN" -c 8192 -ngl 99 -ctk turbo3 -ctv turbo3 -fa on \
  --host 127.0.0.1 --port "$PORT" --parallel 1 -np 1 --cont-batching \
  --metrics --slots --no-warmup \
  >"$SRV_LOG" 2>&1 &
SRV_PID=$!

echo "info: server pid=$SRV_PID log=$SRV_LOG" >&2

# Poll /health once per second. Bail out early if the server process died,
# and fail loudly instead of measuring against a server that never came up.
ready=0
for ((i = 1; i <= READY_TIMEOUT; i++)); do
  if curl -fsS "http://${HP}/health" >/dev/null 2>&1; then
    echo "info: server ready after ${i}s" >&2
    ready=1
    break
  fi
  if ! kill -0 "$SRV_PID" 2>/dev/null; then
    echo "error: server exited during startup; last log lines:" >&2
    tail -n 20 "$SRV_LOG" >&2
    exit 1
  fi
  sleep 1
done
((ready)) || die "server not healthy after ${READY_TIMEOUT}s; see $SRV_LOG"

# Warmup is best-effort: a failure is reported but does not abort the run.
echo "info: warmup n=512..." >&2
if resp=$(completion 512); then
  printf '%s\n' "$resp" \
    | jq -r '.timings | "warmup: \(.predicted_per_second // 0)|\(.predicted_n // 0)"' \
    || true
else
  echo "warn: warmup request failed (continuing)" >&2
fi

echo "info: measuring short n=${N} x${RUNS}..." >&2
failures=0
for ((i = 1; i <= RUNS; i++)); do
  # Missing timing fields fall back to 0, as do failed requests, so every
  # run always produces one "<tps>|<predicted_n>" line on stdout.
  if resp=$(completion "$N") \
    && stats=$(printf '%s\n' "$resp" \
      | jq -r '.timings | "\(.predicted_per_second // 0)|\(.predicted_n // 0)"') \
    && [[ -n "$stats" ]]; then
    echo " run $i: ${stats}"
  else
    echo "warn: run $i: completion request failed or returned invalid JSON" >&2
    echo " run $i: 0|0"
    failures=$((failures + 1))
  fi
done

cleanup
echo "info: done. server log: $SRV_LOG" >&2

if ((failures > 0)); then
  die "${failures} of ${RUNS} measured run(s) failed"
fi

0 commit comments

Comments
 (0)