-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmulti-service.sh
More file actions
executable file
·84 lines (78 loc) · 3.5 KB
/
Copy pathmulti-service.sh
File metadata and controls
executable file
·84 lines (78 loc) · 3.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env bash
# multi-service.sh — run the SAME workload against several OpenAI-compatible
# gateways in the SAME wall-clock window (simultaneously). Because all gateways
# then sample identical upstream (provider) conditions concurrently, the shared
# upstream latency cancels out when you take the per-round delta between
# gateways — so the TTFT delta is ~pure gateway overhead, not upstream variance.
# Running gateways one after another instead lets each hit a different upstream
# window, which manufactures false differences. See README "Why test multiple
# services simultaneously".
#
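# Usage:
#   ./multi-service.sh [profile] [conc:dur] [rounds]
#   ./multi-service.sh profiles/realistic.json 50:60s 3
#
# Environment:
#   LOADTEST_BIN  path to the load generator binary (default: ./loadtest).
#                 If that path is not executable, ./loadtest is built from
#                 ./cmd/loadtest and used instead.
#
# Define the gateways under test in the SERVICES array below, one entry each:
#   name | target URL | model string | bearer token   (empty bearer = no auth)
# Secrets come from env vars — never hardcode a key here.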
# Unset variables are errors. `-e` is left off on purpose: a load run that
# exits non-zero must not abort the remaining rounds or the summary table.
set -u

# Work from the script's own directory so profiles/, runs/ and ./loadtest
# resolve the same way no matter where the script is invoked from.
cd -- "$(dirname -- "$0")" || { echo "cannot cd to script directory" >&2; exit 1; }

# Positional arguments (all optional) and the load generator location.
PROFILE="${1:-profiles/realistic.json}"
STAGES="${2:-50:60s}"
ROUNDS="${3:-3}"
LT="${LOADTEST_BIN:-./loadtest}"

# ROUNDS drives the round loop, so reject anything that is not a positive
# integer before any load is generated. The regex is checked first so that
# arbitrary text never reaches the arithmetic evaluation.
if ! [[ "$ROUNDS" =~ ^[0-9]+$ ]] || ((10#$ROUNDS < 1)); then
  echo "rounds must be a positive integer, got: $ROUNDS" >&2
  exit 2
fi
ROUNDS=$((10#$ROUNDS)) # normalise e.g. "03" -> 3 (forces base 10)

# Edit this list for your gateways. Each runs the same PROFILE; only the
# target/model/auth differ. Keys come from env vars — set GATEWAY_A_KEY etc.
SERVICES=(
  "gateway-a|http://localhost:8080/v1/chat/completions|gpt-4o-mini|${GATEWAY_A_KEY:-}"
  # "gateway-b|http://localhost:8081/v1/chat/completions|gpt-4o-mini|${GATEWAY_B_KEY:-}"
  # "gateway-c|http://localhost:8082/v1/chat/completions|openai/gpt-4o-mini|${GATEWAY_C_KEY:-}"
)

# Nothing to compare (or even run) without at least one service entry.
if [[ "${#SERVICES[@]}" -eq 0 ]]; then
  echo "no services defined: add at least one entry to SERVICES" >&2
  exit 1
fi

# The per-round table is rendered by python3. Warn now rather than after the
# first full load round; the load runs themselves do not need it.
if ! command -v python3 >/dev/null 2>&1; then
  echo "warning: python3 not found; per-round summary tables will fail" >&2
fi

# Build the generator if it isn't present. The build always targets
# ./loadtest, which then replaces whatever LT pointed at.
if [[ ! -x "$LT" ]]; then
  echo "building loadtest binary..."
  go build -o ./loadtest ./cmd/loadtest || { echo "build failed" >&2; exit 1; }
  LT=./loadtest
fi

echo "profile=$PROFILE stages=$STAGES rounds=$ROUNDS services=${#SERVICES[@]}"
echo
# Run ROUNDS rounds. Within a round every service is started in the background
# before any of them is waited on, so all gateways are measured in the same
# wall-clock window. Once all runs have finished, a comparison table built
# from each service's summary JSON is printed.
# A C-style loop is used instead of `seq`, whose BSD variant counts down when
# the last value is smaller than the first.
for ((r = 1; r <= ROUNDS; r++)); do
  echo "################ ROUND $r/$ROUNDS (all services launched simultaneously) ################"

  # Freshness marker: only summaries modified after this file was touched are
  # attributed to this round. Without it, a service whose run produced no
  # summary would be reported with numbers left over from an earlier round.
  mkdir -p runs
  round_marker="runs/.round-start"
  touch "$round_marker"

  names=()
  pids=()
  # The ${arr[@]+"${arr[@]}"} form keeps empty arrays safe under `set -u`
  # on older bash releases (same idiom as the vkflag expansion below).
  for svc in ${SERVICES[@]+"${SERVICES[@]}"}; do
    # Entry format: name|target|model|bearer
    IFS='|' read -r name target model bearer <<<"$svc"
    out="runs/$name"
    mkdir -p "$out"

    # Only pass -vk when a bearer token is configured.
    # NOTE(review): the token ends up in the loadtest process argv.
    vkflag=()
    if [[ -n "$bearer" ]]; then
      vkflag=(-vk "$bearer")
    fi

    "$LT" -config "$PROFILE" -stages "$STAGES" -target "$target" -model "$model" \
      -out "$out" ${vkflag[@]+"${vkflag[@]}"} >"$out/console-r$r.log" 2>&1 &
    pids+=("$!")
    names+=("$name")
  done

  # Barrier: wait for every run of this round. A non-zero exit is reported but
  # does not stop the round, so the other gateways still get summarised.
  for ((i = 0; i < ${#pids[@]}; i++)); do
    wait "${pids[i]}" ||
      echo "warning: ${names[i]} exited with status $? (see runs/${names[i]}/console-r$r.log)" >&2
  done

  # Render the table. Only the service names are handed to python3; the full
  # SERVICES entries carry bearer tokens that do not belong in another argv.
  python3 - "$round_marker" ${names[@]+"${names[@]}"} <<'PY'
import sys, json, glob, os

marker, names = sys.argv[1], sys.argv[2:]
try:
    round_start_ns = os.stat(marker).st_mtime_ns
except OSError:
    round_start_ns = -1  # marker unavailable: accept any summary on disk

NO_DATA = ("NO-DATA", "", "", "", "")
rows = []
for name in names:
    # Newest summary written during this round (strictly newer than marker).
    files = sorted(
        (f for f in glob.glob(f"runs/{name}/summary-*.json")
         if os.stat(f).st_mtime_ns > round_start_ns),
        key=os.path.getmtime)
    if not files:
        rows.append((name,) + NO_DATA)
        continue
    try:
        with open(files[-1]) as fh:
            st = json.load(fh)["stages"][-1]["total"]
        rows.append((name,
                     f'{st["ttft_ms"]["P50"]:.0f}', f'{st["ttft_ms"]["P95"]:.0f}',
                     f'{st["latency_ms"]["P95"]:.0f}', f'{st["throughput_rps"]:.1f}',
                     f'{100*st["error_rate"]:.2f}%'))
    except (OSError, ValueError, KeyError, IndexError, TypeError) as exc:
        # A truncated or unexpected summary must not take the whole table down.
        print(f"warning: {name}: unusable summary {files[-1]}: {exc!r}", file=sys.stderr)
        rows.append((name,) + NO_DATA)

hdr = ("gateway", "ttft_p50", "ttft_p95", "lat_p95", "rps", "err")
w = [max(len(str(x)) for x in col) for col in zip(hdr, *rows)]

def line(cells):
    return " ".join(str(v).ljust(w[i]) for i, v in enumerate(cells))

print(line(hdr)); print(line(["-"*x for x in w]))
for row in rows:
    print(line(row))

# Per-round TTFT p50 delta against the fastest gateway of this round.
vals = [(r[0], float(r[1])) for r in rows if r[1] != "NO-DATA"]
if len(vals) > 1:
    base = min(v for _, v in vals)
    print("\nttft_p50 delta vs fastest (= gateway overhead; upstream is shared this window):")
    for n, v in sorted(vals, key=lambda x: x[1]):
        print(f" {n:10} +{v-base:6.0f} ms")
PY
  echo
done

echo "raw reports/summaries under: $(pwd)/runs/<gateway>/"