|
| 1 | +#!/usr/bin/env bash |
| 2 | +# Performance test harness for the PostalCode2NUTS service. |
| 3 | +# |
| 4 | +# Discovers max sustainable RPS, characterises the latency curve, and verifies |
| 5 | +# stability at the chosen operating point. See docs/performance.md for the |
| 6 | +# methodology and the most recent measured results. |
| 7 | +# |
| 8 | +# Required env vars: |
| 9 | +# PC2NUTS_TARGET Base URL of the service (no trailing slash). Example: |
| 10 | +# https://example.invalid |
| 11 | +# PC2NUTS_TOKEN Trusted-token value granting rate-limit bypass. Issue with: |
| 12 | +# python -m scripts.tokens add --label "perf-test-YYYY-MM-DD" |
| 13 | +# |
| 14 | +# Optional env vars: |
| 15 | +# OUTDIR Output directory for raw results (default: /tmp/perf). |
| 16 | +# CORPUS_COUNTRIES Space-separated CC list to pull from GISCO TERCET (default: |
| 17 | +# "BE AT IE LU EE" — small files, fast download). |
| 18 | +# SCENARIOS Subset of "warm A B C D E" (default: all). |
| 19 | +# |
| 20 | +# Required tools on PATH: bombardier, vegeta, curl, python3, file, unzip. |
| 21 | +# |
| 22 | +# Stop conditions: any 5xx >1%, p99 >5s, or any 429 → halt and inspect output. |
set -euo pipefail

# Fail fast with a readable message when a required variable is missing.
: "${PC2NUTS_TARGET:?PC2NUTS_TARGET (e.g. https://example.invalid) is required}"
: "${PC2NUTS_TOKEN:?PC2NUTS_TOKEN is required (use scripts/tokens.py add)}"

# Every URL is built as "${PC2NUTS_TARGET}/path"; drop trailing slashes so a
# value such as https://host/ does not turn into https://host//lookup.
while [[ "${PC2NUTS_TARGET}" == */ ]]; do
  PC2NUTS_TARGET="${PC2NUTS_TARGET%/}"
done

OUTDIR="${OUTDIR:-/tmp/perf}"
CORPUS_COUNTRIES="${CORPUS_COUNTRIES:-BE AT IE LU EE}"
SCENARIOS="${SCENARIOS:-warm A B C D E}"
# Sent with every request issued by the scenarios below.
HEADER="Authorization: Bearer ${PC2NUTS_TOKEN}"

# Raw results go to OUTDIR; downloaded TERCET files live in a sub-directory.
CORPUS_DIR="${OUTDIR}/corpus"
mkdir -p "${OUTDIR}" "${CORPUS_DIR}"
| 35 | + |
# --- Build the corpus from public TERCET ZIPs --------------------------------
#######################################
# Download the TERCET postal-code ZIPs and derive the target files.
# Globals:   CORPUS_COUNTRIES, CORPUS_DIR, PC2NUTS_TARGET (read)
# Tools:     curl, file, unzip, python3 (3.8+)
# Outputs:   progress on stdout; writes targets_B.txt (valid codes),
#            targets_C.txt (valid and invalid codes interleaved) and
#            targets_D.txt (/health) into the parent of CORPUS_DIR
# Returns:   non-zero when no postal codes could be loaded
#######################################
build_corpus() {
  local cc yr url tmp
  echo "Building corpus from GISCO TERCET (${CORPUS_COUNTRIES})..."
  # Unquoted on purpose: CORPUS_COUNTRIES is a space-separated list.
  for cc in ${CORPUS_COUNTRIES}; do
    # Try the newest postal-code vintage first and fall back to older ones.
    for yr in 2025 2024 2023; do
      url="https://gisco-services.ec.europa.eu/tercet/NUTS-2024/pc${yr}_${cc}_NUTS-2024_v1.0.zip"
      tmp="${CORPUS_DIR}/${cc}.zip"
      curl -sf -o "${tmp}" "${url}" || continue
      # Only unpack what really is a ZIP archive; anything else is discarded.
      if [ "$(file -b --mime-type "${tmp}")" = "application/zip" ]; then
        (cd "${CORPUS_DIR}" && unzip -oq "${cc}.zip")
        break
      fi
      rm -f "${tmp}"
    done
  done
  # Hand the two settings to Python explicitly so the function does not depend
  # on the caller having exported them.
  CORPUS_DIR="${CORPUS_DIR}" PC2NUTS_TARGET="${PC2NUTS_TARGET}" python3 - <<'PYEOF'
import csv, os, random, re, sys
from urllib.parse import quote

# Fixed seed: identical input files always yield the same sampled corpus.
random.seed(20260430)
corpus_dir = os.environ["CORPUS_DIR"]
target = os.environ["PC2NUTS_TARGET"]

# Per-country generators for random codes; candidates that happen to exist in
# the loaded data are rejected further down.
gen_invalid = {
    "BE": lambda: f"{random.randint(1000,9999):04d}",
    "AT": lambda: f"{random.randint(1000,9999):04d}",
    "EE": lambda: f"{random.randint(10000,99999):05d}",
    "LU": lambda: f"{random.randint(1000,9999):04d}",
}


def lookup_line(cc, pc):
    """Return one target entry; the code is percent-encoded so characters
    such as spaces cannot break the request line."""
    return f"GET {target}/lookup?country={cc}&postal_code={quote(pc, safe='')}\n\n"


# Collect the distinct postal codes (second CSV column) per country.
by_cc = {}
for fn in sorted(os.listdir(corpus_dir)):
    m = re.match(r"pc\d+_([A-Z]{2})_.*\.csv$", fn)
    if not m:
        continue
    cc = m.group(1)
    codes = set()
    with open(os.path.join(corpus_dir, fn), encoding="utf-8-sig") as f:
        r = csv.reader(f, delimiter=";")
        next(r, None)  # skip the header row
        for row in r:
            if len(row) >= 2 and (code := row[1].strip().strip("'")):
                codes.add(code)
    by_cc[cc] = codes
print(f"Loaded: {[(cc, len(c)) for cc, c in by_cc.items()]}")
if not any(by_cc.values()):
    sys.exit(f"no postal codes loaded from {corpus_dir}; did every TERCET download fail?")

# Sample roughly 5000 valid codes, split evenly across the loaded countries.
valid = []
per_cc = max(1, 5000 // len(by_cc))
for cc, codes in by_cc.items():
    valid.extend((cc, c) for c in random.sample(sorted(codes), min(per_cc, len(codes))))
random.shuffle(valid)

# Generate up to 500 codes that are absent from the loaded data.
invalid, attempts = [], 0
ccs = [cc for cc in gen_invalid if cc in by_cc]
if not ccs:
    print("warning: no loaded country has an invalid-code generator; "
          "targets_C.txt will contain valid codes only", file=sys.stderr)
while ccs and len(invalid) < 500 and attempts < 50_000:
    attempts += 1
    cc = random.choice(ccs)
    pc = gen_invalid[cc]()
    if pc not in by_cc[cc]:
        invalid.append((cc, pc))

with open(os.path.join(corpus_dir, "..", "targets_B.txt"), "w") as f:
    for cc, pc in valid:
        f.write(lookup_line(cc, pc))

# Interleave valid and invalid entries for as long as both lists last.
mix = []
for i in range(max(len(valid), len(invalid))):
    if i < len(valid):
        mix.append(valid[i])
    if i < len(invalid):
        mix.append(invalid[i])
with open(os.path.join(corpus_dir, "..", "targets_C.txt"), "w") as f:
    for cc, pc in mix:
        f.write(lookup_line(cc, pc))

with open(os.path.join(corpus_dir, "..", "targets_D.txt"), "w") as f:
    f.write(f"GET {target}/health\n")
print(f"valid={len(valid)} invalid={len(invalid)} mix={len(mix)}")
PYEOF
}
| 104 | + |
#######################################
# Warm the service with 500 sequential lookups before measuring.
# Globals:   OUTDIR, HEADER (read)
# Outputs:   banner and final error count on stdout
# Returns:   non-zero when targets_B.txt contains no targets
#######################################
run_warm() {
  echo "=== warm: 500 sequential mixed lookups ==="
  local -a urls=()
  local line
  # targets_B.txt separates entries with blank lines. Keep only real targets
  # (at most the first 100) so a random pick can never yield an empty URL.
  while IFS= read -r line; do
    if [[ "${line}" == "GET "* ]]; then
      urls+=("${line#GET }")
      if (( ${#urls[@]} == 100 )); then
        break
      fi
    fi
  done < "${OUTDIR}/targets_B.txt"

  local pool=${#urls[@]}
  if (( pool == 0 )); then
    echo "warm: no targets found in ${OUTDIR}/targets_B.txt" >&2
    return 1
  fi

  local errors=0 code i
  for (( i = 0; i < 500; i++ )); do
    # curl exits non-zero on transport failures; record that as an error
    # instead of letting `set -e` abort the whole run.
    code=$(curl -s -o /dev/null -w "%{http_code}" -H "${HEADER}" \
      "${urls[RANDOM % pool]}") || code="000"
    [ "${code}" = "200" ] || errors=$((errors + 1))
  done
  echo "warm complete; errors=${errors}"
}
| 118 | + |
#######################################
# Scenario A: hammer one hot key with bombardier at rising concurrency.
# Globals:   PC2NUTS_TARGET, HEADER, OUTDIR (read)
# Outputs:   full bombardier output in ${OUTDIR}/A_c<N>.txt; a filtered
#            summary of each run on stdout
# Returns:   non-zero as soon as a bombardier run fails
#######################################
run_A() {
  echo "=== A: hot-key saturation sweep (BE 3080, c={5,10,20,40,80} × 20s) ==="
  local url="${PC2NUTS_TARGET}/lookup?country=BE&postal_code=3080"
  local c report
  for c in 5 10 20 40 80; do
    echo "-- A: -c ${c} --"
    report="${OUTDIR}/A_c${c}.txt"
    # Run bombardier on its own so that a missing binary or a failed run is
    # not swallowed by the `|| true` that belongs to grep.
    bombardier -c "${c}" -d 20s -l --timeout 30s -H "${HEADER}" "${url}" \
      > "${report}"
    # grep exits 1 when nothing matches; that alone must not stop the sweep.
    # " +" accepts any indentation depth for percentile and status lines.
    grep -E "Reqs/sec|Latency|^ +[0-9]+%|HTTP codes|^ +[0-9xa-z]" "${report}" \
      || true
    sleep 10
  done
}
| 130 | + |
#######################################
# Scenario B: replay the valid-code corpus with vegeta at rising rates.
# Globals:   OUTDIR, HEADER (read)
# Outputs:   ${OUTDIR}/B_r<rate>.bin (raw results) and B_r<rate>.txt (text
#            report, also echoed to stdout)
#######################################
run_B() {
  echo "=== B: random-corpus rate sweep (10/20/25/30/35 RPS × 20s) ==="
  # Keep the loop state local so it cannot clobber a caller's variables.
  local r stem
  for r in 10 20 25 30 35; do
    echo "-- B: ${r}/s --"
    stem="${OUTDIR}/B_r${r}"
    vegeta attack -duration=20s -rate="${r}/s" -header="${HEADER}" \
      -targets="${OUTDIR}/targets_B.txt" > "${stem}.bin"
    vegeta report -type=text "${stem}.bin" | tee "${stem}.txt"
    sleep 10
  done
}
| 141 | + |
#######################################
# Scenario C: replay the interleaved valid/invalid corpus at a fixed rate.
# Globals:   OUTDIR, HEADER (read)
# Outputs:   ${OUTDIR}/C_r25.bin (raw results) and C_r25.txt (text report,
#            also echoed to stdout)
#######################################
run_C() {
  printf '%s\n' "=== C: 50/50 hit-miss mix at 25/s × 20s (Tier 3 fallback cost) ==="
  local -r raw_results="${OUTDIR}/C_r25.bin"
  local -r text_report="${OUTDIR}/C_r25.txt"
  vegeta attack \
    -targets="${OUTDIR}/targets_C.txt" \
    -header="${HEADER}" \
    -rate=25/s \
    -duration=20s \
    > "${raw_results}"
  vegeta report -type=text "${raw_results}" | tee "${text_report}"
  # Pause before whatever scenario runs next.
  sleep 10
}
| 149 | + |
#######################################
# Scenario D: request /health at a fixed rate as a baseline.
# Globals:   OUTDIR, HEADER (read)
# Outputs:   ${OUTDIR}/D_r25.bin (raw results) and D_r25.txt (text report,
#            also echoed to stdout)
#######################################
run_D() {
  printf '%s\n' "=== D: /health at 25/s × 20s (FastAPI/uvicorn floor) ==="
  local -r raw_results="${OUTDIR}/D_r25.bin"
  local -r text_report="${OUTDIR}/D_r25.txt"
  vegeta attack \
    -targets="${OUTDIR}/targets_D.txt" \
    -header="${HEADER}" \
    -rate=25/s \
    -duration=20s \
    > "${raw_results}"
  vegeta report -type=text "${raw_results}" | tee "${text_report}"
  # Pause before whatever scenario runs next.
  sleep 10
}
| 157 | + |
#######################################
# Scenario E: three-minute sustained run over the valid-code corpus.
# Globals:   OUTDIR, HEADER (read)
# Outputs:   ${OUTDIR}/E_r27.bin (raw results) and E_r27.txt (text report
#            followed by a latency histogram, both also echoed to stdout)
#######################################
run_E() {
  printf '%s\n' "=== E: sustained at 27/s for 3 min (90% of knee, stability check) ==="
  local -r raw_results="${OUTDIR}/E_r27.bin"
  local -r text_report="${OUTDIR}/E_r27.txt"
  vegeta attack \
    -targets="${OUTDIR}/targets_B.txt" \
    -header="${HEADER}" \
    -rate=27/s \
    -duration=3m \
    > "${raw_results}"
  vegeta report -type=text "${raw_results}" | tee "${text_report}"
  # The histogram is appended to the same report file.
  vegeta report -type='hist[0,50ms,100ms,200ms,500ms,1s,2s,5s]' "${raw_results}" \
    | tee -a "${text_report}"
}
| 166 | + |
# --- main --------------------------------------------------------------------
export CORPUS_DIR PC2NUTS_TARGET

#######################################
# Check whether the cached target files can be reused.
# Globals:   OUTDIR, PC2NUTS_TARGET (read)
# Returns:   0 when targets_B/C/D.txt all exist, are non-empty, and
#            targets_B.txt was generated for the current PC2NUTS_TARGET
#######################################
targets_are_current() {
  local name first
  for name in B C D; do
    [[ -s "${OUTDIR}/targets_${name}.txt" ]] || return 1
  done
  # The target files embed the base URL, so files left over from a run against
  # another host must not be replayed.
  IFS= read -r first < "${OUTDIR}/targets_B.txt" || return 1
  [[ "${first}" == "GET ${PC2NUTS_TARGET}/"* ]]
}

targets_are_current || build_corpus

# Unquoted on purpose: SCENARIOS is a space-separated list.
for s in ${SCENARIOS}; do
  case "${s}" in
    warm) run_warm ;;
    A) run_A ;;
    B) run_B ;;
    C) run_C ;;
    D) run_D ;;
    E) run_E ;;
    *) echo "unknown scenario: ${s}" >&2; exit 2 ;;
  esac
done

echo
echo "Done. Raw outputs in ${OUTDIR}/"
echo "Remember to revoke the trusted token:"
echo " python -m scripts.tokens revoke <id>"
0 commit comments