|
| 1 | +/** |
| 2 | + * Apples-to-apples benchmark against the turbopuffer `vector-10m` workload |
| 3 | + * spec, scaled to 1M Cohere Wikipedia embeddings. |
| 4 | + * |
| 5 | + * Dataset: Cohere/wikipedia-2023-11-embed-multilingual-v3 (1M × 1024-dim) |
| 6 | + * Metric: cosine |
| 7 | + * topK: 10 |
| 8 | + * Queries: random unit vectors (NOT from dataset, matches tpuf) |
| 9 | + * |
| 10 | + * Cold mode: fresh AsyncBuffer per query (no metadata cache, no binary |
| 11 | + * prefetch). Mirrors tpuf's `disable_cache: true`. |
| 12 | + * Warm mode: single client; metadata parsed once and binary prefetched once |
| 13 | + * before any query is scored. Mirrors tpuf's warm-cache namespace. |
| 14 | + * |
| 15 | + * Recall@10 is also reported (tpuf reports latency only) so the config we |
| 16 | + * pick can't cheat by trading recall for speed. |
| 17 | + * |
| 18 | + * node --max-old-space-size=8192 scripts/bench-tpuf.js [url] [local-file] [n-cold] [n-warm] |
| 19 | + * |
| 20 | + * Defaults: |
| 21 | + * url https://s3.hyperparam.app/tpuf-bench/tpuf-bench-1m.parquet |
| 22 | + * local data/tpuf-bench-1000k.parquet |
| 23 | + * n-cold 30 (each cold query is a fresh CloudFront fetch — keep small) |
| 24 | + * n-warm 200 (per-config; sampled over the entire sweep) |
| 25 | + */ |
| 26 | +// Force IPv4: home network advertises IPv6 to CloudFront but drops packets. |
| 27 | +import dns from 'node:dns' |
| 28 | +const origLookup = dns.lookup |
| 29 | +// @ts-expect-error overload signature |
| 30 | +dns.lookup = (hostname, opts, cb) => { |
| 31 | + if (typeof opts === 'function') { cb = opts; opts = {} } |
| 32 | + return origLookup.call(dns, hostname, { ...opts ?? {}, family: 4 }, cb) |
| 33 | +} |
| 34 | + |
| 35 | +import { |
| 36 | + asyncBufferFromFile, asyncBufferFromUrl, cachedAsyncBuffer, parquetMetadataAsync, parquetRead, |
| 37 | +} from 'hyparquet' |
| 38 | +import { defaultVectorColumn } from '../src/constants.js' |
| 39 | +import { prefetchBinary } from '../src/prefetch.js' |
| 40 | +import { searchVectors } from '../src/searchVectors.js' |
| 41 | +import { dotProduct, l2Normalize, parseKvMetadata, unpackFloat32 } from '../src/utils.js' |
| 42 | + |
| 43 | +const URL = process.argv[2] ?? 'https://s3.hyperparam.app/tpuf-bench/tpuf-bench-1m.parquet' |
| 44 | +const LOCAL = process.argv[3] ?? 'data/tpuf-bench-1000k.parquet' |
| 45 | +const N_COLD = parseInt(process.argv[4] ?? '30', 10) |
| 46 | +const N_WARM = parseInt(process.argv[5] ?? '200', 10) |
| 47 | +const TOP_K = 10 |
| 48 | + |
| 49 | +// Configurations to sweep. Each is run in both cold and warm modes so we can |
| 50 | +// see the recall/latency frontier and pick a fair "apples-to-apples" pick. |
| 51 | +const CONFIGS = [ |
| 52 | + { name: 'rerank=10 probe=0.10', rerankFactor: 10, probe: 0.10 }, |
| 53 | + { name: 'rerank=10 probe=0.25', rerankFactor: 10, probe: 0.25 }, |
| 54 | + { name: 'rerank=100 probe=0.10', rerankFactor: 100, probe: 0.10 }, |
| 55 | + { name: 'rerank=100 probe=0.25', rerankFactor: 100, probe: 0.25 }, |
| 56 | +] |
| 57 | + |
| 58 | +console.log(`URL: ${URL}`) |
| 59 | +console.log(`Local: ${LOCAL}`) |
| 60 | +console.log(`Queries: cold=${N_COLD}/config, warm=${N_WARM}/config, topK=${TOP_K}`) |
| 61 | + |
| 62 | +const local = await asyncBufferFromFile(LOCAL) |
| 63 | +const localMeta = await parquetMetadataAsync(local) |
| 64 | +const hv = parseKvMetadata(localMeta) |
| 65 | +console.log(`File: ${hv.count.toLocaleString()} × ${hv.dimension}-dim, metric=${hv.metric}, clusters=${hv.clusters}`) |
| 66 | + |
| 67 | +// Random unit query vectors — matches tpuf's pseudorandom template. |
| 68 | +let lcg = 1 |
| 69 | +function rand() { |
| 70 | + lcg = Math.imul(lcg, 1664525) + 1013904223 >>> 0 |
| 71 | + return lcg / 0x100000000 |
| 72 | +} |
| 73 | +function randomUnit(dim) { |
| 74 | + const v = new Float32Array(dim) |
| 75 | + for (let i = 0; i < dim; i += 1) v[i] = rand() * 2 - 1 |
| 76 | + return l2Normalize(v) |
| 77 | +} |
| 78 | +const nQueries = Math.max(N_COLD, N_WARM) |
| 79 | +console.log(`\nGenerating ${nQueries} random ${hv.dimension}-dim unit queries...`) |
| 80 | +const queries = Array.from({ length: nQueries }, () => randomUnit(hv.dimension)) |
| 81 | + |
| 82 | +// Brute-force ground truth — heavy at this scale, so we limit to the cold |
| 83 | +// sample size. Warm reuses ground truth for queries < N_COLD; queries beyond |
| 84 | +// that count only toward latency, not recall (still statistically valid). |
| 85 | +const N_GT = Math.min(N_COLD, nQueries) |
| 86 | +console.log(`Computing brute-force ground truth for ${N_GT} queries (~50s)...`) |
| 87 | +const tGT = performance.now() |
| 88 | +/** @type {Array<{score: number, idx: number}[]>} */ |
| 89 | +const groundTruthHeaps = Array.from({ length: N_GT }, () => []) |
| 90 | +let rowsScored = 0 |
| 91 | +await parquetRead({ |
| 92 | + file: local, |
| 93 | + metadata: localMeta, |
| 94 | + columns: [defaultVectorColumn], |
| 95 | + onChunk: ({ columnData, rowStart }) => { |
| 96 | + for (let r = 0; r < columnData.length; r += 1) { |
| 97 | + const vec = unpackFloat32(columnData[r]) |
| 98 | + const rowIdx = rowStart + r |
| 99 | + for (let q = 0; q < N_GT; q += 1) { |
| 100 | + const score = dotProduct(queries[q], vec) |
| 101 | + const heap = groundTruthHeaps[q] |
| 102 | + if (heap.length < TOP_K) { |
| 103 | + heap.push({ score, idx: rowIdx }) |
| 104 | + if (heap.length === TOP_K) heap.sort((a, b) => a.score - b.score) |
| 105 | + } else if (score > heap[0].score) { |
| 106 | + heap[0] = { score, idx: rowIdx } |
| 107 | + heap.sort((a, b) => a.score - b.score) |
| 108 | + } |
| 109 | + } |
| 110 | + } |
| 111 | + rowsScored += columnData.length |
| 112 | + if (rowsScored % 50000 < columnData.length) { |
| 113 | + process.stdout.write(`\r ${rowsScored.toLocaleString()} rows scored `) |
| 114 | + } |
| 115 | + }, |
| 116 | +}) |
| 117 | +process.stdout.write('\n') |
| 118 | +const groundTruth = groundTruthHeaps.map(h => new Set(h.map(e => e.idx))) |
| 119 | +console.log(` ground truth in ${((performance.now() - tGT) / 1000).toFixed(1)}s`) |
| 120 | + |
| 121 | +/** |
| 122 | + * @param {{rowIndex: number}[]} results |
| 123 | + * @param {Set<number>} truth |
| 124 | + * @returns {number} |
| 125 | + */ |
| 126 | +function recallAt(results, truth) { |
| 127 | + let matches = 0 |
| 128 | + for (const r of results) if (truth.has(r.rowIndex)) matches += 1 |
| 129 | + return matches / truth.size |
| 130 | +} |
| 131 | + |
| 132 | +/** |
| 133 | + * @param {number[]} arr sorted ascending |
| 134 | + * @param {number} q quantile in [0,1] |
| 135 | + * @returns {number} |
| 136 | + */ |
| 137 | +function pct(arr, q) { return arr[Math.min(arr.length - 1, Math.floor(q * arr.length))] } |
| 138 | + |
| 139 | +/** |
| 140 | + * @param {string} label |
| 141 | + * @param {number[]} times |
| 142 | + * @param {number[]} recalls |
| 143 | + */ |
| 144 | +function printRow(label, times, recalls) { |
| 145 | + times.sort((a, b) => a - b) |
| 146 | + const avgRecall = recalls.length ? recalls.reduce((a, b) => a + b, 0) / recalls.length : NaN |
| 147 | + const recallStr = Number.isFinite(avgRecall) ? `${(avgRecall * 100).toFixed(1).padStart(5)}%` : ' n/a' |
| 148 | + console.log( |
| 149 | + ` ${label.padEnd(28)} ` + |
| 150 | + `p50=${pct(times, 0.5).toFixed(0).padStart(5)}ms ` + |
| 151 | + `p90=${pct(times, 0.9).toFixed(0).padStart(5)}ms ` + |
| 152 | + `p99=${pct(times, 0.99).toFixed(0).padStart(5)}ms ` + |
| 153 | + `recall(n=${String(recalls.length).padStart(3)})=${recallStr}` |
| 154 | + ) |
| 155 | +} |
| 156 | + |
| 157 | +// ── COLD: fresh AsyncBuffer per query, per config ──────────────────────── |
| 158 | +console.log('\n=== COLD (fresh CloudFront client per query) ===') |
| 159 | +for (const cfg of CONFIGS) { |
| 160 | + const times = [] |
| 161 | + const recalls = [] |
| 162 | + for (let q = 0; q < N_COLD; q += 1) { |
| 163 | + const raw = await asyncBufferFromUrl({ url: URL }) |
| 164 | + const cached = cachedAsyncBuffer(raw) |
| 165 | + const t0 = performance.now() |
| 166 | + const hits = await searchVectors({ |
| 167 | + source: cached, query: queries[q], topK: TOP_K, |
| 168 | + rerankFactor: cfg.rerankFactor, probe: cfg.probe, |
| 169 | + }) |
| 170 | + times.push(performance.now() - t0) |
| 171 | + recalls.push(recallAt(hits, groundTruth[q])) |
| 172 | + process.stdout.write(`\r ${cfg.name}: ${q + 1}/${N_COLD} `) |
| 173 | + } |
| 174 | + process.stdout.write('\n') |
| 175 | + printRow(cfg.name, times, recalls) |
| 176 | +} |
| 177 | + |
| 178 | +// ── WARM: single client + prefetched binary, sweep configs ────────────── |
| 179 | +console.log('\n=== WARM (single client + prefetched binary) ===') |
| 180 | +const warmRaw = await asyncBufferFromUrl({ url: URL }) |
| 181 | +const warmCached = cachedAsyncBuffer(warmRaw) |
| 182 | +const warmMeta = await parquetMetadataAsync(warmCached) |
| 183 | +const tPrefetch = performance.now() |
| 184 | +const warmBin = await prefetchBinary({ source: warmCached, metadata: warmMeta }) |
| 185 | +console.log(` setup: metadata + ${(warmBin.byteLength / 1e6).toFixed(0)} MB binary in ${((performance.now() - tPrefetch) / 1000).toFixed(2)}s`) |
| 186 | +for (const cfg of CONFIGS) { |
| 187 | + const times = [] |
| 188 | + const recalls = [] |
| 189 | + for (let q = 0; q < N_WARM; q += 1) { |
| 190 | + const t0 = performance.now() |
| 191 | + const hits = await searchVectors({ |
| 192 | + source: warmCached, metadata: warmMeta, binary: warmBin, |
| 193 | + query: queries[q], topK: TOP_K, |
| 194 | + rerankFactor: cfg.rerankFactor, probe: cfg.probe, |
| 195 | + }) |
| 196 | + times.push(performance.now() - t0) |
| 197 | + if (q < N_GT) recalls.push(recallAt(hits, groundTruth[q])) |
| 198 | + process.stdout.write(`\r ${cfg.name}: ${q + 1}/${N_WARM} `) |
| 199 | + } |
| 200 | + process.stdout.write('\n') |
| 201 | + printRow(cfg.name, times, recalls) |
| 202 | +} |
| 203 | + |
| 204 | +console.log('\nReference (tpuf vector-1m, 768-dim, c2-standard-30 in GCP us-central1):') |
| 205 | +console.log(' warm: p50= 8ms p90= 10ms p99= 35ms') |
| 206 | +console.log(' cold: p50=343ms p90=444ms p99=554ms') |
| 207 | +console.log('Reference (tpuf vector-10m, 1024-dim — for scale, 10× our N):') |
| 208 | +console.log(' warm: p50= 14ms p90= 17ms p99= 27ms') |
| 209 | +console.log(' cold: p50=874ms p90=1214ms p99=1686ms') |
0 commit comments