Skip to content

Commit c52b436

Browse files
authored
ci: skip embedding benchmark on patch releases (#1451)
* ci: skip embedding benchmark on patch releases
* ci: fix embedding benchmark cold-start failures due to stale cache key

  Cache key was hashing src/domain/search/**, which changes on every release even when no models change — causing a full cache miss on each publish run. When models aren't cached, workers must download large files before inference, which exceeds the 1800s per-model timeout (designed for warm runs only).

  Fix 1: key the HF model cache on scripts/embedding-benchmark.ts instead; models only need re-downloading when the model list in that file changes.

  Fix 2: expose BENCHMARK_TIMEOUT_MS env var in the script; benchmark.yml sets it to 5400000 (90 min) on a cache miss, 1800000 (30 min) on a hit.
* fix(ci): strip pre-release suffix before patch-version integer comparison (#1451)
* ci: raise step timeout-minutes to 360 to match job ceiling (#1451)
1 parent 68234c2 commit c52b436

2 files changed

Lines changed: 31 additions & 10 deletions

File tree

.github/workflows/benchmark.yml

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,10 @@ jobs:
113113
# against the npm-installed package and open their own PR.
114114
embedding-benchmark:
115115
runs-on: ubuntu-latest
116-
# 11 models x 30 min each = 330 min worst-case (caps via TIMEOUT_MS in the
117-
# script); symbols are sampled to 1500 so typical runtime is ~23 min/model
118-
# ≈ 253 min + setup headroom
116+
# Warm run (cached models): 11 models × 30 min = 330 min worst-case.
117+
# Cold start (cache miss): 11 models × 90 min = 990 min — exceeds GitHub's
118+
# 360-min job cap, but cold starts only happen when the model list changes.
119+
# Most releases hit cache and run well within the limit.
119120
timeout-minutes: 360
120121
if: >-
121122
github.event_name == 'workflow_dispatch' ||
@@ -165,11 +166,22 @@ jobs:
165166
VERSION_RE="${VERSION//./\\.}"
166167
if [ "$VERSION" = "dev" ]; then
167168
echo "skip=false" >> "$GITHUB_OUTPUT"
168-
elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/benchmarks/EMBEDDING-BENCHMARKS.md 2>/dev/null; then
169-
echo "Benchmark for $VERSION already exists in EMBEDDING-BENCHMARKS.md — skipping"
170-
echo "skip=true" >> "$GITHUB_OUTPUT"
171169
else
172-
echo "skip=false" >> "$GITHUB_OUTPUT"
170+
# Skip patch releases (X.Y.Z where Z > 0) — embedding benchmarks
171+
# only run on major (X.0.0) and minor/feature (X.Y.0) releases.
172+
IFS='.' read -r _MAJOR _MINOR PATCH <<< "$VERSION"
173+
PATCH_NUM="${PATCH%%[^0-9]*}" # strip any pre-release suffix (e.g. "1-rc.1" → "1")
174+
if [ "${PATCH_NUM:-0}" -gt 0 ]; then
175+
echo "Patch release ${VERSION} — skipping embedding benchmark (only runs for major/minor releases)"
176+
echo "skip=true" >> "$GITHUB_OUTPUT"
177+
exit 0
178+
fi
179+
if grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/benchmarks/EMBEDDING-BENCHMARKS.md 2>/dev/null; then
180+
echo "Benchmark for $VERSION already exists in EMBEDDING-BENCHMARKS.md — skipping"
181+
echo "skip=true" >> "$GITHUB_OUTPUT"
182+
else
183+
echo "skip=false" >> "$GITHUB_OUTPUT"
184+
fi
173185
fi
174186
175187
- name: Wait for npm propagation
@@ -190,10 +202,14 @@ jobs:
190202
191203
- name: Cache HuggingFace models
192204
if: steps.existing.outputs.skip != 'true'
205+
id: hf-cache
193206
uses: actions/cache@v5
194207
with:
195208
path: ~/.cache/huggingface
196-
key: hf-models-${{ runner.os }}-${{ hashFiles('src/domain/search/**') }}
209+
# Key on the benchmark script (which owns the model list), not on
210+
# search source. Source changes don't require re-downloading models,
211+
# so the old key caused an unnecessary cache miss on every new release.
212+
key: hf-models-${{ runner.os }}-${{ hashFiles('scripts/embedding-benchmark.ts') }}
197213
restore-keys: hf-models-${{ runner.os }}-
198214

199215
- name: Build graph
@@ -202,9 +218,12 @@ jobs:
202218

203219
- name: Run embedding benchmark
204220
if: steps.existing.outputs.skip != 'true'
205-
timeout-minutes: 300
221+
timeout-minutes: 360
206222
env:
207223
HF_TOKEN: ${{ secrets.HF_TOKEN }}
224+
# On a cache miss models must be downloaded before inference can start.
225+
# Allow 90 min per model for cold starts; warm runs stay at 30 min.
226+
BENCHMARK_TIMEOUT_MS: ${{ steps.hf-cache.outputs.cache-hit == 'true' && '1800000' || '5400000' }}
208227
run: |
209228
STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
210229
ARGS="--version ${{ steps.mode.outputs.version }}"

scripts/embedding-benchmark.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,9 @@ for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP'] as const) {
196196

197197
const { MODELS } = await import(srcImport(srcDir, 'domain/search/index.js'));
198198

199-
const TIMEOUT_MS = 1_800_000; // 30 min — with symbol sampling, embed (~18 min) + search (~5 min) fits comfortably
199+
// Default: 30 min (warm, models cached). CI sets BENCHMARK_TIMEOUT_MS=5400000
200+
// on a cache miss so cold-start downloads don't kill the worker prematurely.
201+
const TIMEOUT_MS = Number(process.env.BENCHMARK_TIMEOUT_MS) || 1_800_000;
200202
const modelKeys = Object.keys(MODELS);
201203
const results = {};
202204
let symbolCount = 0;

0 commit comments

Comments
 (0)