-
Notifications
You must be signed in to change notification settings - Fork 11
271 lines (244 loc) · 10.9 KB
/
benchmark.yml
File metadata and controls
271 lines (244 loc) · 10.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
# Benchmark workflow: records benchmark history after a release is published
# and runs the long embedding benchmark, either automatically or on demand.
name: Benchmark

on:
  # Fires when a run of the "Publish" workflow completes, whatever its
  # conclusion; the jobs below filter on `conclusion == 'success'` themselves.
  workflow_run:
    workflows:
      - Publish
    types:
      - completed
  # Manual trigger; `version` selects what the embedding benchmark runs against.
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to benchmark for embedding-only ("dev" for local, or semver like "2.4.0" for npm)'
        required: false
        default: "dev"

# Start from an empty token scope at workflow level; each job declares the
# permissions it needs.
permissions: {}

jobs:
# ── Record benchmark history for the just-published release ──
#
# The build/query/incremental/resolution benchmarks are measured during the
# Publish workflow's pre-publish-benchmark gate (against the just-built
# native artifact). That job uploads the modified history files as an
# artifact only when the regression guard passes — meaning a publish that
# would have regressed is aborted before reaching npm, and no PR is opened
# for an un-published release. This job consumes that artifact and opens a
# single PR with the updates.
  record-benchmarks:
    runs-on: ubuntu-latest
    # Only react to a successful Publish run that was not itself triggered
    # by a push.
    if: >-
      github.event_name == 'workflow_run' &&
      github.event.workflow_run.conclusion == 'success' &&
      github.event.workflow_run.event != 'push'
    permissions:
      actions: read
      contents: write
      pull-requests: write
    steps:
      # Full history (fetch-depth: 0) so the release tags are available to
      # the "Determine release version" step.
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          ref: main
          token: ${{ secrets.GITHUB_TOKEN }}

      # Both artifacts come from the triggering Publish run (run-id), not
      # from this workflow's own run.
      - name: Download benchmark history artifact
        uses: actions/download-artifact@v8
        with:
          name: benchmark-files
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Download benchmark JSON results
        uses: actions/download-artifact@v8
        with:
          name: benchmark-results-json
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ secrets.GITHUB_TOKEN }}

      # Highest vX.Y.Z tag (excluding any containing "dev"), with the
      # leading "v" stripped.
      - name: Determine release version
        id: version
        run: |
          TAG=$(git tag --sort=-version:refname --list 'v[0-9]*.[0-9]*.[0-9]*' | grep -v dev | head -1)
          VERSION="${TAG#v}"
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

      # changed=true when tracked benchmark files or README.md differ from
      # HEAD, or when new untracked files appeared under generated/benchmarks/.
      - name: Check for changes
        id: changes
        run: |
          CHANGED=false
          if ! git diff --quiet HEAD -- generated/benchmarks/ README.md 2>/dev/null; then
            CHANGED=true
          fi
          if [ -n "$(git ls-files --others --exclude-standard generated/benchmarks/)" ]; then
            CHANGED=true
          fi
          echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"

      - name: Commit and push via PR
        if: steps.changes.outputs.changed == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          VERSION: ${{ steps.version.outputs.version }}
        run: |
          TITLE="docs: update performance benchmarks (${VERSION})"

          # Look for an already-open PR BEFORE creating or pushing a branch,
          # so a repeated run does not leave an orphaned branch on the remote.
          # -x matches the whole title line rather than a substring, and
          # --limit raises gh's default page of 30 PRs.
          if gh pr list --state open --limit 100 --json title --jq ".[].title" | grep -qxF "$TITLE"; then
            echo "::notice::PR already open for '$TITLE' — skipping"
            exit 0
          fi

          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          # Timestamp suffix keeps branch names unique across reruns.
          BRANCH="chore/bench-v${VERSION}-$(date +%Y%m%d-%H%M%S)"
          git checkout -b "$BRANCH"
          git add generated/benchmarks/BUILD-BENCHMARKS.md generated/benchmarks/QUERY-BENCHMARKS.md generated/benchmarks/INCREMENTAL-BENCHMARKS.md README.md
          git commit -m "$TITLE"
          git push origin "$BRANCH"

          gh pr create \
            --base main \
            --head "$BRANCH" \
            --title "$TITLE" \
            --body "Automated benchmark history update for **${VERSION}** from publish run [#${{ github.event.workflow_run.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.event.workflow_run.id }}). These numbers were measured during the pre-publish gate and passed the regression guard before npm publish proceeded."

      # Engine-parity gate: surfaces wasm/native divergence as a red workflow
      # status (does not block — publish has already completed). Runs after
      # the doc PR is created so the PR still records data even when parity
      # regresses.
      - name: Engine parity gate
        run: node scripts/benchmark-parity-gate.mjs benchmark-result.json
# ── Embedding benchmark (post-publish, npm-installed package) ──
#
# Embeddings have no regression guard and take 2.5+ hours to run, so they
# cannot fit in the pre-publish path. They run after a successful publish
# against the npm-installed package and open their own PR.
#
# User-controlled values (the workflow_dispatch `version` input and anything
# derived from it) are passed to scripts through `env:` rather than being
# interpolated into the script text, so they cannot inject shell code.
  embedding-benchmark:
    runs-on: ubuntu-latest
    # 7 models x 30 min each = 210 min worst-case; symbols are sampled to 1500 so
    # typical runtime is ~23 min/model ≈ 160 min + setup headroom
    timeout-minutes: 240
    # Manual dispatch always runs; otherwise require a successful Publish run
    # that was not itself triggered by a push.
    if: >-
      github.event_name == 'workflow_dispatch' ||
      (github.event.workflow_run.conclusion == 'success' &&
      github.event.workflow_run.event != 'push')
    permissions:
      actions: read
      contents: write
      pull-requests: write
    steps:
      # Full history (fetch-depth: 0) so release tags are available below.
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          ref: main
          token: ${{ secrets.GITHUB_TOKEN }}

      - uses: actions/setup-node@v6
        with:
          node-version: "22"
          cache: "npm"

      - name: Install dependencies
        timeout-minutes: 20
        run: npm install --prefer-offline --no-audit --no-fund

      # Outputs:
      #   source  — "npm" (benchmark the published package) or "local"
      #   version — release version, or "dev" for the local checkout
      - name: Determine benchmark mode
        id: mode
        env:
          EVENT_NAME: ${{ github.event_name }}
          INPUT_VERSION: ${{ inputs.version }}
        run: |
          if [ "$EVENT_NAME" = "workflow_run" ]; then
            # Highest vX.Y.Z tag (excluding any containing "dev"), "v" stripped.
            TAG=$(git tag --sort=-version:refname --list 'v[0-9]*.[0-9]*.[0-9]*' | grep -v dev | head -1)
            VERSION="${TAG#v}"
            echo "source=npm" >> "$GITHUB_OUTPUT"
            echo "version=$VERSION" >> "$GITHUB_OUTPUT"
          elif [ "$INPUT_VERSION" = "dev" ] || [ -z "$INPUT_VERSION" ]; then
            echo "source=local" >> "$GITHUB_OUTPUT"
            echo "version=dev" >> "$GITHUB_OUTPUT"
          else
            # Reject anything that is not a plain version token before it is
            # written to $GITHUB_OUTPUT (a newline there could forge outputs).
            case "$INPUT_VERSION" in
              *[!A-Za-z0-9._+-]*)
                echo "::error::Invalid version input: only letters, digits, '.', '_', '+' and '-' are allowed"
                exit 1
                ;;
            esac
            echo "source=npm" >> "$GITHUB_OUTPUT"
            echo "version=$INPUT_VERSION" >> "$GITHUB_OUTPUT"
          fi

      # Skip the multi-hour run when the report already contains an entry for
      # this version; "dev" is always re-run.
      - name: Check for existing benchmark
        id: existing
        env:
          VERSION: ${{ steps.mode.outputs.version }}
        run: |
          VERSION_RE="${VERSION//./\\.}"
          if [ "$VERSION" = "dev" ]; then
            echo "skip=false" >> "$GITHUB_OUTPUT"
          elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/benchmarks/EMBEDDING-BENCHMARKS.md 2>/dev/null; then
            echo "Benchmark for $VERSION already exists in EMBEDDING-BENCHMARKS.md — skipping"
            echo "skip=true" >> "$GITHUB_OUTPUT"
          else
            echo "skip=false" >> "$GITHUB_OUTPUT"
          fi

      # Poll the registry: 20 attempts x 30 s = 10 minutes.
      - name: Wait for npm propagation
        if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm'
        env:
          VERSION: ${{ steps.mode.outputs.version }}
        run: |
          echo "Waiting for @optave/codegraph@${VERSION} on npm..."
          for i in $(seq 1 20); do
            if npm view "@optave/codegraph@${VERSION}" version 2>/dev/null; then
              echo "Package available on npm"
              exit 0
            fi
            echo "  Attempt $i/20 — not yet available, waiting 30s..."
            sleep 30
          done
          echo "::error::Package @optave/codegraph@${VERSION} not found on npm after 10 minutes"
          exit 1

      - name: Cache HuggingFace models
        if: steps.existing.outputs.skip != 'true'
        uses: actions/cache@v5
        with:
          path: ~/.cache/huggingface
          key: hf-models-${{ runner.os }}-${{ hashFiles('src/domain/search/**') }}
          restore-keys: hf-models-${{ runner.os }}-

      - name: Build graph
        if: steps.existing.outputs.skip != 'true'
        run: npx codegraph build .

      - name: Run embedding benchmark
        if: steps.existing.outputs.skip != 'true'
        timeout-minutes: 160
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          BENCH_VERSION: ${{ steps.mode.outputs.version }}
          BENCH_SOURCE: ${{ steps.mode.outputs.source }}
        run: |
          # Node >= 23 accepts --strip-types; older releases need the
          # --experimental- prefix.
          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
          # Build the argument list as an array so the version is passed as
          # exactly one argument, with no word splitting or globbing.
          ARGS=(--version "$BENCH_VERSION")
          if [ "$BENCH_SOURCE" = "npm" ]; then
            ARGS+=(--npm)
          fi
          node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/embedding-benchmark.ts "${ARGS[@]}" > embedding-benchmark-result.json

      - name: Update embedding report
        if: steps.existing.outputs.skip != 'true'
        run: |
          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
          node $STRIP_FLAG scripts/update-embedding-report.ts embedding-benchmark-result.json

      - name: Upload embedding result
        if: steps.existing.outputs.skip != 'true'
        uses: actions/upload-artifact@v7
        with:
          name: embedding-benchmark-result
          path: embedding-benchmark-result.json

      - name: Check for changes
        if: steps.existing.outputs.skip != 'true'
        id: changes
        run: |
          CHANGED=false
          # Detect modified tracked files
          if ! git diff --quiet HEAD -- generated/benchmarks/EMBEDDING-BENCHMARKS.md 2>/dev/null; then
            CHANGED=true
          fi
          # Detect newly created (untracked) files
          if [ -n "$(git ls-files --others --exclude-standard generated/benchmarks/EMBEDDING-BENCHMARKS.md)" ]; then
            CHANGED=true
          fi
          echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"

      - name: Commit and push via PR
        if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          VERSION: ${{ steps.mode.outputs.version }}
        run: |
          TITLE="docs: update embedding benchmarks (${VERSION})"

          # Look for an already-open PR BEFORE creating or pushing a branch,
          # so a repeated run does not leave an orphaned branch on the remote.
          # -x matches the whole title line rather than a substring, and
          # --limit raises gh's default page of 30 PRs.
          if gh pr list --state open --limit 100 --json title --jq ".[].title" | grep -qxF "$TITLE"; then
            echo "::notice::PR already open for '$TITLE' — skipping"
            exit 0
          fi

          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          # Timestamp suffix keeps branch names unique across reruns.
          if [ "$VERSION" = "dev" ]; then
            BRANCH="chore/embedding-bench-dev-$(date +%Y%m%d-%H%M%S)"
          else
            BRANCH="chore/embedding-bench-v${VERSION}-$(date +%Y%m%d-%H%M%S)"
          fi
          git checkout -b "$BRANCH"
          git add generated/benchmarks/EMBEDDING-BENCHMARKS.md
          git commit -m "$TITLE"
          git push origin "$BRANCH"

          gh pr create \
            --base main \
            --head "$BRANCH" \
            --title "$TITLE" \
            --body "Automated embedding benchmark update for **${VERSION}** from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."