ops-codegraph-tool/.github/workflows/benchmark.yml at dc922924315f22a24f10e7ca66f3c3254f0c7b3d · optave/ops-codegraph-tool · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
name: Benchmark

# Triggers:
#   workflow_run      - fires when the "Publish" workflow completes; the jobs
#                       below filter further on its conclusion and event.
#   workflow_dispatch - manual run; `version` selects what the embedding
#                       benchmark measures.
on:
  workflow_run:
    workflows:
      - "Publish"
    types:
      - completed
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to benchmark for embedding-only ("dev" for local, or semver like "2.4.0" for npm)'
        required: false
        default: "dev"

# No workflow-wide token permissions; each job declares the scopes it needs.
permissions: {}

jobs:
  # ── Record benchmark history for the just-published release ──
  #
  # The build/query/incremental/resolution benchmarks are measured during the
  # Publish workflow's pre-publish-benchmark gate (against the just-built
  # native artifact). That job uploads the modified history files as an
  # artifact only when the regression guard passes — meaning a publish that
  # would have regressed is aborted before reaching npm, and no PR is opened
  # for an un-published release. This job consumes that artifact and opens a
  # single PR with the updates.
  record-benchmarks:
    # Run only for a successful Publish run that was not itself triggered by
    # a push event.
    if: >-
      ${{ github.event_name == 'workflow_run'
      && github.event.workflow_run.conclusion == 'success'
      && github.event.workflow_run.event != 'push' }}
    runs-on: ubuntu-latest
    permissions:
      actions: read # artifact downloads from the triggering run
      contents: write # pushing the benchmark branch
      pull-requests: write # opening the PR

    steps:
      # Check out main with full history (fetch-depth 0) so that the release
      # tags read by the "Determine release version" step are present.
      - uses: actions/checkout@v6
        with:
          ref: main
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      # Both artifacts are taken from the Publish run that triggered this
      # workflow, hence the explicit run-id and token on each download.
      - name: Download benchmark history artifact
        uses: actions/download-artifact@v8
        with:
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ secrets.GITHUB_TOKEN }}
          name: benchmark-files

      - name: Download benchmark JSON results
        uses: actions/download-artifact@v8
        with:
          run-id: ${{ github.event.workflow_run.id }}
          github-token: ${{ secrets.GITHUB_TOKEN }}
          name: benchmark-results-json

      # Exposes `version`: the highest vX.Y.Z tag whose name does not contain
      # "dev", with the leading "v" removed.
      - name: Determine release version
        id: version
        run: |
          TAG=$(git tag --sort=-version:refname --list 'v[0-9]*.[0-9]*.[0-9]*' | grep -v dev | head -1)
          # The pipeline's exit status is that of `head`, so "no matching tag"
          # would otherwise pass silently and yield an empty version that ends
          # up in branch names and PR titles. Fail loudly instead.
          if [ -z "$TAG" ]; then
            echo "::error::No release tag matching vX.Y.Z found — cannot determine release version"
            exit 1
          fi
          VERSION="${TAG#v}"
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

      # Sets `changed` to true when the downloaded artifacts altered tracked
      # files (generated/benchmarks/ or README.md) or added untracked files
      # under generated/benchmarks/.
      - name: Check for changes
        id: changes
        run: |
          CHANGED=false
          # Tracked files that differ from HEAD.
          git diff --quiet HEAD -- generated/benchmarks/ README.md 2>/dev/null || CHANGED=true
          # New, non-ignored files.
          [ -z "$(git ls-files --others --exclude-standard generated/benchmarks/)" ] || CHANGED=true
          echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"

      # Commits the updated benchmark reports on a fresh timestamped branch
      # and opens a PR against main, unless an identically titled PR is
      # already open.
      - name: Commit and push via PR
        if: steps.changes.outputs.changed == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          VERSION: ${{ steps.version.outputs.version }}
        run: |
          TITLE="docs: update performance benchmarks (${VERSION})"

          # Look for a duplicate before touching git so that a skipped run
          # does not leave an orphan branch on the remote. -x requires the
          # whole title to match; --limit widens gh's default 30-PR listing.
          if gh pr list --state open --limit 100 --json title --jq ".[].title" | grep -qxF "$TITLE"; then
            echo "::notice::PR already open for '$TITLE' — skipping"
            exit 0
          fi

          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          BRANCH="chore/bench-v${VERSION}-$(date +%Y%m%d-%H%M%S)"
          git checkout -b "$BRANCH"
          git add generated/benchmarks/BUILD-BENCHMARKS.md generated/benchmarks/QUERY-BENCHMARKS.md generated/benchmarks/INCREMENTAL-BENCHMARKS.md README.md

          # The change check looks at all of generated/benchmarks/, but only
          # the four files above are staged. When the change is elsewhere
          # there is nothing staged and `git commit` would fail the job.
          if git diff --cached --quiet; then
            echo "::notice::No staged changes in the benchmark reports — nothing to commit"
            exit 0
          fi

          git commit -m "$TITLE"
          git push origin "$BRANCH"

          gh pr create \
            --base main \
            --head "$BRANCH" \
            --title "$TITLE" \
            --body "Automated benchmark history update for **${VERSION}** from publish run [#${{ github.event.workflow_run.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.event.workflow_run.id }}). These numbers were measured during the pre-publish gate and passed the regression guard before npm publish proceeded."

      # Engine-parity gate. A wasm/native divergence shows up as a red
      # workflow status; it cannot block anything because publish has already
      # completed. The step is placed after the doc PR step so that the PR
      # still records data even when parity regresses.
      - name: Engine parity gate
        run: |
          node scripts/benchmark-parity-gate.mjs benchmark-result.json

  # ── Embedding benchmark (post-publish, npm-installed package) ──
  #
  # Embeddings have no regression guard and take 2.5+ hours to run, so they
  # cannot fit in the pre-publish path. They run after a successful publish
  # against the npm-installed package and open their own PR.
  embedding-benchmark:
    # A manual dispatch always runs. Otherwise the triggering Publish run
    # must have succeeded and must not have been started by a push event.
    if: >-
      ${{ github.event_name == 'workflow_dispatch'
      || (github.event.workflow_run.conclusion == 'success'
      && github.event.workflow_run.event != 'push') }}
    runs-on: ubuntu-latest
    # Time budget: 7 models x 30 min each = 210 min worst-case. Symbols are
    # sampled to 1500, so a typical run is ~23 min/model ≈ 160 min, plus
    # setup headroom.
    timeout-minutes: 240
    permissions:
      actions: read
      contents: write # pushing the benchmark branch
      pull-requests: write # opening the PR

    steps:
      # Check out main with full history (fetch-depth 0) so that the release
      # tags read by the "Determine benchmark mode" step are present.
      - uses: actions/checkout@v6
        with:
          ref: main
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      # Node 22 with setup-node's npm cache enabled.
      - uses: actions/setup-node@v6
        with:
          cache: "npm"
          node-version: "22"

      # Install project dependencies, preferring cached packages and skipping
      # the audit and funding output. Capped at 20 minutes.
      - name: Install dependencies
        timeout-minutes: 20
        run: |
          npm install --prefer-offline --no-audit --no-fund

      # Decides what to benchmark and exposes two outputs:
      #   source  - "npm" (installed package) or "local" (this checkout)
      #   version - release version, the requested version, or "dev"
      #
      # Context values reach the script through environment variables rather
      # than ${{ }} interpolation, so a crafted `version` input cannot inject
      # shell code into the generated script.
      - name: Determine benchmark mode
        id: mode
        env:
          EVENT_NAME: ${{ github.event_name }}
          REQUESTED_VERSION: ${{ inputs.version }}
        run: |
          if [ "$EVENT_NAME" = "workflow_run" ]; then
            # Post-publish run: benchmark the highest non-dev vX.Y.Z tag.
            TAG=$(git tag --sort=-version:refname --list 'v[0-9]*.[0-9]*.[0-9]*' | grep -v dev | head -1)
            if [ -z "$TAG" ]; then
              echo "::error::No release tag matching vX.Y.Z found — cannot determine release version"
              exit 1
            fi
            SOURCE=npm
            VERSION="${TAG#v}"
          elif [ -z "$REQUESTED_VERSION" ] || [ "$REQUESTED_VERSION" = "dev" ]; then
            SOURCE=local
            VERSION=dev
          else
            SOURCE=npm
            VERSION="$REQUESTED_VERSION"
          fi

          # Accept only version-like text (letters, digits, '.', '+', '-',
          # starting with a letter or digit). This keeps newlines and shell
          # metacharacters out of $GITHUB_OUTPUT and out of the later steps
          # that consume this output. The rejected value is deliberately not
          # echoed back into the log.
          if ! [[ "$VERSION" =~ ^[0-9A-Za-z][0-9A-Za-z.+-]*$ ]]; then
            echo "::error::Requested version is not a valid version string"
            exit 1
          fi

          {
            echo "source=$SOURCE"
            echo "version=$VERSION"
          } >> "$GITHUB_OUTPUT"

      # Sets `skip` to true when EMBEDDING-BENCHMARKS.md already contains a
      # "version" entry for this version. "dev" is never skipped.
      #
      # The version arrives through the environment instead of ${{ }}
      # interpolation so its text is never parsed as shell code.
      - name: Check for existing benchmark
        id: existing
        env:
          VERSION: ${{ steps.mode.outputs.version }}
        run: |
          # Escape dots so they match literally in the grep pattern.
          VERSION_RE="${VERSION//./\\.}"
          if [ "$VERSION" = "dev" ]; then
            echo "skip=false" >> "$GITHUB_OUTPUT"
          elif grep -qP '"version":\s*"'"$VERSION_RE"'"' generated/benchmarks/EMBEDDING-BENCHMARKS.md 2>/dev/null; then
            echo "Benchmark for $VERSION already exists in EMBEDDING-BENCHMARKS.md — skipping"
            echo "skip=true" >> "$GITHUB_OUTPUT"
          else
            echo "skip=false" >> "$GITHUB_OUTPUT"
          fi

      # Polls the npm registry until the version is visible: 20 attempts,
      # 30 s apart (about 10 minutes), then fails the job.
      #
      # The version arrives through the environment instead of ${{ }}
      # interpolation so its text is never parsed as shell code.
      - name: Wait for npm propagation
        if: steps.existing.outputs.skip != 'true' && steps.mode.outputs.source == 'npm'
        env:
          VERSION: ${{ steps.mode.outputs.version }}
        run: |
          echo "Waiting for @optave/codegraph@${VERSION} on npm..."
          for i in $(seq 1 20); do
            if npm view "@optave/codegraph@${VERSION}" version 2>/dev/null; then
              echo "Package available on npm"
              exit 0
            fi
            echo "  Attempt $i/20 — not yet available, waiting 30s..."
            sleep 30
          done
          echo "::error::Package @optave/codegraph@${VERSION} not found on npm after 10 minutes"
          exit 1

      # Cache ~/.cache/huggingface. The key is derived from a hash of
      # src/domain/search/**, with any older cache for the same OS as a
      # fallback.
      - name: Cache HuggingFace models
        uses: actions/cache@v5
        if: ${{ steps.existing.outputs.skip != 'true' }}
        with:
          key: hf-models-${{ runner.os }}-${{ hashFiles('src/domain/search/**') }}
          restore-keys: hf-models-${{ runner.os }}-
          path: ~/.cache/huggingface

      # Run `codegraph build` on the checkout root.
      - name: Build graph
        if: ${{ steps.existing.outputs.skip != 'true' }}
        run: |
          npx codegraph build .

      # Runs scripts/embedding-benchmark.ts and captures its stdout as
      # embedding-benchmark-result.json. `--npm` is added when the mode step
      # selected the npm source.
      #
      # Step outputs arrive through BENCH_* environment variables instead of
      # ${{ }} interpolation so their text is never parsed as shell code.
      - name: Run embedding benchmark
        if: steps.existing.outputs.skip != 'true'
        timeout-minutes: 160
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          BENCH_VERSION: ${{ steps.mode.outputs.version }}
          BENCH_SOURCE: ${{ steps.mode.outputs.source }}
        run: |
          # Node 23+ spells the type-stripping flag without "experimental".
          STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
          # An array keeps every argument intact regardless of its content;
          # the previous unquoted string was subject to word splitting.
          ARGS=(--version "$BENCH_VERSION")
          if [ "$BENCH_SOURCE" = "npm" ]; then
            ARGS+=(--npm)
          fi
          node "$STRIP_FLAG" --import ./scripts/ts-resolve-loader.js scripts/embedding-benchmark.ts "${ARGS[@]}" > embedding-benchmark-result.json

      # Pass the benchmark JSON to scripts/update-embedding-report.ts.
      - name: Update embedding report
        if: ${{ steps.existing.outputs.skip != 'true' }}
        run: |
          # Node 23+ spells the type-stripping flag without "experimental".
          TS_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')")
          node $TS_FLAG scripts/update-embedding-report.ts embedding-benchmark-result.json

      # Publish the raw benchmark JSON as a workflow artifact.
      - name: Upload embedding result
        uses: actions/upload-artifact@v7
        if: ${{ steps.existing.outputs.skip != 'true' }}
        with:
          path: embedding-benchmark-result.json
          name: embedding-benchmark-result

      # Sets `changed` to true when EMBEDDING-BENCHMARKS.md differs from HEAD
      # or is a new, untracked file.
      - name: Check for changes
        id: changes
        if: ${{ steps.existing.outputs.skip != 'true' }}
        run: |
          REPORT=generated/benchmarks/EMBEDDING-BENCHMARKS.md
          CHANGED=false
          # Modified tracked file.
          git diff --quiet HEAD -- "$REPORT" 2>/dev/null || CHANGED=true
          # Newly created (untracked) file.
          [ -z "$(git ls-files --others --exclude-standard "$REPORT")" ] || CHANGED=true
          echo "changed=$CHANGED" >> "$GITHUB_OUTPUT"

      # Commits the updated embedding report on a fresh timestamped branch
      # and opens a PR against main, unless an identically titled PR is
      # already open.
      - name: Commit and push via PR
        if: steps.existing.outputs.skip != 'true' && steps.changes.outputs.changed == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          VERSION: ${{ steps.mode.outputs.version }}
        run: |
          TITLE="docs: update embedding benchmarks (${VERSION})"

          # Look for a duplicate before touching git so that a skipped run
          # does not leave an orphan branch on the remote. -x requires the
          # whole title to match; --limit widens gh's default 30-PR listing.
          if gh pr list --state open --limit 100 --json title --jq ".[].title" | grep -qxF "$TITLE"; then
            echo "::notice::PR already open for '$TITLE' — skipping"
            exit 0
          fi

          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          # "dev" runs get an unprefixed branch name; releases get "v<version>".
          if [ "$VERSION" = "dev" ]; then
            BRANCH="chore/embedding-bench-dev-$(date +%Y%m%d-%H%M%S)"
          else
            BRANCH="chore/embedding-bench-v${VERSION}-$(date +%Y%m%d-%H%M%S)"
          fi
          git checkout -b "$BRANCH"
          git add generated/benchmarks/EMBEDDING-BENCHMARKS.md
          git commit -m "$TITLE"
          git push origin "$BRANCH"

          gh pr create \
            --base main \
            --head "$BRANCH" \
            --title "$TITLE" \
            --body "Automated embedding benchmark update for **${VERSION}** from workflow run [#${{ github.run_number }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})."