Semantic-Next/.github/workflows/benchmarks.yml at main · Semantic-Org/Semantic-Next · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
name: Benchmarks

on:
  pull_request:
    paths:
      # The measurement target: perf-sensitive package code.
      - 'packages/**'
      # This workflow file. pull_request runs use the PR-head YAML, so an
      # edit here takes effect on the PR's own run and is self-tested.
      - '.github/workflows/benchmarks.yml'
      # Deliberately NOT listed: bench source (bench-*.js,
      # tachometer-ci*.json, build-ci.js) and the reporter
      # (tools/ci/bench/reporter/**). The discover and benchmarks jobs
      # overlay main's copy of both before every run, so PR-side edits to
      # them have no effect on the PR's own comment. Harness changes land on
      # main in a separate PR; the test-taker cannot author the test.
  # Every merge commit on main is benched so that bench-history.json
  # accumulates absolute CIs per commit. These runs are not gated against
  # anything: the report workflow branches on event type and appends to
  # history instead of posting a PR comment.
  push:
    branches:
      - main
    paths:
      - 'packages/**'

# Pull-request runs share one group per PR ref, so a newer push to the PR
# cancels the run it supersedes. Push-to-main runs are keyed by commit SHA
# and are never cancelled: keying them on the branch ref would let the next
# merge cancel the in-flight run and leave that commit without a benchmark
# result.
concurrency:
  group: bench-${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

# Least privilege: the workflow token is granted read access to repository
# contents; no other scope is requested here.
permissions:
  contents: read
jobs:
  # Discover which tachometer configs are in changed packages.
  # Emits one matrix entry per tachometer-ci*.json so each runs in its own
  # parallel job with its own PR check.
  discover:
    runs-on: ubuntu-latest
    # Consumed by the benchmarks job: `matrix` is the JSON matrix definition,
    # `has-benchmarks` gates whether any benchmark job runs at all. Both are
    # set by the "Build matrix" step (id: matrix).
    outputs:
      matrix: ${{ steps.matrix.outputs.result }}
      has-benchmarks: ${{ steps.matrix.outputs.has-benchmarks }}
    steps:
      # fetch-depth: 0 requests full history rather than a single commit.
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - uses: actions/setup-node@v6
        with:
          node-version-file: '.node-version'

      # Pin bench harness to main. Matrix composition (tools/ci/bench/matrix,
      # bench source, tachometer configs, reporter) is author-neutral —
      # test-taker can't widen or narrow its own scope, rewrite discover.js,
      # or add a rigged config. Harness changes land on main first.
      #
      # The overlay stays best-effort (the job continues if it fails), but a
      # failure is no longer silent: git's error output is shown and a
      # warning annotation is emitted, because a failed overlay means the
      # checked-out copy of the harness is what actually runs.
      - name: Overlay bench harness from main
        run: |
          git fetch origin main --depth=1
          if ! git checkout origin/main -- packages/*/bench/ tools/ci/bench/reporter/ tools/ci/bench/matrix/; then
            echo '::warning title=Bench harness overlay failed::Could not overlay the bench harness from origin/main; continuing with the checked-out copy.'
          fi

      # Collect the PR's changed-file set vs base; discover.js filters the
      # matrix to configs whose owning package's @semantic-ui/* dep closure
      # intersects the diff. Push to main writes an empty file — discover.js
      # falls through to "run everything" for bench-history continuity.
      #
      # The event name and base ref are read from the runner's default
      # GITHUB_* environment variables rather than expanded into the script
      # text with ${{ }}, so their values are never parsed as shell code.
      - name: Collect changed files
        run: |
          if [ "$GITHUB_EVENT_NAME" = 'pull_request' ]; then
            git fetch origin "$GITHUB_BASE_REF" --depth=1
            git diff --name-only FETCH_HEAD...HEAD > /tmp/changed-files.txt
          else
            : > /tmp/changed-files.txt
          fi
          echo 'Changed files:'
          cat /tmp/changed-files.txt

      # discover.js is handed the changed-file list via CHANGED_FILES_PATH;
      # this step's outputs feed the job outputs declared above.
      - name: Build matrix
        id: matrix
        env:
          CHANGED_FILES_PATH: /tmp/changed-files.txt
        run: node tools/ci/bench/matrix/discover.js

  # One job per matrix entry emitted by the discover job. Each job builds the
  # current and baseline bundles for its package, runs a single tachometer
  # config, and uploads the JSON result plus a baseline-sha.txt sidecar.
  benchmarks:
    needs: discover
    if: needs.discover.outputs.has-benchmarks == 'true'
    strategy:
      # One failing config must not cancel the other matrix cells.
      fail-fast: false
      matrix:
        entry: ${{ fromJson(needs.discover.outputs.matrix) }}
    name: bench-${{ matrix.entry.name }}
    runs-on: ubuntu-latest
    # With amplified metrics (Bench: Amplify… commits), a typical run lands
    # at 12-14 min — initial sampling dominates, ~9 min at N=50 × 2 benches ×
    # N metrics, then auto-sample usually converges in 2-3 min. 20-min cap
    # gives a few minutes of margin over the observed ceiling.
    timeout-minutes: 20

    steps:
      - uses: actions/checkout@v6

      - uses: actions/setup-node@v6
        with:
          node-version-file: '.node-version'
          cache: 'npm'
          cache-dependency-path: package-lock.json

      - name: Install dependencies
        run: npm ci

      # Install the chromedriver release whose major version matches the
      # runner's Chrome. Fail loudly if the version cannot be detected:
      # an empty value would turn the spec into an unpinned `chromedriver@`.
      - name: Install matching chromedriver
        run: |
          CHROME_VERSION=$(google-chrome --version | grep -oP '\d+' | head -1)
          if [ -z "$CHROME_VERSION" ]; then
            echo '::error::Could not determine the installed Chrome major version'
            exit 1
          fi
          npm install "chromedriver@${CHROME_VERSION}" --no-save

      # Pin bench harness to main for both current and baseline builds.
      # The PR only contributes packages/*/src/; bench code, tachometer
      # configs, and build-ci.js always come from main so the measurement
      # is author-neutral.
      #
      # The overlay stays best-effort (the job continues if it fails), but a
      # failure is no longer silent: git's error output is shown and a
      # warning annotation is emitted, because a failed overlay means the
      # checked-out copy of the harness is what actually runs.
      - name: Overlay bench harness from main
        run: |
          git fetch origin main --depth=1
          if ! git checkout origin/main -- packages/*/bench/ tools/ci/bench/reporter/; then
            echo '::warning title=Bench harness overlay failed::Could not overlay the bench harness from origin/main; continuing with the checked-out copy.'
          fi

      # Build current branch benchmark bundles.
      # The matrix value is passed through env and quoted so it is treated as
      # data by the shell rather than expanded into the script text.
      - name: Build current
        env:
          ENTRY_PACKAGE: ${{ matrix.entry.package }}
        run: node "packages/${ENTRY_PACKAGE}/bench/tachometer/build-ci.js" current

      # Swap source to the appropriate baseline and build baseline bundles.
      # PR: baseline = base branch tip.
      # Push to main: baseline = this commit's parent (so the delta captures
      #   the merged commit's effect; bench-history indexes the current
      #   commit's absolute CI alongside the within-session percent-delta).
      #
      # Resolve baseline SHA inline so it can be written to the artifact as
      # a sidecar (baseline-sha.txt). The reporter pins each metric's
      # percent_delta_ci to that SHA — required for cross-iteration drift
      # detection (see tools/ci/bench/reporter/reporter.js:computeBaselineDrift).
      - name: Build baseline
        env:
          ENTRY_PACKAGE: ${{ matrix.entry.package }}
        run: |
          if [ "$GITHUB_EVENT_NAME" = 'push' ]; then
            # Deepen history from the commit under test, not from the branch
            # tip: if main has advanced since this run was triggered, the
            # tip's two newest commits would not include this commit's parent.
            git fetch origin "$GITHUB_SHA" --depth=2
            BASELINE_SHA=$(git rev-parse HEAD~1)
            git checkout HEAD~1 -- packages/*/src/
          else
            git fetch origin "$GITHUB_BASE_REF" --depth=1
            BASELINE_SHA=$(git rev-parse FETCH_HEAD)
            git checkout FETCH_HEAD -- packages/*/src/
          fi
          node "packages/${ENTRY_PACKAGE}/bench/tachometer/build-ci.js" baseline
          # Restore the current commit's sources before the benchmark runs.
          git checkout HEAD -- packages/*/src/
          mkdir -p results
          echo "$BASELINE_SHA" > results/baseline-sha.txt

      # Run just this matrix cell's single config.
      # Per-cell auto-sample tail is governed by the config's own `timeout`
      # (tachometer rejects --timeout alongside --config). Hard runaway
      # protection is `timeout-minutes: 20` at the job level above.
      - name: Run benchmark
        env:
          ENTRY_CONFIG: ${{ matrix.entry.config }}
          ENTRY_PACKAGE: ${{ matrix.entry.package }}
          ENTRY_NAME: ${{ matrix.entry.name }}
        run: |
          mkdir -p results
          npx tachometer \
            --config "$ENTRY_CONFIG" \
            --json-file "results/${ENTRY_PACKAGE}-${ENTRY_NAME}.json"

      - name: Upload results
        uses: actions/upload-artifact@v7
        with:
          name: results-${{ matrix.entry.name }}
          # Include baseline-sha.txt sidecar — the reporter and history
          # archiver read it to pin percent_delta_ci entries to their
          # baseline SHA.
          path: |
            results/*.json
            results/baseline-sha.txt