-
Notifications
You must be signed in to change notification settings - Fork 1k
Expand file tree
/
Copy pathllm-benchmark-validate-goldens.yml
More file actions
78 lines (64 loc) · 1.96 KB
/
llm-benchmark-validate-goldens.yml
File metadata and controls
78 lines (64 loc) · 1.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# Validates the LLM benchmark golden answers on a nightly schedule and on
# manual request.
name: Validate LLM benchmark golden answers

on:
  schedule:
    # Nightly at 2 AM UTC
    - cron: '0 2 * * *'
  # Allow the workflow to be started manually as well.
  workflow_dispatch: {}

# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read

# All runs share one fixed group, so a newly started run cancels a run of
# this workflow that is still in progress.
concurrency:
  group: llm-benchmark-validate-goldens
  cancel-in-progress: true
jobs:
  # Builds the llm-benchmark tool from this repository and runs it in
  # goldens-only mode, once per language listed in the matrix.
  validate-goldens:
    runs-on: spacetimedb-new-runner
    container:
      image: localhost:5000/spacetimedb-ci:latest
      options: >-
        --privileged
    timeout-minutes: 60
    strategy:
      # Keep the other languages running when one of them fails.
      fail-fast: false
      matrix:
        lang: [rust, csharp, typescript]
    steps:
      - name: Install spacetime CLI
        # NOTE(review): when no `shell:` is set, the default shell does not
        # enable pipefail, so a failed download may not fail this step.
        # Confirm whether `shell: bash` is usable in the CI image.
        run: |
          curl -sSf https://install.spacetimedb.com | sh -s -- -y
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: Checkout master
        uses: actions/checkout@v4
        with:
          ref: master
          fetch-depth: 1

      - uses: dtolnay/rust-toolchain@stable

      - uses: Swatinem/rust-cache@v2

      # .NET tooling is only set up for the C# matrix entry.
      - name: Setup .NET SDK
        if: matrix.lang == 'csharp'
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: "8.0.x"

      - name: Install WASI workload
        if: matrix.lang == 'csharp'
        env:
          DOTNET_MULTILEVEL_LOOKUP: "0"
          # Point the dotnet CLI home at the runner's temp directory.
          DOTNET_CLI_HOME: ${{ runner.temp }}/dotnet-home
          DOTNET_SKIP_FIRST_TIME_EXPERIENCE: "1"
        run: |
          dotnet workload install wasi-experimental --skip-manifest-update --disable-parallel

      # Node.js and pnpm are only set up for the TypeScript matrix entry.
      - name: Set up Node.js
        if: matrix.lang == 'typescript'
        uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Install pnpm
        if: matrix.lang == 'typescript'
        uses: pnpm/action-setup@v4

      # --locked makes cargo use the committed Cargo.lock as-is.
      - name: Build llm-benchmark tool
        run: cargo install --path tools/xtask-llm-benchmark --locked

      - name: Validate golden answers (${{ matrix.lang }})
        env:
          MSBUILDDISABLENODEREUSE: "1"
          DOTNET_CLI_USE_MSBUILD_SERVER: "0"
        run: |
          llm_benchmark run --goldens-only --lang ${{ matrix.lang }}