# Display name of the workflow in the Actions UI.
name: SpeedBench AL Collection

# Push-button (workflow_dispatch) collection of the DeepSeek-V4-Pro SPEED-Bench
# acceptance-length (AL) matrix: thinking_on/off x MTP levels. Produces the
# golden reference consumed by the synthetic-acceptance framework and (optionally)
# opens a PR updating benchmarks/speedbench-reference-al.yaml.

# Manual trigger only. Every input is optional; the defaults describe the full
# collection run (MTP levels 1-8, thinking off and on, coding category).
on:
  workflow_dispatch:
    inputs:
      # Label used for the job's `runs-on`.
      runner:
        description: "Self-hosted GPU runner label (B300)"
        required: false
        type: string
        default: 'b300'
      # Exported to the launcher as IMAGE.
      image:
        description: "vLLM container image"
        required: false
        type: string
        default: 'vllm/vllm-openai:v0.21.0'
      # Exported as MTP_LIST.
      mtp-list:
        description: "Space-separated MTP levels (num_speculative_tokens)"
        required: false
        type: string
        default: '1 2 3 4 5 6 7 8'
      # Exported as THINKING_MODES.
      thinking-modes:
        description: "Space-separated thinking modes to collect"
        required: false
        type: string
        default: 'off on'
      # Exported as CATEGORY.
      category:
        description: "SPEED-Bench category"
        required: false
        type: string
        default: 'coding'
      # Exported as SPEEDBENCH_OUTPUT_LEN; kept as a string so it is passed
      # through verbatim.
      output-len:
        description: "Per-request output length"
        required: false
        type: string
        default: '4096'
      # Exported as CHAT_TEMPLATE_KWARGS_ON. Single-quoted so the embedded JSON
      # double quotes need no escaping.
      thinking-kwargs:
        description: "chat_template_kwargs JSON for thinking-on cells (match golden config)"
        required: false
        type: string
        default: '{"thinking": true, "reasoning_effort": "high"}'
      # Exported as SALLOC_TIME_LIMIT.
      salloc-time:
        description: "Slurm allocation minutes (16 server starts ~ several hours)"
        required: false
        type: string
        default: '480'
      # Gates the "Open PR updating reference yaml" step.
      open-pr:
        description: "Open a PR updating benchmarks/speedbench-reference-al.yaml"
        required: false
        type: boolean
        default: true
      # No default: when left empty the checkout step falls back to github.sha.
      ref:
        description: "Git ref (branch/sha) to checkout"
        required: false
        type: string

# The built-in GITHUB_TOKEN only needs read access to repository contents; the
# checkout and PR steps authenticate with secrets.REPO_PAT instead.
permissions:
  contents: read

# Workflow-level environment, visible to every step of every job.
env:
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
  HF_HUB_CACHE: '/mnt/hf_hub_cache/'
  # Drive the dsv4 single-node path in runners/launch_b300-nv.sh. MODEL is the
  # HF id; its basename (DeepSeek-V4-Pro) is in the launcher's STAGED_MODELS, so
  # the launcher resolves MODEL_PATH to the pre-staged local weights and mounts
  # them. The collector serves from MODEL_PATH (see SERVE_MODEL), so no download.
  MODEL: deepseek-ai/DeepSeek-V4-Pro
  MODEL_PREFIX: dsv4
  PRECISION: fp4
  FRAMEWORK: vllm
  EXP_NAME: dsv4_speedbench
  IMAGE: ${{ inputs.image }}
  # Quoted so these reach the scripts as strings rather than YAML ints/bools.
  TP: '8'
  EP_SIZE: '1'
  DP_ATTENTION: 'false'
  SPEC_DECODING: mtp
  # Run the AL-matrix collector instead of the auto-selected throughput script.
  BENCH_SCRIPT_OVERRIDE: benchmarks/single_node/dsv4_fp4_b300_vllm_speedbench_matrix.sh
  SALLOC_TIME_LIMIT: ${{ inputs.salloc-time }}
  # Matrix-collector tunables (propagated into the container via srun --export=ALL).
  MTP_LIST: ${{ inputs.mtp-list }}
  THINKING_MODES: ${{ inputs.thinking-modes }}
  CATEGORY: ${{ inputs.category }}
  SPEEDBENCH_OUTPUT_LEN: ${{ inputs.output-len }}
  CHAT_TEMPLATE_KWARGS_ON: ${{ inputs.thinking-kwargs }}
  OUT_YAML: /workspace/speedbench-reference-al.yaml
  PYTHONDONTWRITEBYTECODE: '1'
  PYTHONPYCACHEPREFIX: /tmp/inferencex-pycache

jobs:
  # Single job: clean the runner, run the AL-matrix collector through the
  # runner-specific launcher, publish the resulting YAML (step summary,
  # artifact, optional PR), upload server logs, then clean the runner again.
  collect-al:
    runs-on: ${{ inputs.runner }}
    timeout-minutes: 600
    name: "SpeedBench AL matrix | ${{ inputs.category }} | mtp=[${{ inputs.mtp-list }}] | thinking=[${{ inputs.thinking-modes }}]"
    steps:
      # The script is anchored so the post-run cleanup step can reuse it
      # verbatim through the *resource-cleanup alias.
      - name: Resource cleanup (pre-run)
        run: &resource-cleanup |
          # Cleanup Docker resources
          if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
            echo "[Docker] Cleaning up resources ..."
            docker ps -aq | xargs -r docker rm -f
            docker network prune -f
            while [ -n "$(docker ps -aq)" ]; do
              docker ps -a
              sleep 5
            done
          fi

          # Cleanup SLURM resources
          if command -v squeue >/dev/null 2>&1; then
            echo "[Slurm] Cleaning up jobs with name : ${{ runner.name }} ..."
            scancel --name="${{ runner.name }}" || true
            while [ -n "$(squeue --name='${{ runner.name }}' --noheader --format='%i')" ]; do
              squeue --name="${{ runner.name }}"
              sleep 5
            done
          fi

          # Cleanup AL-matrix outputs from a prior job on this runner so a stale
          # matrix from a previous run is never picked up as this job's output.
          rm -rf "${{ github.workspace }}/speedbench_results" 2>/dev/null || true

      # The PAT is used for checkout; the dispatch `ref` input wins over the
      # triggering commit when it is non-empty.
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          token: ${{ secrets.REPO_PAT }}
          fetch-depth: 0
          ref: ${{ inputs.ref || github.sha }}
          clean: true
          submodules: true

      - name: Cleanup stale outputs (pre-run)
        run: |
          rm -f speedbench-reference-al.yaml || true
          rm -f gpu_metrics.csv || true
          rm -rf speed_bench_data || true

      - name: Collect AL matrix
        env:
          RUNNER_NAME: ${{ runner.name }}
        run: |
          set -euo pipefail
          # The launcher is chosen from the runner name up to its first "_".
          # Quoted so an unexpected character in the name cannot split the path.
          bash "./runners/launch_${RUNNER_NAME%%_*}.sh"

          if [ ! -f "speedbench-reference-al.yaml" ]; then
            echo "AL collection failed: speedbench-reference-al.yaml not produced." >&2
            exit 1
          fi
          # Render the collected matrix as a fenced YAML block in the job summary.
          {
            echo "### SpeedBench AL matrix"
            echo '```yaml'
            cat speedbench-reference-al.yaml
            echo '```'
          } >> "$GITHUB_STEP_SUMMARY"

      # Runs even after a failure; a missing file only produces a warning.
      - name: Upload AL matrix artifact
        if: always()
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: speedbench-reference-al
          path: speedbench-reference-al.yaml
          if-no-files-found: warn

      # NOTE(review): the PR always targets `main`, even when a different `ref`
      # input was checked out — confirm this is intended.
      - name: Open PR updating reference yaml
        if: ${{ inputs.open-pr && success() }}
        env:
          GH_TOKEN: ${{ secrets.REPO_PAT }}
          # Free-form dispatch inputs reach the script through the environment
          # instead of being expanded into the shell source, so their content
          # cannot be interpreted as shell syntax.
          PR_CATEGORY: ${{ inputs.category }}
          PR_MTP_LIST: ${{ inputs.mtp-list }}
          PR_THINKING_MODES: ${{ inputs.thinking-modes }}
          PR_OUTPUT_LEN: ${{ inputs.output-len }}
          RUN_ID: ${{ github.run_id }}
        run: |
          set -euo pipefail
          cp speedbench-reference-al.yaml benchmarks/speedbench-reference-al.yaml

          BRANCH="speedbench-al/auto-${RUN_ID}"
          git config user.name "github-actions"
          git config user.email "github-actions@github.com"
          git checkout -b "$BRANCH"
          git add benchmarks/speedbench-reference-al.yaml
          # Nothing staged means the measured matrix equals the committed one.
          if git diff --cached --quiet; then
            echo "No change in reference yaml; skipping PR."
            exit 0
          fi
          git commit -m "Update SpeedBench AL reference matrix (auto, run ${RUN_ID})"
          git push -u origin "$BRANCH"
          gh pr create \
            --title "Update SpeedBench AL reference matrix (auto)" \
            --body "Auto-generated by the SpeedBench AL Collection workflow (run ${RUN_ID}). Category: \`${PR_CATEGORY}\`, MTP: \`${PR_MTP_LIST}\`, thinking: \`${PR_THINKING_MODES}\`, output_len: \`${PR_OUTPUT_LEN}\`. Please review the measured values before merging." \
            --base main \
            --head "$BRANCH"

      # Must stay ahead of the post-run cleanup, which deletes speedbench_results.
      - name: Upload server logs
        if: always()
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: speedbench_server_logs
          path: speedbench_results/server_*.log
          if-no-files-found: ignore

      - name: Resource cleanup (post-run)
        if: always()
        run: *resource-cleanup