11name : SpeedBench AL Collection
22
3- # Push-button (workflow_dispatch) collection of the DeepSeek-V4-Pro SPEED-Bench
4- # acceptance-length (AL) matrix: thinking_on/off x MTP levels. Produces the
5- # golden reference consumed by the synthetic-acceptance framework and (optionally)
6- # opens a PR updating benchmarks/speedbench-reference-al.yaml.
3+ # Push-button (workflow_dispatch) collection of a SPEED-Bench acceptance-length
4+ # (AL) matrix: thinking_on/off x MTP levels, for the model selected by the
5+ # `model` / `model-prefix` inputs (defaults: deepseek-ai/DeepSeek-V4-Pro / dsv4).
6+ # Produces the golden reference consumed by the synthetic-acceptance framework
7+ # and (optionally) opens a PR updating benchmarks/speedbench-reference-al.yaml.
78
89on :
910 workflow_dispatch :
1314 required : false
1415 type : string
1516 default : ' b300'
17+ model :
18+ description : " HF model id (basename must be in launcher STAGED_MODELS for pre-staged local weights)"
19+ required : false
20+ type : string
21+ default : ' deepseek-ai/DeepSeek-V4-Pro'
22+ model-prefix :
23+ description : " Model prefix; drives launcher MODEL_PATH resolution, exp name, collector script, and artifact names"
24+ required : false
25+ type : string
26+ default : ' dsv4'
1627 image :
1728 description : " vLLM container image"
1829 required : false
@@ -64,22 +75,22 @@ permissions:
6475env :
6576 HF_TOKEN : ${{ secrets.HF_TOKEN }}
6677 HF_HUB_CACHE : ' /mnt/hf_hub_cache/'
67- # Drive the dsv4 single-node path in runners/launch_b300-nv.sh. MODEL is the
68- # HF id; its basename (DeepSeek-V4-Pro) is in the launcher's STAGED_MODELS, so
78+ # Drive the single-node path in runners/launch_b300-nv.sh. MODEL is the HF id;
79+ # its basename (e.g. DeepSeek-V4-Pro) must be in the launcher's STAGED_MODELS so
6980 # the launcher resolves MODEL_PATH to the pre-staged local weights and mounts
7081 # them. The collector serves from MODEL_PATH (see SERVE_MODEL), so no download.
71- MODEL : deepseek-ai/DeepSeek-V4-Pro
72- MODEL_PREFIX : dsv4
82+ MODEL : ${{ inputs.model }}
83+ MODEL_PREFIX : ${{ inputs.model-prefix }}
7384 PRECISION : fp4
7485 FRAMEWORK : vllm
75- EXP_NAME : dsv4_speedbench
86+ EXP_NAME : ${{ inputs.model-prefix }}_speedbench
7687 IMAGE : ${{ inputs.image }}
7788 TP : ' 8'
7889 EP_SIZE : ' 1'
7990 DP_ATTENTION : ' false'
8091 SPEC_DECODING : mtp
8192 # Run the AL-matrix collector instead of the auto-selected throughput script.
82- BENCH_SCRIPT_OVERRIDE : benchmarks/single_node/dsv4_fp4_b300_vllm_speedbench_matrix .sh
93+ BENCH_SCRIPT_OVERRIDE : benchmarks/single_node/speedbench/${{ inputs.model-prefix }}_fp4_b300_vllm .sh
8394 SALLOC_TIME_LIMIT : ${{ inputs.salloc-time }}
8495 # Matrix-collector tunables (propagated into the container via srun --export=ALL).
8596 MTP_LIST : ${{ inputs.mtp-list }}
@@ -158,7 +169,7 @@ jobs:
158169 if : always()
159170 uses : actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
160171 with :
161- name : speedbench-reference-al
172+ name : speedbench-reference-al-${{ inputs.model-prefix }}
162173 path : speedbench-reference-al.yaml
163174 if-no-files-found : warn
164175
@@ -168,9 +179,12 @@ jobs:
168179 GH_TOKEN : ${{ secrets.REPO_PAT }}
169180 run : |
170181 set -euo pipefail
182+ # NOTE: the reference yaml is keyed by model at the top level. This cp replaces
183+ # the whole file with the current model's matrix, so entries for any other model
184+ # are dropped; once more than one model is collected, use a per-model-key YAML merge.
171185 cp speedbench-reference-al.yaml benchmarks/speedbench-reference-al.yaml
172186
173- BRANCH="speedbench-al/auto-${{ github.run_id }}"
187+ BRANCH="speedbench-al/${{ inputs.model-prefix }}- auto-${{ github.run_id }}"
174188 git config user.name "github-actions"
175189 git config user.email "github-actions@github.com"
176190 git checkout -b "$BRANCH"
@@ -179,19 +193,19 @@ jobs:
179193 echo "No change in reference yaml; skipping PR."
180194 exit 0
181195 fi
182- git commit -m "Update SpeedBench AL reference matrix (auto, run ${{ github.run_id }})"
196+ git commit -m "Update SpeedBench AL reference matrix for ${{ inputs.model }} (auto, run ${{ github.run_id }})"
183197 git push -u origin "$BRANCH"
184198 gh pr create \
185- --title "Update SpeedBench AL reference matrix (auto)" \
186- --body "Auto-generated by the SpeedBench AL Collection workflow (run ${{ github.run_id }}). Category : \`${{ inputs.category }}\`, MTP: \`${{ inputs.mtp-list }}\`, thinking: \`${{ inputs.thinking-modes }}\`, output_len: \`${{ inputs.output-len }}\`. Please review the measured values before merging." \
199+ --title "Update SpeedBench AL reference matrix for ${{ inputs.model-prefix }} (auto)" \
200+ --body "Auto-generated by the SpeedBench AL Collection workflow (run ${{ github.run_id }}). Model: \`${{ inputs.model }}\`, category : \`${{ inputs.category }}\`, MTP: \`${{ inputs.mtp-list }}\`, thinking: \`${{ inputs.thinking-modes }}\`, output_len: \`${{ inputs.output-len }}\`. Please review the measured values before merging." \
187201 --base main \
188202 --head "$BRANCH"
189203
190204 - name : Upload server logs
191205 if : always()
192206 uses : actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
193207 with :
194- name : speedbench_server_logs
208+ name : speedbench_server_logs-${{ inputs.model-prefix }}
195209 path : speedbench_results/server_*.log
196210 if-no-files-found : ignore
197211
0 commit comments