Skip to content

Commit d595d49

Browse files
committed
speedbench-al: parameterize model + relocate collector script
Address review:
- The model is now a workflow input (model + model-prefix, defaulting to deepseek-ai/DeepSeek-V4-Pro / dsv4). MODEL, MODEL_PREFIX, EXP_NAME, BENCH_SCRIPT_OVERRIDE, the artifact names, and the Create-PR branch/title/body are all derived from those inputs. The top-level key of the emitted YAML is now derived from the model (MODEL_KEY, which defaults to the lowercased model basename).
- Move the collector to benchmarks/single_node/speedbench/dsv4_fp4_b300_vllm.sh and fix its benchmark_lib.sh source path (../ -> ../../) to account for the deeper directory.
1 parent bab431d commit d595d49

2 files changed

Lines changed: 38 additions & 21 deletions

File tree

.github/workflows/speedbench-al.yml

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
name: SpeedBench AL Collection
22

3-
# Push-button (workflow_dispatch) collection of the DeepSeek-V4-Pro SPEED-Bench
4-
# acceptance-length (AL) matrix: thinking_on/off x MTP levels. Produces the
5-
# golden reference consumed by the synthetic-acceptance framework and (optionally)
6-
# opens a PR updating benchmarks/speedbench-reference-al.yaml.
3+
# Push-button (workflow_dispatch) collection of a SPEED-Bench acceptance-length
4+
# (AL) matrix: thinking_on/off x MTP levels, for the given model (defaults to
5+
# DeepSeek-V4-Pro). Produces the golden reference consumed by the
6+
# synthetic-acceptance framework and (optionally) opens a PR updating
7+
# benchmarks/speedbench-reference-al.yaml.
78

89
on:
910
workflow_dispatch:
@@ -13,6 +14,16 @@ on:
1314
required: false
1415
type: string
1516
default: 'b300'
17+
model:
18+
description: "HF model id (basename must be in launcher STAGED_MODELS for pre-staged local weights)"
19+
required: false
20+
type: string
21+
default: 'deepseek-ai/DeepSeek-V4-Pro'
22+
model-prefix:
23+
description: "Model prefix; drives launcher MODEL_PATH resolution, exp name, collector script, and artifact names"
24+
required: false
25+
type: string
26+
default: 'dsv4'
1627
image:
1728
description: "vLLM container image"
1829
required: false
@@ -64,22 +75,22 @@ permissions:
6475
env:
6576
HF_TOKEN: ${{ secrets.HF_TOKEN }}
6677
HF_HUB_CACHE: '/mnt/hf_hub_cache/'
67-
# Drive the dsv4 single-node path in runners/launch_b300-nv.sh. MODEL is the
68-
# HF id; its basename (DeepSeek-V4-Pro) is in the launcher's STAGED_MODELS, so
78+
# Drive the single-node path in runners/launch_b300-nv.sh. MODEL is the HF id;
79+
# its basename (e.g. DeepSeek-V4-Pro) must be in the launcher's STAGED_MODELS so
6980
# the launcher resolves MODEL_PATH to the pre-staged local weights and mounts
7081
# them. The collector serves from MODEL_PATH (see SERVE_MODEL), so no download.
71-
MODEL: deepseek-ai/DeepSeek-V4-Pro
72-
MODEL_PREFIX: dsv4
82+
MODEL: ${{ inputs.model }}
83+
MODEL_PREFIX: ${{ inputs.model-prefix }}
7384
PRECISION: fp4
7485
FRAMEWORK: vllm
75-
EXP_NAME: dsv4_speedbench
86+
EXP_NAME: ${{ inputs.model-prefix }}_speedbench
7687
IMAGE: ${{ inputs.image }}
7788
TP: '8'
7889
EP_SIZE: '1'
7990
DP_ATTENTION: 'false'
8091
SPEC_DECODING: mtp
8192
# Run the AL-matrix collector instead of the auto-selected throughput script.
82-
BENCH_SCRIPT_OVERRIDE: benchmarks/single_node/dsv4_fp4_b300_vllm_speedbench_matrix.sh
93+
BENCH_SCRIPT_OVERRIDE: benchmarks/single_node/speedbench/${{ inputs.model-prefix }}_fp4_b300_vllm.sh
8394
SALLOC_TIME_LIMIT: ${{ inputs.salloc-time }}
8495
# Matrix-collector tunables (propagated into the container via srun --export=ALL).
8596
MTP_LIST: ${{ inputs.mtp-list }}
@@ -158,7 +169,7 @@ jobs:
158169
if: always()
159170
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
160171
with:
161-
name: speedbench-reference-al
172+
name: speedbench-reference-al-${{ inputs.model-prefix }}
162173
path: speedbench-reference-al.yaml
163174
if-no-files-found: warn
164175

@@ -168,9 +179,12 @@ jobs:
168179
GH_TOKEN: ${{ secrets.REPO_PAT }}
169180
run: |
170181
set -euo pipefail
182+
# NOTE: the reference yaml is keyed by model at the top level. This
183+
# overwrites it with the current model's matrix; when more than one
184+
# model is collected, replace this cp with a per-model-key YAML merge.
171185
cp speedbench-reference-al.yaml benchmarks/speedbench-reference-al.yaml
172186
173-
BRANCH="speedbench-al/auto-${{ github.run_id }}"
187+
BRANCH="speedbench-al/${{ inputs.model-prefix }}-auto-${{ github.run_id }}"
174188
git config user.name "github-actions"
175189
git config user.email "github-actions@github.com"
176190
git checkout -b "$BRANCH"
@@ -179,19 +193,19 @@ jobs:
179193
echo "No change in reference yaml; skipping PR."
180194
exit 0
181195
fi
182-
git commit -m "Update SpeedBench AL reference matrix (auto, run ${{ github.run_id }})"
196+
git commit -m "Update SpeedBench AL reference matrix for ${{ inputs.model }} (auto, run ${{ github.run_id }})"
183197
git push -u origin "$BRANCH"
184198
gh pr create \
185-
--title "Update SpeedBench AL reference matrix (auto)" \
186-
--body "Auto-generated by the SpeedBench AL Collection workflow (run ${{ github.run_id }}). Category: \`${{ inputs.category }}\`, MTP: \`${{ inputs.mtp-list }}\`, thinking: \`${{ inputs.thinking-modes }}\`, output_len: \`${{ inputs.output-len }}\`. Please review the measured values before merging." \
199+
--title "Update SpeedBench AL reference matrix for ${{ inputs.model-prefix }} (auto)" \
200+
--body "Auto-generated by the SpeedBench AL Collection workflow (run ${{ github.run_id }}). Model: \`${{ inputs.model }}\`, category: \`${{ inputs.category }}\`, MTP: \`${{ inputs.mtp-list }}\`, thinking: \`${{ inputs.thinking-modes }}\`, output_len: \`${{ inputs.output-len }}\`. Please review the measured values before merging." \
187201
--base main \
188202
--head "$BRANCH"
189203
190204
- name: Upload server logs
191205
if: always()
192206
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
193207
with:
194-
name: speedbench_server_logs
208+
name: speedbench_server_logs-${{ inputs.model-prefix }}
195209
path: speedbench_results/server_*.log
196210
if-no-files-found: ignore
197211

benchmarks/single_node/dsv4_fp4_b300_vllm_speedbench_matrix.sh renamed to benchmarks/single_node/speedbench/dsv4_fp4_b300_vllm.sh

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#
1414
# Usage (inside the vLLM container, on a B300 node):
1515
# export MODEL=/data/models/dsv4-pro
16-
# bash benchmarks/single_node/dsv4_fp4_b300_vllm_speedbench_matrix.sh
16+
# bash benchmarks/single_node/speedbench/dsv4_fp4_b300_vllm.sh
1717
#
1818
# Tunables (env):
1919
# MTP_LIST space-separated MTP levels (default "1 2 3 4 5 6 7 8")
@@ -23,7 +23,7 @@
2323
# OUT_YAML output matrix path (default $RESULTS_DIR/speedbench-reference-al.yaml)
2424

2525
set -uo pipefail
26-
source "$(dirname "$0")/../benchmark_lib.sh"
26+
source "$(dirname "$0")/../../benchmark_lib.sh"
2727

2828
MODEL="${MODEL:?MODEL env var required (e.g. /data/models/dsv4-pro)}"
2929
# Serve from the local weights dir resolved by the launcher (MODEL_PATH points
@@ -39,6 +39,9 @@ PORT="${PORT:-8888}"
3939
MTP_LIST="${MTP_LIST:-1 2 3 4 5 6 7 8}"
4040
THINKING_MODES="${THINKING_MODES:-off on}"
4141
CATEGORY="${CATEGORY:-coding}"
42+
# Top-level key in the emitted YAML matrix. Derived from the model by the
43+
# workflow (e.g. deepseek-v4-pro); falls back to the basename of SERVE_MODEL, lowercased.
44+
MODEL_KEY="${MODEL_KEY:-$(basename "$SERVE_MODEL" | tr '[:upper:]' '[:lower:]')}"
4245
SPEEDBENCH_OUTPUT_LEN="${SPEEDBENCH_OUTPUT_LEN:-4096}"
4346
CONCURRENCY="${CONCURRENCY:-1}"
4447
TEMPERATURE="${TEMPERATURE:-1.0}"
@@ -315,11 +318,11 @@ emit_mode_block() {
315318
echo "# Acceptance Length (AL) reference values measured with SPEED-Bench."
316319
echo "# dataset: $CATEGORY | temperature: $TEMPERATURE | output_len: $SPEEDBENCH_OUTPUT_LEN"
317320
echo "# thinking_on chat_template_kwargs: $CHAT_TEMPLATE_KWARGS_ON"
318-
echo "# Measured on DeepSeek-V4-Pro (B300, vLLM MTP), per num_speculative_tokens."
319-
echo "# Auto-generated by dsv4_fp4_b300_vllm_speedbench_matrix.sh (speedbench-al.yml)."
321+
echo "# Measured on $MODEL_KEY (B300, vLLM MTP), per num_speculative_tokens."
322+
echo "# Auto-generated by benchmarks/single_node/speedbench/dsv4_fp4_b300_vllm.sh (speedbench-al.yml)."
320323
echo "#"
321324
echo "# key = num_speculative_tokens (MTP level); value = golden AL"
322-
echo "deepseek-v4-pro:"
325+
echo "${MODEL_KEY}:"
323326
if [[ " $THINKING_MODES " == *" on "* ]]; then
324327
echo " thinking_on:"
325328
emit_mode_block on

0 commit comments

Comments
 (0)