File tree Expand file tree Collapse file tree
# Runtime parameters for one specdec_bench cell: sampling settings plus engine arguments.
# NOTE(review): the diff view lists this file under common/specdec_bench/_cells;
# presumably it is the qwen35_4b_mtp_vllm_t0_d3.yaml passed via --runtime_params — confirm.
# NOTE(review): nesting below was reconstructed from a flattened diff view — confirm
# against the committed file.
sampling_kwargs:
  temperature: 0
engine_args:
  max_model_len: 40960
# SPEED-bench MTP speculative-decoding run for Qwen3.5-4B via vLLM.
# Cell t0_d3: temperature=0, draft_length=3.
# Slurm run on cw_dfw:
#   uv run slurm.py --yaml modules/Model-Optimizer/tools/launcher/examples/Qwen/Qwen3.5-4B/specdec_bench_mtp_vllm_t0_d3.yaml --yes detach=true
#
# Two tasks run the same script with the same engine settings; they differ in the
# dataset split, the concurrency, the request cap (task_1 only) and the save directory.
# NOTE(review): nesting was reconstructed from a flattened diff view — confirm against
# the committed file.

job_name: Qwen3.5-4B_specdec_bench_mtp_vllm_t0_d3

pipeline:
  global_vars:
    # Referenced by both tasks through the <<global_vars.hf_model>> placeholder.
    hf_model: /hf-local/Qwen/Qwen3.5-4B

  # task_0: "qualitative" split at concurrency 32, no --num_requests given.
  task_0:
    script: common/specdec_bench/run.sh
    args:
      - --dataset speed
      - --dataset_path /hf-local/nvidia/SPEED-Bench-Internal/qualitative
      - --engine VLLM
      - --speculative_algorithm MTP
      - --draft_length 3
      - --runtime_params common/specdec_bench/_cells/qwen35_4b_mtp_vllm_t0_d3.yaml
      # tp_size equals gpus_per_node in slurm_config below (both 2).
      - --tp_size 2
      - --ep_size 1
      - --concurrency 32
      - --output_length 4096
      - --aa_timing
      - --show_progress
      - --save_dir /scratchspace/qwen35_4b_mtp_vllm_t0_d3/qualitative
    environment:
      - HF_MODEL_CKPT: <<global_vars.hf_model>>
      - HF_LOCAL: /hf-local
    slurm_config:
      # No whitespace inside the quotes.
      # NOTE(review): presumably the factory is looked up by exact name — confirm.
      _factory_: "slurm_factory"
      nodes: 1
      ntasks_per_node: 1
      gpus_per_node: 2
      container: vllm/vllm-openai:qwen3_5-cu130

  # task_1: "throughput_32k" split at concurrency 8, with --num_requests 80.
  task_1:
    script: common/specdec_bench/run.sh
    args:
      - --dataset speed
      - --dataset_path /hf-local/nvidia/SPEED-Bench-Internal/throughput_32k
      - --engine VLLM
      - --speculative_algorithm MTP
      - --draft_length 3
      - --runtime_params common/specdec_bench/_cells/qwen35_4b_mtp_vllm_t0_d3.yaml
      # tp_size equals gpus_per_node in slurm_config below (both 2).
      - --tp_size 2
      - --ep_size 1
      - --concurrency 8
      - --num_requests 80
      - --output_length 4096
      - --aa_timing
      - --show_progress
      - --save_dir /scratchspace/qwen35_4b_mtp_vllm_t0_d3/throughput_32k
    environment:
      - HF_MODEL_CKPT: <<global_vars.hf_model>>
      - HF_LOCAL: /hf-local
    slurm_config:
      # No whitespace inside the quotes.
      # NOTE(review): presumably the factory is looked up by exact name — confirm.
      _factory_: "slurm_factory"
      nodes: 1
      ntasks_per_node: 1
      gpus_per_node: 2
      container: vllm/vllm-openai:qwen3_5-cu130
You can’t perform that action at this time.
0 commit comments