Skip to content

Commit 36b4723

Browse files
author
pensieve-intern
committed
[OMNIML-4886] cell_t0_d3 — pensieve-intern agent draft
1 parent 5eba879 commit 36b4723

2 files changed

Lines changed: 65 additions & 0 deletions

File tree

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
# Runtime parameters for the t0_d3 cell.
# NOTE(review): presumably this is the file passed via --runtime_params
# (common/specdec_bench/_cells/qwen35_4b_mtp_vllm_t0_d3.yaml) — confirm.

# Sampling settings; temperature 0 corresponds to the "t0" part of the cell name.
sampling_kwargs:
  temperature: 0

# Engine settings.
# NOTE(review): max_model_len is presumably forwarded to the vLLM engine as its
# context-length limit — verify against run.sh.
engine_args:
  max_model_len: 40960
Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,61 @@
# SPEED-bench speculative-decoding run: Qwen3.5-4B, MTP drafting, vLLM engine.
# Cell t0_d3 => temperature=0, draft_length=3.
#
# Both tasks invoke the same script with the same engine, algorithm, draft
# length, runtime-params file, parallelism and Slurm resources. They differ
# only in --dataset_path, --concurrency, --num_requests (task_1 only) and
# --save_dir.

job_name: Qwen3.5-4B_specdec_bench_mtp_vllm_t0_d3

pipeline:
  global_vars:
    # Referenced by the tasks below through the <<global_vars.hf_model>>
    # placeholder.
    hf_model: /hf-local/Qwen/Qwen3.5-4B

  # Task 0: the "qualitative" dataset directory, concurrency 32.
  task_0:
    script: common/specdec_bench/run.sh
    args:
      - --dataset speed
      - --dataset_path /hf-local/nvidia/SPEED-Bench-Internal/qualitative
      - --engine VLLM
      - --speculative_algorithm MTP
      - --draft_length 3
      - --runtime_params common/specdec_bench/_cells/qwen35_4b_mtp_vllm_t0_d3.yaml
      # --tp_size 2 matches gpus_per_node: 2 in slurm_config below.
      - --tp_size 2
      - --ep_size 1
      - --concurrency 32
      - --output_length 4096
      - --aa_timing
      - --show_progress
      - --save_dir /scratchspace/qwen35_4b_mtp_vllm_t0_d3/qualitative
    environment:
      # Quoted so the placeholder is always read as a plain string.
      - HF_MODEL_CKPT: '<<global_vars.hf_model>>'
      - HF_LOCAL: /hf-local
    slurm_config:
      _factory_: 'slurm_factory'
      nodes: 1
      ntasks_per_node: 1
      gpus_per_node: 2
      container: vllm/vllm-openai:qwen3_5-cu130

  # Task 1: the "throughput_32k" dataset directory, concurrency 8, capped at
  # 80 requests.
  task_1:
    script: common/specdec_bench/run.sh
    args:
      - --dataset speed
      - --dataset_path /hf-local/nvidia/SPEED-Bench-Internal/throughput_32k
      - --engine VLLM
      - --speculative_algorithm MTP
      - --draft_length 3
      - --runtime_params common/specdec_bench/_cells/qwen35_4b_mtp_vllm_t0_d3.yaml
      # --tp_size 2 matches gpus_per_node: 2 in slurm_config below.
      - --tp_size 2
      - --ep_size 1
      - --concurrency 8
      - --num_requests 80
      - --output_length 4096
      - --aa_timing
      - --show_progress
      - --save_dir /scratchspace/qwen35_4b_mtp_vllm_t0_d3/throughput_32k
    environment:
      # Quoted so the placeholder is always read as a plain string.
      - HF_MODEL_CKPT: '<<global_vars.hf_model>>'
      - HF_LOCAL: /hf-local
    slurm_config:
      _factory_: 'slurm_factory'
      nodes: 1
      ntasks_per_node: 1
      gpus_per_node: 2
      container: vllm/vllm-openai:qwen3_5-cu130

0 commit comments

Comments
 (0)