Skip to content

Commit f031d76

Browse files
author
pensieve-intern
committed
[OMNIML-4886] cell_t0_d3 — pensieve-intern agent draft
1 parent 5eba879 commit f031d76

2 files changed

Lines changed: 62 additions & 0 deletions

File tree

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
# Runtime parameters for a benchmark cell: sampling settings plus engine
# arguments.
# NOTE(review): nesting was reconstructed from a flattened diff view (gutter
# markers removed, 2-space indent restored) — confirm against the committed
# file.
sampling_kwargs:
  # Sampling temperature; 0 lines up with the "t0" tag in the commit title
  # (cell_t0_d3).
  temperature: 0
engine_args:
  # Presumably the engine's maximum context length — verify against the
  # consumer of this file.
  max_model_len: 40960
Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,58 @@
# Two-task pipeline. Both tasks run common/specdec_bench/run.sh with the VLLM
# engine, the MTP speculative algorithm and a draft length of 3; they differ
# in dataset path, concurrency, request count and save directory.
# NOTE(review): nesting was reconstructed from a flattened diff view (gutter
# markers removed, 2-space indent restored) — confirm against the committed
# file.
job_name: Qwen3.5-4B_specdec_bench_mtp_vllm_t0_d3

pipeline:
  global_vars:
    # Referenced by the tasks below as <<global_vars.hf_model>>.
    hf_model: /hf-local/Qwen/Qwen3.5-4B

  # Task 0: "qualitative" dataset path, concurrency 32.
  task_0:
    script: common/specdec_bench/run.sh
    args:
      - --dataset speed
      - --dataset_path /hf-local/nvidia/SPEED-Bench-Internal/qualitative
      - --engine VLLM
      - --speculative_algorithm MTP
      - --draft_length 3
      - --runtime_params common/specdec_bench/_cells/qwen35_4b_mtp_vllm_t0_d3.yaml
      # tp_size equals gpus_per_node in slurm_config below.
      - --tp_size 2
      - --ep_size 1
      - --concurrency 32
      - --output_length 4096
      - --aa_timing
      - --show_progress
      - --save_dir /scratchspace/qwen35_4b_mtp_vllm_t0_d3/qualitative
    environment:
      - HF_MODEL_CKPT: <<global_vars.hf_model>>
      - HF_LOCAL: /hf-local
    slurm_config:
      _factory_: "slurm_factory"
      nodes: 1
      ntasks_per_node: 1
      gpus_per_node: 2
      container: vllm/vllm-openai:qwen3_5-cu130

  # Task 1: "throughput_32k" dataset path, concurrency 8, 80 requests.
  task_1:
    script: common/specdec_bench/run.sh
    args:
      - --dataset speed
      - --dataset_path /hf-local/nvidia/SPEED-Bench-Internal/throughput_32k
      - --engine VLLM
      - --speculative_algorithm MTP
      - --draft_length 3
      - --runtime_params common/specdec_bench/_cells/qwen35_4b_mtp_vllm_t0_d3.yaml
      # tp_size equals gpus_per_node in slurm_config below.
      - --tp_size 2
      - --ep_size 1
      - --concurrency 8
      - --output_length 4096
      - --num_requests 80
      - --aa_timing
      - --show_progress
      - --save_dir /scratchspace/qwen35_4b_mtp_vllm_t0_d3/throughput_32k
    environment:
      - HF_MODEL_CKPT: <<global_vars.hf_model>>
      - HF_LOCAL: /hf-local
    slurm_config:
      _factory_: "slurm_factory"
      nodes: 1
      ntasks_per_node: 1
      gpus_per_node: 2
      container: vllm/vllm-openai:qwen3_5-cu130

0 commit comments

Comments
 (0)