Skip to content

Commit 0316b19

Browse files
[NV] Add MiniMax-M2.5 FP4 B200 Dynamo vLLM recipes (#1643)
* Add MiniMax-M2.5 FP4 B200 Dynamo vLLM recipes
* Update B200 MiniMax changelog PR link
* Fix B200 MiniMax Slurm account defaults

---------

Co-authored-by: functionstackx <47992694+functionstackx@users.noreply.github.com>
1 parent 92c0fcb commit 0316b19

20 files changed

Lines changed: 1445 additions & 2 deletions

.github/configs/nvidia-master.yaml

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10293,3 +10293,242 @@ minimaxm2.5-fp4-gb200-dynamo-vllm:
10293 10293
tp: 4
10294 10294
ep: 4
10295 10295
dp-attn: true
10296+
10297+
# Sweep entry for the nvidia/MiniMax-M2.5-NVFP4 model at fp4 precision on the
# b200-multinode runner, using the dynamo-vllm framework with both multinode
# and disagg enabled.
# NOTE(review): indentation was lost in this diff view, so the nesting of the
# keys below cannot be read from here — confirm against the real file.
minimaxm2.5-fp4-b200-dynamo-vllm:
10298+
image: vllm/vllm-openai:v0.20.1
10299+
model: nvidia/MiniMax-M2.5-NVFP4
10300+
model-prefix: minimaxm2.5
10301+
runner: b200-multinode
10302+
precision: fp4
10303+
framework: dynamo-vllm
10304+
multinode: true
10305+
disagg: true
10306+
# Two isl/osl pairs follow (1024/1024 and 8192/1024), each with its own
# search-space. Every search-space entry lists a conc-list, a prefill group and
# a decode group (num-worker, tp, ep, dp-attn), and one CONFIG_FILE
# additional-setting pointing at a recipe under
# recipes/vllm/minimax-m2.5-b200-fp4/.
# NOTE(review): additional-settings presumably belongs to the prefill group it
# follows — confirm against the real indentation.
scenarios:
10307+
fixed-seq-len:
10308+
# isl 1024 / osl 1024: all recipes referenced below live under 1k1k/.
- isl: 1024
10309+
osl: 1024
10310+
search-space:
10311+
# Recipe file names carry a <P>p<D>d part that matches the prefill/decode
# num-worker values of the entry (e.g. 1p2d -> 1 prefill, 2 decode).
- conc-list: [16]
10312+
prefill:
10313+
num-worker: 1
10314+
tp: 1
10315+
ep: 1
10316+
dp-attn: false
10317+
additional-settings:
10318+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4-1p1d.yaml"
10319+
decode:
10320+
num-worker: 1
10321+
tp: 4
10322+
ep: 1
10323+
dp-attn: false
10324+
- conc-list: [4, 8, 16, 32, 64]
10325+
prefill:
10326+
num-worker: 1
10327+
tp: 1
10328+
ep: 1
10329+
dp-attn: false
10330+
additional-settings:
10331+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4-1p2d.yaml"
10332+
decode:
10333+
num-worker: 2
10334+
tp: 4
10335+
ep: 1
10336+
dp-attn: false
10337+
# The tp4ep-* recipes below pair tp: 4 with ep: 4 in the decode group.
- conc-list: [32, 64, 128]
10338+
prefill:
10339+
num-worker: 1
10340+
tp: 1
10341+
ep: 1
10342+
dp-attn: false
10343+
additional-settings:
10344+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p1d.yaml"
10345+
decode:
10346+
num-worker: 1
10347+
tp: 4
10348+
ep: 4
10349+
dp-attn: false
10350+
- conc-list: [256]
10351+
prefill:
10352+
num-worker: 1
10353+
tp: 1
10354+
ep: 1
10355+
dp-attn: false
10356+
additional-settings:
10357+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p1d-hi-conc.yaml"
10358+
decode:
10359+
num-worker: 1
10360+
tp: 4
10361+
ep: 4
10362+
dp-attn: false
10363+
- conc-list: [128, 256]
10364+
prefill:
10365+
num-worker: 1
10366+
tp: 1
10367+
ep: 1
10368+
dp-attn: false
10369+
additional-settings:
10370+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p2d.yaml"
10371+
decode:
10372+
num-worker: 2
10373+
tp: 4
10374+
ep: 4
10375+
dp-attn: false
10376+
- conc-list: [64, 128, 256, 512]
10377+
prefill:
10378+
num-worker: 1
10379+
tp: 1
10380+
ep: 1
10381+
dp-attn: false
10382+
additional-settings:
10383+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p3d.yaml"
10384+
decode:
10385+
num-worker: 3
10386+
tp: 4
10387+
ep: 4
10388+
dp-attn: false
10389+
- conc-list: [1024, 2048]
10390+
prefill:
10391+
num-worker: 1
10392+
tp: 1
10393+
ep: 1
10394+
dp-attn: false
10395+
additional-settings:
10396+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-1p3d-hi-conc.yaml"
10397+
decode:
10398+
num-worker: 3
10399+
tp: 4
10400+
ep: 4
10401+
dp-attn: false
10402+
- conc-list: [256, 1024]
10403+
prefill:
10404+
num-worker: 2
10405+
tp: 1
10406+
ep: 1
10407+
dp-attn: false
10408+
additional-settings:
10409+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/1k1k/tp4ep-2p3d.yaml"
10410+
decode:
10411+
num-worker: 3
10412+
tp: 4
10413+
ep: 4
10414+
dp-attn: false
10415+
# From here to the end of the 1024/1024 scenario the entries use dep* recipes,
# and the decode group sets dp-attn: true with tp equal to ep.
- conc-list: [2048, 4096, 8192]
10416+
prefill:
10417+
num-worker: 1
10418+
tp: 1
10419+
ep: 1
10420+
dp-attn: false
10421+
additional-settings:
10422+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-1p2d.yaml"
10423+
decode:
10424+
num-worker: 2
10425+
tp: 2
10426+
ep: 2
10427+
dp-attn: true
10428+
- conc-list: [4096]
10429+
prefill:
10430+
num-worker: 2
10431+
tp: 1
10432+
ep: 1
10433+
dp-attn: false
10434+
additional-settings:
10435+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-2p3d.yaml"
10436+
decode:
10437+
num-worker: 3
10438+
tp: 2
10439+
ep: 2
10440+
dp-attn: true
10441+
# Same worker/tp/ep values as the previous entry; only the concurrency and the
# recipe file (dep2-2p3d-c6144.yaml) differ.
- conc-list: [6144]
10442+
prefill:
10443+
num-worker: 2
10444+
tp: 1
10445+
ep: 1
10446+
dp-attn: false
10447+
additional-settings:
10448+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep2-2p3d-c6144.yaml"
10449+
decode:
10450+
num-worker: 3
10451+
tp: 2
10452+
ep: 2
10453+
dp-attn: true
10454+
- conc-list: [1024, 2048]
10455+
prefill:
10456+
num-worker: 3
10457+
tp: 1
10458+
ep: 1
10459+
dp-attn: false
10460+
additional-settings:
10461+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep4-3p2d.yaml"
10462+
decode:
10463+
num-worker: 2
10464+
tp: 4
10465+
ep: 4
10466+
dp-attn: true
10467+
- conc-list: [1024, 2048, 4096]
10468+
prefill:
10469+
num-worker: 2
10470+
tp: 1
10471+
ep: 1
10472+
dp-attn: false
10473+
additional-settings:
10474+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/1k1k/dep8-2p1d.yaml"
10475+
decode:
10476+
num-worker: 1
10477+
tp: 8
10478+
ep: 8
10479+
dp-attn: true
10480+
# isl 8192 / osl 1024: all recipes referenced below live under 8k1k/.
- isl: 8192
10481+
osl: 1024
10482+
search-space:
10483+
- conc-list: [4, 8, 16]
10484+
prefill:
10485+
num-worker: 1
10486+
tp: 1
10487+
ep: 1
10488+
dp-attn: false
10489+
additional-settings:
10490+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4-1p1d.yaml"
10491+
decode:
10492+
num-worker: 1
10493+
tp: 4
10494+
ep: 1
10495+
dp-attn: false
10496+
- conc-list: [32, 64]
10497+
prefill:
10498+
num-worker: 1
10499+
tp: 1
10500+
ep: 1
10501+
dp-attn: false
10502+
additional-settings:
10503+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4ep-1p1d.yaml"
10504+
decode:
10505+
num-worker: 1
10506+
tp: 4
10507+
ep: 4
10508+
dp-attn: false
10509+
- conc-list: [256, 512, 1024]
10510+
prefill:
10511+
num-worker: 1
10512+
tp: 1
10513+
ep: 1
10514+
dp-attn: false
10515+
additional-settings:
10516+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/8k1k/tp4ep-1p1d-hi-conc.yaml"
10517+
decode:
10518+
num-worker: 1
10519+
tp: 4
10520+
ep: 4
10521+
dp-attn: false
10522+
# Only 8192/1024 entry that uses a dep* recipe and sets dp-attn: true.
- conc-list: [256, 512, 1024, 2048]
10523+
prefill:
10524+
num-worker: 2
10525+
tp: 1
10526+
ep: 1
10527+
dp-attn: false
10528+
additional-settings:
10529+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-b200-fp4/8k1k/dep4-2p1d.yaml"
10530+
decode:
10531+
num-worker: 1
10532+
tp: 4
10533+
ep: 4
10534+
dp-attn: true
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Recipe for a vllm backend behind a dynamo frontend on b200 GPUs: 1 prefill
# worker on 1 GPU and 2 decode workers on 2 GPUs each, benchmarked with
# sa-bench at isl 1024 / osl 1024.
# NOTE(review): indentation was lost in this diff view, so which section each
# key is nested under cannot be read from here — confirm against the real file.
name: "minimax-m2.5-vllm-disagg-b200-decode-2xdep2"
2+
3+
model:
4+
path: "minimax-m2.5-nvfp4"
5+
container: "vllm/vllm-openai:v0.20.1"
6+
precision: "fp4"
7+
8+
dynamo:
9+
install: true
10+
wheel: "1.2.0.dev20260526"
11+
12+
setup_script: install-deps.sh
13+
14+
# Worker layout: prefill_workers x gpus_per_prefill = 1 GPU and
# decode_workers x gpus_per_decode = 4 GPUs, against gpus_per_node: 8.
resources:
15+
gpu_type: "b200"
16+
gpus_per_node: 8
17+
prefill_nodes: 1
18+
decode_nodes: 1
19+
prefill_workers: 1
20+
decode_workers: 2
21+
gpus_per_prefill: 1
22+
gpus_per_decode: 2
23+
24+
frontend:
25+
type: dynamo
26+
enable_multiple_frontends: false
27+
28+
backend:
29+
type: vllm
30+
connector: null
31+
32+
# The prefill and decode environments carry the same three variables with the
# same values.
prefill_environment:
33+
VLLM_ENGINE_READY_TIMEOUT_S: "3600"
34+
VLLM_FLOAT32_MATMUL_PRECISION: "high"
35+
UCX_TLS: "cuda_copy,rc"
36+
37+
decode_environment:
38+
VLLM_ENGINE_READY_TIMEOUT_S: "3600"
39+
VLLM_FLOAT32_MATMUL_PRECISION: "high"
40+
UCX_TLS: "cuda_copy,rc"
41+
42+
# Both groups below use the NixlConnector kv-transfer-config with kv_role
# "kv_both", an fp8 kv cache, and 2048 for max-model-len,
# max-cudagraph-capture-size and max-num-batched-tokens. 2048 equals isl + osl
# from the benchmark section (1024 + 1024).
vllm_config:
43+
prefill:
44+
kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
45+
kv-cache-dtype: "fp8"
46+
# NOTE(review): trust-remote-code appears in this prefill group but not in the
# decode group further down — confirm the asymmetry is intended.
trust-remote-code: true
47+
no-enable-prefix-caching: true
48+
max-model-len: 2048
49+
max-cudagraph-capture-size: 2048
50+
max-num-batched-tokens: 2048
51+
stream-interval: 128
52+
53+
decode:
54+
kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
55+
kv-cache-dtype: "fp8"
56+
# data-parallel-size matches gpus_per_decode (2) in the resources section.
data-parallel-size: 2
57+
data-parallel-rpc-port: 13345
58+
enable-expert-parallel: true
59+
no-enable-prefix-caching: true
60+
max-model-len: 2048
61+
max-cudagraph-capture-size: 2048
62+
max-num-batched-tokens: 2048
63+
max-num-seqs: 864
64+
gpu-memory-utilization: 0.90
65+
stream-interval: 128
66+
67+
benchmark:
68+
type: "sa-bench"
69+
isl: 1024
70+
osl: 1024
71+
# NOTE(review): "x"-separated string, presumably the list of concurrency
# levels to run — confirm against the benchmark consumer.
concurrencies: "2048x4096x8192"
72+
random_range_ratio: 0.8
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Recipe for a vllm backend behind a dynamo frontend on b200 GPUs: 2 prefill
# workers on 1 GPU each and 3 decode workers on 2 GPUs each, benchmarked with
# sa-bench at isl 1024 / osl 1024 and a single concurrency value of 6144.
# NOTE(review): indentation was lost in this diff view, so which section each
# key is nested under cannot be read from here — confirm against the real file.
name: "minimax-m2.5-vllm-disagg-b200-decode-2p3xdep2-c6144"
2+
3+
model:
4+
path: "minimax-m2.5-nvfp4"
5+
container: "vllm/vllm-openai:v0.20.1"
6+
precision: "fp4"
7+
8+
dynamo:
9+
install: true
10+
wheel: "1.2.0.dev20260526"
11+
12+
setup_script: install-deps.sh
13+
14+
# Worker layout: prefill_workers x gpus_per_prefill = 2 GPUs and
# decode_workers x gpus_per_decode = 6 GPUs, i.e. 8 in total, which equals
# gpus_per_node.
resources:
15+
gpu_type: "b200"
16+
gpus_per_node: 8
17+
prefill_nodes: 1
18+
decode_nodes: 1
19+
prefill_workers: 2
20+
decode_workers: 3
21+
gpus_per_prefill: 1
22+
gpus_per_decode: 2
23+
24+
frontend:
25+
type: dynamo
26+
enable_multiple_frontends: false
27+
28+
backend:
29+
type: vllm
30+
connector: null
31+
32+
# The prefill and decode environments carry the same three variables with the
# same values.
prefill_environment:
33+
VLLM_ENGINE_READY_TIMEOUT_S: "3600"
34+
VLLM_FLOAT32_MATMUL_PRECISION: "high"
35+
UCX_TLS: "cuda_copy,rc"
36+
37+
decode_environment:
38+
VLLM_ENGINE_READY_TIMEOUT_S: "3600"
39+
VLLM_FLOAT32_MATMUL_PRECISION: "high"
40+
UCX_TLS: "cuda_copy,rc"
41+
42+
# Both groups below use the NixlConnector kv-transfer-config with kv_role
# "kv_both", an fp8 kv cache, and 2048 for max-model-len,
# max-cudagraph-capture-size and max-num-batched-tokens. 2048 equals isl + osl
# from the benchmark section (1024 + 1024).
vllm_config:
43+
prefill:
44+
kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
45+
kv-cache-dtype: "fp8"
46+
# NOTE(review): trust-remote-code appears in this prefill group but not in the
# decode group further down — confirm the asymmetry is intended.
trust-remote-code: true
47+
no-enable-prefix-caching: true
48+
max-model-len: 2048
49+
max-cudagraph-capture-size: 2048
50+
max-num-batched-tokens: 2048
51+
stream-interval: 128
52+
53+
decode:
54+
kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
55+
kv-cache-dtype: "fp8"
56+
# data-parallel-size matches gpus_per_decode (2) in the resources section.
data-parallel-size: 2
57+
data-parallel-rpc-port: 13345
58+
enable-expert-parallel: true
59+
no-enable-prefix-caching: true
60+
max-model-len: 2048
61+
max-cudagraph-capture-size: 2048
62+
max-num-batched-tokens: 2048
63+
max-num-seqs: 864
64+
gpu-memory-utilization: 0.90
65+
stream-interval: 128
66+
67+
benchmark:
68+
type: "sa-bench"
69+
isl: 1024
70+
osl: 1024
71+
# Single value here, kept as a quoted string rather than a number.
concurrencies: "6144"
72+
random_range_ratio: 0.8

0 commit comments

Comments
 (0)