Skip to content

Commit 4d57f43

Browse files
[NV] Add MiniMax-M2.5 FP8 GB300 Dynamo vLLM recipes (#1647)
* Add MiniMax-M2.5 FP8 GB300 Dynamo vLLM recipes
* Update GB300 FP8 MiniMax changelog PR link
* Fix GB300 eval artifact copy
* Handle existing GB300 eval artifacts
* Do not fail GB300 eval artifact copy
1 parent eb8350e commit 4d57f43

18 files changed

Lines changed: 1316 additions & 4 deletions

.github/configs/nvidia-master.yaml

Lines changed: 213 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10919,3 +10919,216 @@ minimaxm2.5-fp4-b300-dynamo-vllm:
10919 10919
tp: 8
10920 10920
ep: 8
10921 10921
dp-attn: true
10922+
10923+
minimaxm2.5-fp8-gb300-dynamo-vllm:
10924+
image: vllm/vllm-openai:v0.20.1
10925+
model: MiniMaxAI/MiniMax-M2.5
10926+
model-prefix: minimaxm2.5
10927+
runner: gb300-nv
10928+
precision: fp8
10929+
framework: dynamo-vllm
10930+
multinode: true
10931+
disagg: true
10932+
scenarios:
10933+
fixed-seq-len:
10934+
- isl: 1024
10935+
osl: 1024
10936+
search-space:
10937+
- conc-list: [8, 16, 32, 64, 128]
10938+
prefill:
10939+
num-worker: 1
10940+
tp: 1
10941+
ep: 1
10942+
dp-attn: false
10943+
additional-settings:
10944+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p1d-tp4.yaml"
10945+
decode:
10946+
num-worker: 1
10947+
tp: 4
10948+
ep: 1
10949+
dp-attn: false
10950+
- conc-list: [32, 64, 128, 256, 512]
10951+
prefill:
10952+
num-worker: 1
10953+
tp: 1
10954+
ep: 1
10955+
dp-attn: false
10956+
additional-settings:
10957+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4.yaml"
10958+
decode:
10959+
num-worker: 2
10960+
tp: 4
10961+
ep: 1
10962+
dp-attn: false
10963+
- conc-list: [256, 512, 1024]
10964+
prefill:
10965+
num-worker: 1
10966+
tp: 1
10967+
ep: 1
10968+
dp-attn: false
10969+
additional-settings:
10970+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4ep.yaml"
10971+
decode:
10972+
num-worker: 2
10973+
tp: 4
10974+
ep: 4
10975+
dp-attn: false
10976+
- conc-list: [256, 512, 1024]
10977+
prefill:
10978+
num-worker: 2
10979+
tp: 1
10980+
ep: 1
10981+
dp-attn: false
10982+
additional-settings:
10983+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p1d-dep8.yaml"
10984+
decode:
10985+
num-worker: 1
10986+
tp: 8
10987+
ep: 8
10988+
dp-attn: true
10989+
- conc-list: [512, 1024, 2048]
10990+
prefill:
10991+
num-worker: 2
10992+
tp: 1
10993+
ep: 1
10994+
dp-attn: false
10995+
additional-settings:
10996+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4.yaml"
10997+
decode:
10998+
num-worker: 2
10999+
tp: 4
11000+
ep: 4
11001+
dp-attn: true
11002+
- conc-list: [4096, 8192]
11003+
prefill:
11004+
num-worker: 2
11005+
tp: 1
11006+
ep: 1
11007+
dp-attn: false
11008+
additional-settings:
11009+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p2d-dep4-hi-conc.yaml"
11010+
decode:
11011+
num-worker: 2
11012+
tp: 4
11013+
ep: 4
11014+
dp-attn: true
11015+
- conc-list: [1024]
11016+
prefill:
11017+
num-worker: 2
11018+
tp: 1
11019+
ep: 1
11020+
dp-attn: false
11021+
additional-settings:
11022+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-2p3d-dep2.yaml"
11023+
decode:
11024+
num-worker: 3
11025+
tp: 2
11026+
ep: 2
11027+
dp-attn: true
11028+
- isl: 8192
11029+
osl: 1024
11030+
search-space:
11031+
- conc-list: [16, 64, 128]
11032+
prefill:
11033+
num-worker: 1
11034+
tp: 1
11035+
ep: 1
11036+
dp-attn: false
11037+
additional-settings:
11038+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep.yaml"
11039+
decode:
11040+
num-worker: 1
11041+
tp: 4
11042+
ep: 4
11043+
dp-attn: false
11044+
- conc-list: [256, 512]
11045+
prefill:
11046+
num-worker: 1
11047+
tp: 1
11048+
ep: 1
11049+
dp-attn: false
11050+
additional-settings:
11051+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-1p1d-tp4ep-hi-conc.yaml"
11052+
decode:
11053+
num-worker: 1
11054+
tp: 4
11055+
ep: 4
11056+
dp-attn: false
11057+
- conc-list: [32]
11058+
prefill:
11059+
num-worker: 2
11060+
tp: 1
11061+
ep: 1
11062+
dp-attn: false
11063+
additional-settings:
11064+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp2.yaml"
11065+
decode:
11066+
num-worker: 1
11067+
tp: 2
11068+
ep: 1
11069+
dp-attn: false
11070+
- conc-list: [64, 128, 256, 512]
11071+
prefill:
11072+
num-worker: 2
11073+
tp: 1
11074+
ep: 1
11075+
dp-attn: false
11076+
additional-settings:
11077+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-2p1d-tp4ep.yaml"
11078+
decode:
11079+
num-worker: 1
11080+
tp: 4
11081+
ep: 4
11082+
dp-attn: false
11083+
- conc-list: [64]
11084+
prefill:
11085+
num-worker: 3
11086+
tp: 1
11087+
ep: 1
11088+
dp-attn: false
11089+
additional-settings:
11090+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-tp4.yaml"
11091+
decode:
11092+
num-worker: 1
11093+
tp: 4
11094+
ep: 1
11095+
dp-attn: false
11096+
- conc-list: [256, 512]
11097+
prefill:
11098+
num-worker: 3
11099+
tp: 1
11100+
ep: 1
11101+
dp-attn: false
11102+
additional-settings:
11103+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4.yaml"
11104+
decode:
11105+
num-worker: 1
11106+
tp: 4
11107+
ep: 4
11108+
dp-attn: true
11109+
- conc-list: [1024, 2048]
11110+
prefill:
11111+
num-worker: 3
11112+
tp: 1
11113+
ep: 1
11114+
dp-attn: false
11115+
additional-settings:
11116+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-3p1d-dep4-hi-conc.yaml"
11117+
decode:
11118+
num-worker: 1
11119+
tp: 4
11120+
ep: 4
11121+
dp-attn: true
11122+
- conc-list: [512, 1024, 2048]
11123+
prefill:
11124+
num-worker: 5
11125+
tp: 1
11126+
ep: 1
11127+
dp-attn: false
11128+
additional-settings:
11129+
- "CONFIG_FILE=recipes/vllm/minimax-m2.5-fp8/8k1k/disagg-gb300-5p2d-dep4.yaml"
11130+
decode:
11131+
num-worker: 2
11132+
tp: 4
11133+
ep: 4
11134+
dp-attn: true
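[File path not captured in this extract. The recipe name and concurrency list below appear to correspond to the CONFIG_FILE entry recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p1d-tp4.yaml referenced in nvidia-master.yaml above.]
Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change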
@@ -0,0 +1,64 @@
1+
name: "minimax-m2.5-vllm-disagg-gb300-1p1d-tp4"
2+
3+
model:
4+
path: "minimax-m2.5-fp8"
5+
container: "vllm/vllm-openai:v0.20.1"
6+
precision: "fp8"
7+
8+
dynamo:
9+
install: true
10+
wheel: "1.2.0.dev20260526"
11+
12+
setup_script: install-deps.sh
13+
14+
resources:
15+
gpu_type: "gb300"
16+
gpus_per_node: 4
17+
prefill_nodes: 1
18+
decode_nodes: 1
19+
prefill_workers: 1
20+
decode_workers: 1
21+
gpus_per_prefill: 1
22+
gpus_per_decode: 4
23+
24+
frontend:
25+
type: dynamo
26+
enable_multiple_frontends: false
27+
28+
backend:
29+
type: vllm
30+
connector: null
31+
32+
prefill_environment:
33+
VLLM_ENGINE_READY_TIMEOUT_S: "3600"
34+
VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
35+
36+
decode_environment:
37+
VLLM_ENGINE_READY_TIMEOUT_S: "3600"
38+
VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
39+
40+
vllm_config:
41+
prefill:
42+
kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
43+
kv-cache-dtype: "fp8"
44+
tensor-parallel-size: 1
45+
pipeline-parallel-size: 1
46+
safetensors-load-strategy: "prefetch"
47+
trust-remote-code: true
48+
no-enable-prefix-caching: true
49+
stream-interval: 32
50+
51+
decode:
52+
kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
53+
kv-cache-dtype: "fp8"
54+
tensor-parallel-size: 4
55+
safetensors-load-strategy: "prefetch"
56+
trust-remote-code: true
57+
no-enable-prefix-caching: true
58+
stream-interval: 32
59+
60+
benchmark:
61+
type: "sa-bench"
62+
isl: 1024
63+
osl: 1024
64+
concurrencies: "8x16x32x64x128"
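[File path not captured in this extract. The recipe name and concurrency list below appear to correspond to the CONFIG_FILE entry recipes/vllm/minimax-m2.5-fp8/1k1k/disagg-gb300-1p2d-tp4.yaml referenced in nvidia-master.yaml above.]
Lines changed: 69 additions & 0 deletions
Original file line number | Diff line number | Diff line change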
@@ -0,0 +1,69 @@
1+
name: "minimax-m2.5-vllm-disagg-gb300-1p2d-tp4"
2+
3+
model:
4+
path: "minimax-m2.5-fp8"
5+
container: "vllm/vllm-openai:v0.20.1"
6+
precision: "fp8"
7+
8+
dynamo:
9+
install: true
10+
wheel: "1.2.0.dev20260526"
11+
12+
13+
setup_script: install-deps.sh
14+
15+
resources:
16+
gpu_type: "gb300"
17+
gpus_per_node: 4
18+
prefill_nodes: 1
19+
decode_nodes: 2
20+
prefill_workers: 1
21+
decode_workers: 2
22+
gpus_per_prefill: 1
23+
gpus_per_decode: 4
24+
25+
frontend:
26+
type: dynamo
27+
enable_multiple_frontends: false
28+
29+
backend:
30+
type: vllm
31+
connector: null
32+
33+
prefill_environment:
34+
VLLM_ENGINE_READY_TIMEOUT_S: "3600"
35+
VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
36+
37+
decode_environment:
38+
VLLM_ENGINE_READY_TIMEOUT_S: "3600"
39+
VLLM_FLASHINFER_ALLREDUCE_BACKEND: "mnnvl"
40+
41+
vllm_config:
42+
prefill:
43+
kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
44+
kv-cache-dtype: "fp8"
45+
tensor-parallel-size: 1
46+
pipeline-parallel-size: 1
47+
safetensors-load-strategy: "prefetch"
48+
trust-remote-code: true
49+
no-enable-prefix-caching: true
50+
stream-interval: 32
51+
52+
decode:
53+
kv-transfer-config: '{"kv_connector": "NixlConnector", "kv_role": "kv_both"}'
54+
kv-cache-dtype: "fp8"
55+
tensor-parallel-size: 4
56+
safetensors-load-strategy: "prefetch"
57+
trust-remote-code: true
58+
no-enable-prefix-caching: true
59+
stream-interval: 32
60+
61+
benchmark:
62+
type: "sa-bench"
63+
isl: 1024
64+
osl: 1024
65+
concurrencies: "32x64x128x256x512"
66+
# warmup_prompts: 1
67+
# use_chat_template: false
68+
# req_rate: "inf"
69+
# random_range_ratio: 1.0

0 commit comments

Comments
 (0)