Skip to content

Commit c183bc8

Browse files
committed
Resolve B300 DSV4 SGLang PR conflicts
2 parents a947d18 + d4948f9 commit c183bc8

19 files changed

Lines changed: 2458 additions & 41 deletions

.github/configs/nvidia-master.yaml

Lines changed: 217 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1682,13 +1682,13 @@ dsr1-fp4-b200-sglang:
16821682
- isl: 1024
16831683
osl: 1024
16841684
search-space:
1685-
- { tp: 4, ep: 4, conc-start: 4, conc-end: 128 }
1686-
- { tp: 8, ep: 8, conc-start: 4, conc-end: 128 }
1685+
- { tp: 4, ep: 4, conc-start: 1, conc-end: 128 }
1686+
- { tp: 8, ep: 8, conc-start: 1, conc-end: 128 }
16871687
- isl: 8192
16881688
osl: 1024
16891689
search-space:
1690-
- { tp: 4, ep: 4, conc-start: 4, conc-end: 128 }
1691-
- { tp: 8, ep: 8, conc-start: 4, conc-end: 16 }
1690+
- { tp: 4, ep: 4, conc-start: 1, conc-end: 128 }
1691+
- { tp: 8, ep: 8, conc-start: 1, conc-end: 16 }
16921692
# agentic-coding: temporarily disabled — blocked by e2e-tests.yml artifact
16931693
# name mismatch (downloads `agentic_*` but benchmark-tmpl.yml uploads as
16941694
# `bmk_agentic_*`). Re-enable once that workflow is aligned.
@@ -1883,13 +1883,13 @@ dsr1-fp4-b300-sglang:
18831883
- isl: 1024
18841884
osl: 1024
18851885
search-space:
1886-
- { tp: 4, ep: 4, conc-start: 4, conc-end: 128 }
1887-
- { tp: 8, ep: 8, conc-start: 4, conc-end: 128 }
1886+
- { tp: 4, ep: 4, conc-start: 1, conc-end: 128 }
1887+
- { tp: 8, ep: 8, conc-start: 1, conc-end: 128 }
18881888
- isl: 8192
18891889
osl: 1024
18901890
search-space:
1891-
- { tp: 4, ep: 4, conc-start: 4, conc-end: 128 }
1892-
- { tp: 8, ep: 8, conc-start: 4, conc-end: 16 }
1891+
- { tp: 4, ep: 4, conc-start: 1, conc-end: 128 }
1892+
- { tp: 8, ep: 8, conc-start: 1, conc-end: 16 }
18931893

18941894
dsr1-fp4-b200-trt:
18951895
image: nvcr.io#nvidia/tensorrt-llm/release:1.3.0rc14
@@ -1969,12 +1969,12 @@ dsr1-fp8-b200-sglang:
19691969
- isl: 1024
19701970
osl: 1024
19711971
search-space:
1972-
- { tp: 8, ep: 1, conc-start: 4, conc-end: 64 }
1972+
- { tp: 8, ep: 1, conc-start: 1, conc-end: 64 }
19731973
- isl: 8192
19741974
osl: 1024
19751975
search-space:
1976-
- { tp: 8, ep: 1, conc-start: 4, conc-end: 4 }
1977-
- { tp: 4, ep: 1, conc-start: 4, conc-end: 32 }
1976+
- { tp: 8, ep: 1, conc-start: 1, conc-end: 4 }
1977+
- { tp: 4, ep: 1, conc-start: 1, conc-end: 32 }
19781978

19791979
# NOTE: At the time of submission, https://cookbook.sglang.io/autoregressive/DeepSeek/DeepSeek-R1
19801980
# does not have a B300-specific recipe, so this config reuses the existing DSR1 FP8
@@ -1992,12 +1992,12 @@ dsr1-fp8-b300-sglang:
19921992
- isl: 1024
19931993
osl: 1024
19941994
search-space:
1995-
- { tp: 8, ep: 1, conc-start: 4, conc-end: 64 }
1995+
- { tp: 8, ep: 1, conc-start: 1, conc-end: 64 }
19961996
- isl: 8192
19971997
osl: 1024
19981998
search-space:
1999-
- { tp: 8, ep: 1, conc-start: 4, conc-end: 4 }
2000-
- { tp: 4, ep: 1, conc-start: 4, conc-end: 32 }
1999+
- { tp: 8, ep: 1, conc-start: 1, conc-end: 4 }
2000+
- { tp: 4, ep: 1, conc-start: 1, conc-end: 32 }
20012001

20022002
# DeepSeek-V4-Pro on B300 with SGLang (non-MTP). This follows the 8k/1k
20032003
# submission frontier from the 2026-05-19 Pareto HTML:
@@ -2725,11 +2725,11 @@ dsr1-fp8-b200-sglang-mtp:
27252725
- isl: 1024
27262726
osl: 1024
27272727
search-space:
2728-
- { tp: 8, ep: 1, conc-start: 4, conc-end: 512, spec-decoding: mtp }
2728+
- { tp: 8, ep: 1, conc-start: 1, conc-end: 512, spec-decoding: mtp }
27292729
- isl: 8192
27302730
osl: 1024
27312731
search-space:
2732-
- { tp: 8, ep: 1, conc-start: 4, conc-end: 512, spec-decoding: mtp }
2732+
- { tp: 8, ep: 1, conc-start: 1, conc-end: 512, spec-decoding: mtp }
27332733

27342734
# NOTE: At the time of submission, https://cookbook.sglang.io/autoregressive/DeepSeek/DeepSeek-R1
27352735
# does not have a B300-specific recipe, so this config reuses the existing DSR1 FP8
@@ -2748,11 +2748,11 @@ dsr1-fp8-b300-sglang-mtp:
27482748
- isl: 1024
27492749
osl: 1024
27502750
search-space:
2751-
- { tp: 8, ep: 1, conc-start: 4, conc-end: 512, spec-decoding: mtp }
2751+
- { tp: 8, ep: 1, conc-start: 1, conc-end: 512, spec-decoding: mtp }
27522752
- isl: 8192
27532753
osl: 1024
27542754
search-space:
2755-
- { tp: 8, ep: 1, conc-start: 4, conc-end: 512, spec-decoding: mtp }
2755+
- { tp: 8, ep: 1, conc-start: 1, conc-end: 512, spec-decoding: mtp }
27562756

27572757
# Diverged from kimik2.5-fp4-b300-vllm (agentic-coding sibling). Reasons below;
27582758
# the original kimik2.5-fp4-b300-vllm entry is left identical to origin/main so
@@ -4503,7 +4503,7 @@ gptoss-fp4-h100-vllm:
45034503
- { tp: 8, conc-start: 4, conc-end: 16 }
45044504

45054505
minimaxm2.5-fp8-h100-vllm:
4506-
image: vllm/vllm-openai:v0.21.0
4506+
image: vllm/vllm-openai:v0.19.1-cu130
45074507
model: MiniMaxAI/MiniMax-M2.5
45084508
model-prefix: minimaxm2.5
45094509
runner: h100
@@ -4515,13 +4515,11 @@ minimaxm2.5-fp8-h100-vllm:
45154515
- isl: 1024
45164516
osl: 1024
45174517
search-space:
4518-
# - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 }
4519-
- { tp: 4, ep: 4, conc-start: 4, conc-end: 64 }
4518+
- { tp: 8, ep: 8, conc-start: 4, conc-end: 128 }
45204519
- isl: 8192
45214520
osl: 1024
45224521
search-space:
4523-
# - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 }
4524-
- { tp: 4, ep: 4, conc-start: 4, conc-end: 64 }
4522+
- { tp: 8, ep: 8, conc-start: 4, conc-end: 128 }
45254523

45264524
# Diverged from minimaxm2.5-fp8-h100-vllm (agentic-coding sibling). Metadata is
45274525
# identical to origin/main's minimaxm2.5-fp8-h100-vllm; the split exists because this
@@ -9222,3 +9220,199 @@ qwen3.5-fp8-h100-sglang-mtp:
92229220
osl: 1024
92239221
search-space:
92249222
- { tp: 8, ep: 8, conc-start: 4, conc-end: 32, spec-decoding: mtp }
9223+
9224+
glm5-fp4-gb300-dynamo-sglang:
9225+
image: lmsysorg/sglang:v0.5.11-cu130
9226+
model: nvidia/GLM-5-NVFP4
9227+
model-prefix: glm5
9228+
runner: gb300-nv
9229+
precision: fp4
9230+
framework: dynamo-sglang
9231+
multinode: true
9232+
disagg: true
9233+
scenarios:
9234+
fixed-seq-len:
9235+
# ---------- 8k1k high-throughput (wide-EP TP=32 decode) ----------
9236+
- isl: 8192
9237+
osl: 1024
9238+
search-space:
9239+
# 5p1d wide-EP. 13 nodes (5P @ TP=4 + 1D @ TP=32 on 8 nodes).
9240+
- conc-list: [2048]
9241+
prefill:
9242+
num-worker: 5
9243+
tp: 4
9244+
ep: 1
9245+
dp-attn: true
9246+
additional-settings:
9247+
- "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_0.yaml"
9248+
decode:
9249+
num-worker: 1
9250+
tp: 32
9251+
ep: 32
9252+
dp-attn: true
9253+
# 7p1d wide-EP. 15 nodes.
9254+
- conc-list: [3072]
9255+
prefill:
9256+
num-worker: 7
9257+
tp: 4
9258+
ep: 1
9259+
dp-attn: true
9260+
additional-settings:
9261+
- "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_1.yaml"
9262+
decode:
9263+
num-worker: 1
9264+
tp: 32
9265+
ep: 32
9266+
dp-attn: true
9267+
# 10p1d wide-EP. 18 nodes.
9268+
- conc-list: [4096]
9269+
prefill:
9270+
num-worker: 10
9271+
tp: 4
9272+
ep: 1
9273+
dp-attn: true
9274+
additional-settings:
9275+
- "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_maxtpt_2.yaml"
9276+
decode:
9277+
num-worker: 1
9278+
tp: 32
9279+
ep: 32
9280+
dp-attn: true
9281+
# ---------- 8k1k low-latency (per-node TP=4 decode workers) ----------
9282+
- isl: 8192
9283+
osl: 1024
9284+
search-space:
9285+
# 1p3d. 4 nodes (1P + 3 D workers @ 1 node each).
9286+
- conc-list: [1024]
9287+
prefill:
9288+
num-worker: 1
9289+
tp: 4
9290+
ep: 1
9291+
dp-attn: true
9292+
additional-settings:
9293+
- "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_lowlat_0.yaml"
9294+
decode:
9295+
num-worker: 3
9296+
tp: 4
9297+
ep: 1
9298+
dp-attn: false
9299+
# 1p5d. 6 nodes.
9300+
- conc-list: [1024]
9301+
prefill:
9302+
num-worker: 1
9303+
tp: 4
9304+
ep: 1
9305+
dp-attn: true
9306+
additional-settings:
9307+
- "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_lowlat_1.yaml"
9308+
decode:
9309+
num-worker: 5
9310+
tp: 4
9311+
ep: 1
9312+
dp-attn: false
9313+
# 1p9d. 10 nodes.
9314+
- conc-list: [1024]
9315+
prefill:
9316+
num-worker: 1
9317+
tp: 4
9318+
ep: 1
9319+
dp-attn: true
9320+
additional-settings:
9321+
- "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_lowlat_2.yaml"
9322+
decode:
9323+
num-worker: 9
9324+
tp: 4
9325+
ep: 1
9326+
dp-attn: false
9327+
# 1p15d. 16 nodes.
9328+
- conc-list: [1024]
9329+
prefill:
9330+
num-worker: 1
9331+
tp: 4
9332+
ep: 1
9333+
dp-attn: true
9334+
additional-settings:
9335+
- "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/8k1k/disagg/stp/8k1k_stp_lowlat_3.yaml"
9336+
decode:
9337+
num-worker: 15
9338+
tp: 4
9339+
ep: 1
9340+
dp-attn: false
9341+
# ---------- 1k1k high-throughput (wide-EP TP=32 decode) ----------
9342+
- isl: 1024
9343+
osl: 1024
9344+
search-space:
9345+
# 3p1d wide-EP. 11 nodes. conc 16500.
9346+
- conc-list: [16500]
9347+
prefill:
9348+
num-worker: 3
9349+
tp: 4
9350+
ep: 1
9351+
dp-attn: true
9352+
additional-settings:
9353+
- "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_0.yaml"
9354+
decode:
9355+
num-worker: 1
9356+
tp: 32
9357+
ep: 32
9358+
dp-attn: true
9359+
# 2p1d wide-EP. 10 nodes. conc 8300.
9360+
- conc-list: [8300]
9361+
prefill:
9362+
num-worker: 2
9363+
tp: 4
9364+
ep: 1
9365+
dp-attn: true
9366+
additional-settings:
9367+
- "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_1.yaml"
9368+
decode:
9369+
num-worker: 1
9370+
tp: 32
9371+
ep: 32
9372+
dp-attn: true
9373+
# 1p1d wide-EP. 9 nodes. conc sweep: 2500, 1024, 512, 256.
9374+
- conc-list: [2500, 1024, 512, 256]
9375+
prefill:
9376+
num-worker: 1
9377+
tp: 4
9378+
ep: 1
9379+
dp-attn: true
9380+
additional-settings:
9381+
- "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_maxtpt_2.yaml"
9382+
decode:
9383+
num-worker: 1
9384+
tp: 32
9385+
ep: 32
9386+
dp-attn: true
9387+
# ---------- 1k1k low-latency (per-node TP=4 decode workers) ----------
9388+
- isl: 1024
9389+
osl: 1024
9390+
search-space:
9391+
# 1p17d low-latency, bs=32 sweep. 18 nodes.
9392+
- conc-list: [512, 256, 128, 64]
9393+
prefill:
9394+
num-worker: 1
9395+
tp: 4
9396+
ep: 1
9397+
dp-attn: true
9398+
additional-settings:
9399+
- "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_lowlat_0.yaml"
9400+
decode:
9401+
num-worker: 17
9402+
tp: 4
9403+
ep: 1
9404+
dp-attn: false
9405+
# 1p17d low-latency, bs=1 (single-stream). 18 nodes.
9406+
- conc-list: [32]
9407+
prefill:
9408+
num-worker: 1
9409+
tp: 4
9410+
ep: 1
9411+
dp-attn: true
9412+
additional-settings:
9413+
- "CONFIG_FILE=recipes/sglang/glm5/gb300-fp4/1k1k/disagg/stp/1k1k_stp_lowlat_1.yaml"
9414+
decode:
9415+
num-worker: 17
9416+
tp: 4
9417+
ep: 1
9418+
dp-attn: false

0 commit comments

Comments
 (0)