Skip to content

Commit b1fbf48

Browse files
author
Jatin Gangani
committed
Update GPTOSS B200 AGG
1 parent 175fe53 commit b1fbf48

2 files changed

Lines changed: 11 additions & 6 deletions

File tree

.github/configs/nvidia-master.yaml

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,34 +167,37 @@ dsr1-fp8-h200-trt:
167167
- { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 64 }
168168

169169
gptoss-fp4-b200-trt:
170-
image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1
170+
image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2
171171
model: openai/gpt-oss-120b
172172
model-prefix: gptoss
173173
runner: b200-trt
174174
precision: fp4
175175
framework: trt
176-
# For all sequence lengths, if CONC >= 256, then EP=TP and DP_ATTN=true
177176
seq-len-configs:
178177
- isl: 1024
179178
osl: 1024
180179
search-space:
180+
- { tp: 2, dp-attn: true, conc-start: 32, conc-end: 128 }
181+
- { tp: 4, dp-attn: true, conc-start: 32, conc-end: 64 }
181182
- { tp: 1, conc-start: 64, conc-end: 128 }
182-
- { tp: 2, conc-start: 4, conc-end: 128 }
183-
- { tp: 4, conc-start: 4, conc-end: 128 }
183+
- { tp: 2, conc-start: 4, conc-end: 32 }
184+
- { tp: 4, conc-start: 4, conc-end: 64 }
184185
- { tp: 8, conc-start: 4, conc-end: 8 }
185186
- isl: 1024
186187
osl: 8192
187188
search-space:
188189
- { tp: 1, conc-start: 64, conc-end: 128 }
190+
- { tp: 2, dp-attn: true, conc-start: 64, conc-end: 128 }
189191
- { tp: 2, conc-start: 4, conc-end: 128 }
190192
- { tp: 4, conc-start: 4, conc-end: 128 }
191193
- { tp: 8, conc-start: 4, conc-end: 16 }
192194
- isl: 8192
193195
osl: 1024
194196
search-space:
195197
- { tp: 1, conc-start: 64, conc-end: 128 }
198+
- { tp: 2, dp-attn: true, conc-start: 64, conc-end: 128 }
196199
- { tp: 2, conc-start: 4, conc-end: 128 }
197-
- { tp: 4, conc-start: 4, conc-end: 128 }
200+
- { tp: 4, conc-start: 4, conc-end: 32 }
198201
- { tp: 8, conc-start: 4, conc-end: 8 }
199202

200203
gptoss-fp4-b200-vllm:

benchmarks/gptoss_fp4_b200_trt_slurm.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ echo "MOE_BACKEND set to '$MOE_BACKEND'"
3131

3232
EXTRA_CONFIG_FILE="gptoss-fp4.yml"
3333
export TRTLLM_ENABLE_PDL=1
34-
export NCCL_GRAPH_REGISTER=0
34+
export TRTLLM_MOE_ALLTOALL_BACKEND="mnnvlthroughput"
35+
export TRTLLM_FORCE_ALLTOALL_METHOD="MNNVL"
36+
export TRTLLM_MOE_A2A_WORKSPACE_MB="2048"
3537

3638
cat > $EXTRA_CONFIG_FILE << EOF
3739
cuda_graph_config:

0 commit comments

Comments
 (0)