Skip to content

Commit d82c9a6

Browse files
authored
Merge branch 'main' into multinode-integration
2 parents d126dca + 06667a5 commit d82c9a6

2 files changed

Lines changed: 11 additions & 5 deletions

File tree

.github/configs/nvidia-master.yaml

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,7 @@ dsr1-fp8-h200-trt:
173173
- { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 64 }
174174

175175
gptoss-fp4-b200-trt:
176-
image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1
176+
image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2
177177
model: openai/gpt-oss-120b
178178
model-prefix: gptoss
179179
runner: b200-trt
@@ -185,23 +185,27 @@ gptoss-fp4-b200-trt:
185185
- isl: 1024
186186
osl: 1024
187187
search-space:
188+
- { tp: 2, dp-attn: true, conc-start: 32, conc-end: 128 }
189+
- { tp: 4, dp-attn: true, conc-start: 32, conc-end: 64 }
188190
- { tp: 1, conc-start: 64, conc-end: 128 }
189-
- { tp: 2, conc-start: 4, conc-end: 128 }
190-
- { tp: 4, conc-start: 4, conc-end: 128 }
191+
- { tp: 2, conc-start: 4, conc-end: 32 }
192+
- { tp: 4, conc-start: 4, conc-end: 64 }
191193
- { tp: 8, conc-start: 4, conc-end: 8 }
192194
- isl: 1024
193195
osl: 8192
194196
search-space:
195197
- { tp: 1, conc-start: 64, conc-end: 128 }
198+
- { tp: 2, dp-attn: true, conc-start: 64, conc-end: 128 }
196199
- { tp: 2, conc-start: 4, conc-end: 128 }
197200
- { tp: 4, conc-start: 4, conc-end: 128 }
198201
- { tp: 8, conc-start: 4, conc-end: 16 }
199202
- isl: 8192
200203
osl: 1024
201204
search-space:
202205
- { tp: 1, conc-start: 64, conc-end: 128 }
206+
- { tp: 2, dp-attn: true, conc-start: 64, conc-end: 128 }
203207
- { tp: 2, conc-start: 4, conc-end: 128 }
204-
- { tp: 4, conc-start: 4, conc-end: 128 }
208+
- { tp: 4, conc-start: 4, conc-end: 32 }
205209
- { tp: 8, conc-start: 4, conc-end: 8 }
206210

207211
gptoss-fp4-b200-vllm:

benchmarks/gptoss_fp4_b200_trt_slurm.sh

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,6 @@ echo "MOE_BACKEND set to '$MOE_BACKEND'"
3131

3232
EXTRA_CONFIG_FILE="gptoss-fp4.yml"
3333
export TRTLLM_ENABLE_PDL=1
34-
export NCCL_GRAPH_REGISTER=0
3534

3635
cat > $EXTRA_CONFIG_FILE << EOF
3736
cuda_graph_config:
@@ -50,6 +49,9 @@ moe_config:
5049
EOF
5150

5251
if [[ "$DP_ATTENTION" == "true" ]]; then
52+
export TRTLLM_MOE_ALLTOALL_BACKEND="mnnvlthroughput"
53+
export TRTLLM_FORCE_ALLTOALL_METHOD="MNNVL"
54+
export TRTLLM_MOE_A2A_WORKSPACE_MB="2048"
5355
cat << EOF >> $EXTRA_CONFIG_FILE
5456
attention_dp_config:
5557
enable_balance: true

0 commit comments

Comments
 (0)