Skip to content

Commit dd96fcf

Browse files
authored
Merge branch 'main' into evals-on-refactor
2 parents 544e698 + 343d193 commit dd96fcf

3 files changed

Lines changed: 16 additions & 7 deletions

File tree

.github/configs/nvidia-master.yaml

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,34 +167,38 @@ dsr1-fp8-h200-trt:
167167
- { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 64 }
168168

169169
gptoss-fp4-b200-trt:
170-
image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1
170+
image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2
171171
model: openai/gpt-oss-120b
172172
model-prefix: gptoss
173173
runner: b200-trt
174174
precision: fp4
175175
framework: trt
176-
# For all sequence lengths, if CONC >= 256, then EP=TP and DP_ATTN=true
176+
# Enable DP_ATTENTION from conc >= 32 (isl/osl 1024/1024) or conc >= 64 (1024/8192, 8192/1024)
177177
seq-len-configs:
178178
- isl: 1024
179179
osl: 1024
180180
search-space:
181+
- { tp: 2, dp-attn: true, conc-start: 32, conc-end: 128 }
182+
- { tp: 4, dp-attn: true, conc-start: 32, conc-end: 64 }
181183
- { tp: 1, conc-start: 64, conc-end: 128 }
182-
- { tp: 2, conc-start: 4, conc-end: 128 }
183-
- { tp: 4, conc-start: 4, conc-end: 128 }
184+
- { tp: 2, conc-start: 4, conc-end: 32 }
185+
- { tp: 4, conc-start: 4, conc-end: 64 }
184186
- { tp: 8, conc-start: 4, conc-end: 8 }
185187
- isl: 1024
186188
osl: 8192
187189
search-space:
188190
- { tp: 1, conc-start: 64, conc-end: 128 }
191+
- { tp: 2, dp-attn: true, conc-start: 64, conc-end: 128 }
189192
- { tp: 2, conc-start: 4, conc-end: 128 }
190193
- { tp: 4, conc-start: 4, conc-end: 128 }
191194
- { tp: 8, conc-start: 4, conc-end: 16 }
192195
- isl: 8192
193196
osl: 1024
194197
search-space:
195198
- { tp: 1, conc-start: 64, conc-end: 128 }
199+
- { tp: 2, dp-attn: true, conc-start: 64, conc-end: 128 }
196200
- { tp: 2, conc-start: 4, conc-end: 128 }
197-
- { tp: 4, conc-start: 4, conc-end: 128 }
201+
- { tp: 4, conc-start: 4, conc-end: 32 }
198202
- { tp: 8, conc-start: 4, conc-end: 8 }
199203

200204
gptoss-fp4-b200-vllm:

.github/workflows/label-validation.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,20 @@ concurrency:
66
cancel-in-progress: true
77

88
on:
9-
pull_request:
9+
pull_request_target:
1010
types: [labeled, synchronize]
1111

1212
jobs:
1313
get-jobs:
1414
runs-on: ubuntu-latest
15+
environment: fork-pr-validation
1516
outputs:
1617
search-space-config: ${{ steps.get-jobs.outputs.search-space-config }}
1718
steps:
1819
- name: Checkout code
1920
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
21+
with:
22+
ref: ${{ github.event.pull_request.head.sha }}
2023

2124
- id: get-jobs
2225
shell: python

benchmarks/gptoss_fp4_b200_trt_slurm.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ echo "MOE_BACKEND set to '$MOE_BACKEND'"
3131

3232
EXTRA_CONFIG_FILE="gptoss-fp4.yml"
3333
export TRTLLM_ENABLE_PDL=1
34-
export NCCL_GRAPH_REGISTER=0
3534

3635
cat > $EXTRA_CONFIG_FILE << EOF
3736
cuda_graph_config:
@@ -50,6 +49,9 @@ moe_config:
5049
EOF
5150

5251
if [[ "$DP_ATTENTION" == "true" ]]; then
52+
export TRTLLM_MOE_ALLTOALL_BACKEND="mnnvlthroughput"
53+
export TRTLLM_FORCE_ALLTOALL_METHOD="MNNVL"
54+
export TRTLLM_MOE_A2A_WORKSPACE_MB="2048"
5355
cat << EOF >> $EXTRA_CONFIG_FILE
5456
attention_dp_config:
5557
enable_balance: true

0 commit comments

Comments
 (0)