@@ -167,34 +167,38 @@ dsr1-fp8-h200-trt:
167167 - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 64 }
168168
169169gptoss-fp4-b200-trt :
170- image : nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc0.post1
170+ image : nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc2
171171 model : openai/gpt-oss-120b
172172 model-prefix : gptoss
173173 runner : b200-trt
174174 precision : fp4
175175 framework : trt
176- # For all sequence lengths, if CONC >= 256, then EP=TP and DP_ATTN=true
176+ # DP_ATTENTION configs are added from conc >= 32 (isl 1024 / osl 1024) and from conc >= 64 (other sequence lengths)
177177 seq-len-configs :
178178 - isl : 1024
179179 osl : 1024
180180 search-space :
181+ - { tp: 2, dp-attn: true, conc-start: 32, conc-end: 128 }
182+ - { tp: 4, dp-attn: true, conc-start: 32, conc-end: 64 }
181183 - { tp: 1, conc-start: 64, conc-end: 128 }
182- - { tp: 2, conc-start: 4, conc-end: 128 }
183- - { tp: 4, conc-start: 4, conc-end: 128 }
184+ - { tp: 2, conc-start: 4, conc-end: 32 }
185+ - { tp: 4, conc-start: 4, conc-end: 64 }
184186 - { tp: 8, conc-start: 4, conc-end: 8 }
185187 - isl : 1024
186188 osl : 8192
187189 search-space :
188190 - { tp: 1, conc-start: 64, conc-end: 128 }
191+ - { tp: 2, dp-attn: true, conc-start: 64, conc-end: 128 }
189192 - { tp: 2, conc-start: 4, conc-end: 128 }
190193 - { tp: 4, conc-start: 4, conc-end: 128 }
191194 - { tp: 8, conc-start: 4, conc-end: 16 }
192195 - isl : 8192
193196 osl : 1024
194197 search-space :
195198 - { tp: 1, conc-start: 64, conc-end: 128 }
199+ - { tp: 2, dp-attn: true, conc-start: 64, conc-end: 128 }
196200 - { tp: 2, conc-start: 4, conc-end: 128 }
197- - { tp: 4, conc-start: 4, conc-end: 128 }
201+ - { tp: 4, conc-start: 4, conc-end: 32 }
198202 - { tp: 8, conc-start: 4, conc-end: 8 }
199203
200204gptoss-fp4-b200-vllm :
0 commit comments