@@ -9259,7 +9259,7 @@ glm5-fp8-gb300-dynamo-sglang:
92599259 dp-attn : false
92609260
92619261dsv4-fp4-b200-vllm-agentic :
9262- image : cquil/vllm-openai:v0.22.0-6c529f3001ab8bf44b1657e779dc54b622397045
9262+ image : vllm/vllm-openai:nightly-d0975a4b50140a9d953f00955a1cbb2a4945edef
92639263 model : deepseek-ai/DeepSeek-V4-Pro
92649264 model-prefix : dsv4
92659265 runner : b200-dgxc
@@ -9408,7 +9408,8 @@ dsv4-fp8-h200-vllm-agentic:
94089408dsv4-fp4-b300-vllm-agentic :
94099409 # image: vllm/vllm-openai:v0.22.0
94109410 # includes https://github.com/vllm-project/vllm/pull/43447 up to 6c529f3001ab8bf44b1657e779dc54b622397045
9411- image : cquil/vllm-openai:v0.22.0-6c529f3001ab8bf44b1657e779dc54b622397045
9411+ # image: cquil/vllm-openai:v0.22.0-6c529f3001ab8bf44b1657e779dc54b622397045
9412+ image : vllm/vllm-openai:nightly-d0975a4b50140a9d953f00955a1cbb2a4945edef
94129413 model : deepseek-ai/DeepSeek-V4-Pro
94139414 model-prefix : dsv4
94149415 runner : b300
@@ -9419,20 +9420,10 @@ dsv4-fp4-b300-vllm-agentic:
94199420 agentic-coding :
94209421 - duration : 1800
94219422 search-space :
9422- # TEMPORARILY COMMENTED OUT — running offloading=none only this iteration.
9423- # cpu offload only this iteration — none entries already validated in
9424- # earlier runs. Re-add when investigating regressions in offload=none.
9425- # - { tp: 4, offloading: cpu, conc-list: [16, 32, 64] }
9426- # - { tp: 8, offloading: cpu, conc-list: [16, 32, 64] }
9427- # - { tp: 4, ep: 4, dp-attn: true, offloading: cpu, conc-list: [64, 128, 256] }
9428- # - { tp: 8, ep: 8, dp-attn: true, offloading: cpu, conc-list: [128, 256, 512] }
9429- # - { tp: 4, offloading: none, conc-list: [16, 32, 64] }
9430- # - { tp: 8, offloading: none, conc-list: [16, 32, 64] }
9431- # - { tp: 4, ep: 4, dp-attn: true, offloading: none, conc-list: [64, 128, 256] }
9432- # - { tp: 8, ep: 8, dp-attn: true, offloading: none, conc-list: [128, 256, 512] }
9433- - { tp: 4, offloading: none, conc-list: [1, 4, 8, 16, 32, 40, 48, 52, 64, 72] }
9423+ - { tp: 4, offloading: none, conc-list: [1, 4, 8, 16, 32] }
94349424 - { tp: 8, offloading: none, conc-list: [1, 4, 8, 16, 32, 40, 48, 52, 64, 72] }
9435- - { tp: 4, ep: 4, dp-attn: true, offloading: none, conc-list: [52, 64, 72, 84, 100, 128, 196, 256, 512] }
9425+ - { tp: 4, ep: 4, dp-attn: true, offloading: none, conc-list: [8, 16, 32, 64, 128] }
9426+ - { tp: 4, ep: 4, dp-attn: true, offloading: cpu, conc-list: [32, 48, 64, 96, 128, 192, 256] }
94369427 - { tp: 8, ep: 8, dp-attn: true, offloading: none, conc-list: [52, 64, 72, 84, 100, 128, 196, 256, 512] }
94379428
94389429gptoss-fp4-b200-vllm-agentic :
0 commit comments