Skip to content

Commit cb21694

Browse files
committed
(testing) add offload off scenario to dsv4 b300
1 parent 3747263 commit cb21694

1 file changed

Lines changed: 6 additions & 15 deletions

File tree

.github/configs/nvidia-master.yaml

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9259,7 +9259,7 @@ glm5-fp8-gb300-dynamo-sglang:
92599259
dp-attn: false
92609260

92619261
dsv4-fp4-b200-vllm-agentic:
9262-
image: cquil/vllm-openai:v0.22.0-6c529f3001ab8bf44b1657e779dc54b622397045
9262+
image: vllm/vllm-openai:nightly-d0975a4b50140a9d953f00955a1cbb2a4945edef
92639263
model: deepseek-ai/DeepSeek-V4-Pro
92649264
model-prefix: dsv4
92659265
runner: b200-dgxc
@@ -9408,7 +9408,8 @@ dsv4-fp8-h200-vllm-agentic:
94089408
dsv4-fp4-b300-vllm-agentic:
94099409
# image: vllm/vllm-openai:v0.22.0
94109410
# includes https://github.com/vllm-project/vllm/pull/43447 up to 6c529f3001ab8bf44b1657e779dc54b622397045
9411-
image: cquil/vllm-openai:v0.22.0-6c529f3001ab8bf44b1657e779dc54b622397045
9411+
# image: cquil/vllm-openai:v0.22.0-6c529f3001ab8bf44b1657e779dc54b622397045
9412+
image: vllm/vllm-openai:nightly-d0975a4b50140a9d953f00955a1cbb2a4945edef
94129413
model: deepseek-ai/DeepSeek-V4-Pro
94139414
model-prefix: dsv4
94149415
runner: b300
@@ -9419,20 +9420,10 @@ dsv4-fp4-b300-vllm-agentic:
94199420
agentic-coding:
94209421
- duration: 1800
94219422
search-space:
9422-
# TEMPORARILY COMMENTED OUT — running offloading=none only this iteration.
9423-
# cpu offload only this iteration — none entries already validated in
9424-
# earlier runs. Re-add when investigating regressions in offload=none.
9425-
# - { tp: 4, offloading: cpu, conc-list: [16, 32, 64] }
9426-
# - { tp: 8, offloading: cpu, conc-list: [16, 32, 64] }
9427-
# - { tp: 4, ep: 4, dp-attn: true, offloading: cpu, conc-list: [64, 128, 256] }
9428-
# - { tp: 8, ep: 8, dp-attn: true, offloading: cpu, conc-list: [128, 256, 512] }
9429-
# - { tp: 4, offloading: none, conc-list: [16, 32, 64] }
9430-
# - { tp: 8, offloading: none, conc-list: [16, 32, 64] }
9431-
# - { tp: 4, ep: 4, dp-attn: true, offloading: none, conc-list: [64, 128, 256] }
9432-
# - { tp: 8, ep: 8, dp-attn: true, offloading: none, conc-list: [128, 256, 512] }
9433-
- { tp: 4, offloading: none, conc-list: [1, 4, 8, 16, 32, 40, 48, 52, 64, 72] }
9423+
- { tp: 4, offloading: none, conc-list: [1, 4, 8, 16, 32] }
94349424
- { tp: 8, offloading: none, conc-list: [1, 4, 8, 16, 32, 40, 48, 52, 64, 72] }
9435-
- { tp: 4, ep: 4, dp-attn: true, offloading: none, conc-list: [52, 64, 72, 84, 100, 128, 196, 256, 512] }
9425+
- { tp: 4, ep: 4, dp-attn: true, offloading: none, conc-list: [8, 16, 32, 64, 128] }
9426+
- { tp: 4, ep: 4, dp-attn: true, offloading: cpu, conc-list: [32, 48, 64, 96, 128, 192, 256] }
94369427
- { tp: 8, ep: 8, dp-attn: true, offloading: none, conc-list: [52, 64, 72, 84, 100, 128, 196, 256, 512] }
94379428

94389429
gptoss-fp4-b200-vllm-agentic:

0 commit comments

Comments
 (0)