Skip to content

Commit 297bd7f

Browse files
committed
typo...
1 parent 0e3c359 commit 297bd7f

1 file changed

Lines changed: 71 additions & 71 deletions

File tree

.github/configs/nvidia-master.yaml

Lines changed: 71 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -890,74 +890,74 @@ dsr1-fp8-gb200-dynamo-sglang:
890890
additional-settings:
891891
- "DECODE_NODES=8"
892892

893-
dsr1-fp4-gb200-dynamo-sglang:
894-
# TODO: swap
895-
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1
896-
# TODO: what is the right name?
897-
model: deepseek-ai/DeepSeek-R1-0528-fp4-v2
898-
model-prefix: dsr1
899-
runner: gb200
900-
precision: fp4
901-
framework: dynamo-sglang
902-
multinode: true
903-
disagg: true
904-
seq-len-configs:
905-
- isl: 1024
906-
osl: 1024
907-
search-space:
908-
# Low latency (1 prefill worker at DEP4 and 2 decode workers at DEP4)
909-
- spec-decoding: "none"
910-
conc-list: [ 4, 8, 32, 64, 128, 112, 128, 256 ]
911-
prefill:
912-
num-worker: 1
913-
tp: 1
914-
ep: 1
915-
dp-attn: true
916-
additional-settings:
917-
- "PREFILL_NODES=1"
918-
- "N_ADDITIONAL_FRONTENDS=8"
919-
decode:
920-
num-worker: 2
921-
tp: 1
922-
ep: 1
923-
dp-attn: true
924-
additional-settings:
925-
- "DECODE_NODES=2"
926-
927-
# Mid curve (1 prefill worker at DEP4 and 1 decode worker at DEP48)
928-
- spec-decoding: "none"
929-
conc-list: [ 512, 1024, 2048, 4096, 8192 ]
930-
prefill:
931-
num-worker: 1
932-
tp: 1
933-
ep: 1
934-
dp-attn: true
935-
additional-settings:
936-
- "PREFILL_NODES=1"
937-
- "N_ADDITIONAL_FRONTENDS=8"
938-
decode:
939-
num-worker: 2
940-
tp: 1
941-
ep: 1
942-
dp-attn: true
943-
additional-settings:
944-
- "DECODE_NODES=12"
945-
946-
# Top of curve (1 prefill worker at DEP4 and 1 decode worker at DEP32)
947-
- spec-decoding: "none"
948-
conc-list: [ 8192, 12000, 15000 ]
949-
prefill:
950-
num-worker: 1
951-
tp: 1
952-
ep: 1
953-
dp-attn: true
954-
additional-settings:
955-
- "PREFILL_NODES=1"
956-
- "N_ADDITIONAL_FRONTENDS=8"
957-
decode:
958-
num-worker: 2
959-
tp: 1
960-
ep: 1
961-
dp-attn: true
962-
additional-settings:
963-
- "DECODE_NODES=8"
893+
dsr1-fp4-gb200-dynamo-sglang:
894+
# TODO: swap
895+
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1
896+
# TODO: what is the right name?
897+
model: deepseek-ai/DeepSeek-R1-0528-fp4-v2
898+
model-prefix: dsr1
899+
runner: gb200
900+
precision: fp4
901+
framework: dynamo-sglang
902+
multinode: true
903+
disagg: true
904+
seq-len-configs:
905+
- isl: 1024
906+
osl: 1024
907+
search-space:
908+
# Low latency (1 prefill worker at DEP4 and 2 decode workers at DEP4)
909+
- spec-decoding: "none"
910+
conc-list: [ 4, 8, 32, 64, 128, 112, 128, 256 ]
911+
prefill:
912+
num-worker: 1
913+
tp: 1
914+
ep: 1
915+
dp-attn: true
916+
additional-settings:
917+
- "PREFILL_NODES=1"
918+
- "N_ADDITIONAL_FRONTENDS=8"
919+
decode:
920+
num-worker: 2
921+
tp: 1
922+
ep: 1
923+
dp-attn: true
924+
additional-settings:
925+
- "DECODE_NODES=2"
926+
927+
# Mid curve (1 prefill worker at DEP4 and 1 decode worker at DEP48)
928+
- spec-decoding: "none"
929+
conc-list: [ 512, 1024, 2048, 4096, 8192 ]
930+
prefill:
931+
num-worker: 1
932+
tp: 1
933+
ep: 1
934+
dp-attn: true
935+
additional-settings:
936+
- "PREFILL_NODES=1"
937+
- "N_ADDITIONAL_FRONTENDS=8"
938+
decode:
939+
num-worker: 2
940+
tp: 1
941+
ep: 1
942+
dp-attn: true
943+
additional-settings:
944+
- "DECODE_NODES=12"
945+
946+
# Top of curve (1 prefill worker at DEP4 and 1 decode worker at DEP32)
947+
- spec-decoding: "none"
948+
conc-list: [ 8192, 12000, 15000 ]
949+
prefill:
950+
num-worker: 1
951+
tp: 1
952+
ep: 1
953+
dp-attn: true
954+
additional-settings:
955+
- "PREFILL_NODES=1"
956+
- "N_ADDITIONAL_FRONTENDS=8"
957+
decode:
958+
num-worker: 2
959+
tp: 1
960+
ep: 1
961+
dp-attn: true
962+
additional-settings:
963+
- "DECODE_NODES=8"

0 commit comments

Comments
 (0)