Skip to content

Commit 44868fc

Browse files
committed
go
1 parent f22cf47 commit 44868fc

2 files changed

Lines changed: 98 additions & 10 deletions

File tree

.github/configs/nvidia-master.yaml

Lines changed: 97 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -852,22 +852,114 @@ dsr1-fp8-gb200-dynamo-sglang:
852852
- isl: 8192
853853
osl: 1024
854854
search-space:
855+
# Low latency (1 prefill worker at DEP4 and 1 decode worker at DEP4)
855856
- spec-decoding: "none"
856-
conc-list: [ 128, 256, 384, 448, 512, 576, 1024, 2048, 4096 ]
857+
conc-list: [ 4, 8, 16, 32, 64, 128, 256, 512 ]
857858
prefill:
858-
num-worker: 6
859+
num-worker: 1
859860
# tp, ep, and dp-attn do nothing here because they are hardcoded in the Dynamo SGLang SLURM job scripts (in the dynamo repository cloned by the benchmark script).
860-
# https://github.com/Elnifio/dynamo/blob/update-result-file-name/components/backends/sglang/slurm_jobs/scripts/gb200-fp8.sh
861861
tp: 1
862862
ep: 1
863863
dp-attn: true
864864
additional-settings:
865-
- "PREFILL_NODES=12"
865+
- "PREFILL_NODES=1"
866+
- "N_ADDITIONAL_FRONTENDS=8"
867+
decode:
868+
num-worker: 1
869+
tp: 1
870+
ep: 1
871+
dp-attn: true
872+
additional-settings:
873+
- "DECODE_NODES=1"
874+
875+
# Middle and top of curve (5 prefill workers each at DEP8 and 1 decode worker at DEP32)
876+
- spec-decoding: "none"
877+
conc-list: [ 512, 1024, 2048, 6144 ]
878+
prefill:
879+
num-worker: 5
880+
# tp, ep, and dp-attn do nothing here because they are hardcoded in the Dynamo SGLang SLURM job scripts (in the dynamo repository cloned by the benchmark script).
881+
tp: 1
882+
ep: 1
883+
dp-attn: true
884+
additional-settings:
885+
- "PREFILL_NODES=2"
866886
- "N_ADDITIONAL_FRONTENDS=8"
867887
decode:
868888
num-worker: 1
869889
tp: 1
870890
ep: 1
871891
dp-attn: true
872892
additional-settings:
873-
- "DECODE_NODES=6"
893+
- "DECODE_NODES=8"
894+
895+
dsr1-fp4-gb200-dynamo-sglang:
896+
# TODO: swap out this image
897+
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1
898+
# TODO: confirm the correct model name for the fp4 checkpoint
899+
model: deepseek-ai/DeepSeek-R1-0528-fp4-v2
900+
model-prefix: dsr1
901+
runner: gb200
902+
precision: fp4
903+
framework: dynamo-sglang
904+
multinode: true
905+
disagg: true
906+
seq-len-configs:
907+
- isl: 1024
908+
osl: 1024
909+
search-space:
910+
# Low latency (1 prefill worker at DEP4 and 2 decode workers at DEP4)
911+
- spec-decoding: "none"
912+
conc-list: [ 4, 8, 32, 64, 128, 112, 128, 256 ]
913+
prefill:
914+
num-worker: 1
915+
tp: 1
916+
ep: 1
917+
dp-attn: true
918+
additional-settings:
919+
- "PREFILL_NODES=1"
920+
- "N_ADDITIONAL_FRONTENDS=8"
921+
decode:
922+
num-worker: 2
923+
tp: 1
924+
ep: 1
925+
dp-attn: true
926+
additional-settings:
927+
- "DECODE_NODES=2"
928+
929+
# Mid curve (1 prefill worker at DEP4 and 1 decode worker at DEP48) -- NOTE(review): decode num-worker below is 2; confirm which is intended
930+
- spec-decoding: "none"
931+
conc-list: [ 512, 1024, 2048, 4096, 8192 ]
932+
prefill:
933+
num-worker: 1
934+
tp: 1
935+
ep: 1
936+
dp-attn: true
937+
additional-settings:
938+
- "PREFILL_NODES=1"
939+
- "N_ADDITIONAL_FRONTENDS=8"
940+
decode:
941+
num-worker: 2
942+
tp: 1
943+
ep: 1
944+
dp-attn: true
945+
additional-settings:
946+
- "DECODE_NODES=12"
947+
948+
# Top of curve (1 prefill worker at DEP4 and 1 decode worker at DEP32) -- NOTE(review): decode num-worker below is 2; confirm which is intended
949+
- spec-decoding: "none"
950+
conc-list: [ 8192, 12000, 15000 ]
951+
prefill:
952+
num-worker: 1
953+
tp: 1
954+
ep: 1
955+
dp-attn: true
956+
additional-settings:
957+
- "PREFILL_NODES=1"
958+
- "N_ADDITIONAL_FRONTENDS=8"
959+
decode:
960+
num-worker: 2
961+
tp: 1
962+
ep: 1
963+
dp-attn: true
964+
additional-settings:
965+
- "DECODE_NODES=8"

benchmarks/dsr1_fp8_gb200_dynamo-sglang_slurm.sh

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,7 @@ check_env_vars CONC_LIST ISL OSL IMAGE SPEC_DECODING MODEL_PATH \
1212

1313
# Always clone and set up Dynamo
1414
echo "Cloning Dynamo repository..."
15-
if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
16-
git clone --branch ishan/sa-1.1-sgl-dsr1-fp8 https://github.com/ai-dynamo/dynamo.git
17-
else
18-
git clone --branch update-result-file-name https://github.com/Elnifio/dynamo.git
19-
fi
15+
git clone --branch ishan/sa-1.1-sgl-dsr1 https://github.com/ai-dynamo/dynamo.git
2016

2117
cd "$SGL_SLURM_JOBS_PATH"
2218

0 commit comments

Comments
 (0)