Skip to content

Commit 255fe34

Browse files
authored
Revert "sglang: add fp8 8k1k and fp4 1k1k (#274)"
This reverts commit efcb4e4.
1 parent efcb4e4 commit 255fe34

3 files changed

Lines changed: 11 additions & 137 deletions

File tree

.github/configs/nvidia-master.yaml

Lines changed: 6 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -798,7 +798,6 @@ dsr1-fp8-gb200-dynamo-sglang:
798798
additional-settings:
799799
- "PREFILL_NODES=4"
800800
- "N_ADDITIONAL_FRONTENDS=9"
801-
- "SCRIPT_MODE=max-tpt"
802801
decode:
803802
num-worker: 1
804803
tp: 1
@@ -853,112 +852,22 @@ dsr1-fp8-gb200-dynamo-sglang:
853852
- isl: 8192
854853
osl: 1024
855854
search-space:
856-
# Low latency (1 prefill worker at DEP4 and 1 decode worker at DEP4)
857855
- spec-decoding: "none"
858-
conc-list: [ 4, 8, 16, 32, 64, 128, 256, 512 ]
856+
conc-list: [ 128, 256, 384, 448, 512, 576, 1024, 2048, 4096 ]
859857
prefill:
860-
num-worker: 1
861-
tp: 1
862-
ep: 1
863-
dp-attn: true
864-
additional-settings:
865-
- "PREFILL_NODES=1"
866-
- "N_ADDITIONAL_FRONTENDS=8"
867-
decode:
868-
num-worker: 1
869-
tp: 1
870-
ep: 1
871-
dp-attn: true
872-
additional-settings:
873-
- "DECODE_NODES=1"
874-
875-
# Middle and top of curve (5 prefill workers each at DEP8 and 1 decode worker at DEP32)
876-
- spec-decoding: "none"
877-
conc-list: [ 512, 1024, 2048, 6144 ]
878-
prefill:
879-
num-worker: 5
880-
tp: 1
881-
ep: 1
882-
dp-attn: true
883-
additional-settings:
884-
- "PREFILL_NODES=2"
885-
- "N_ADDITIONAL_FRONTENDS=8"
886-
decode:
887-
num-worker: 1
888-
tp: 1
889-
ep: 1
890-
dp-attn: true
891-
additional-settings:
892-
- "DECODE_NODES=8"
893-
894-
dsr1-fp4-gb200-dynamo-sglang:
895-
# TODO: swap
896-
image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1
897-
# TODO: what is the right name?
898-
model: deepseek-ai/DeepSeek-R1-0528-fp4-v2
899-
model-prefix: dsr1
900-
runner: gb200
901-
precision: fp4
902-
framework: dynamo-sglang
903-
multinode: true
904-
disagg: true
905-
seq-len-configs:
906-
- isl: 1024
907-
osl: 1024
908-
search-space:
909-
# Low latency (1 prefill worker at DEP4 and 2 decode workers at DEP4)
910-
- spec-decoding: "none"
911-
conc-list: [ 4, 8, 32, 64, 128, 112, 128, 256 ]
912-
prefill:
913-
num-worker: 1
914-
tp: 1
915-
ep: 1
916-
dp-attn: true
917-
additional-settings:
918-
- "PREFILL_NODES=1"
919-
- "N_ADDITIONAL_FRONTENDS=8"
920-
decode:
921-
num-worker: 2
922-
tp: 1
923-
ep: 1
924-
dp-attn: true
925-
additional-settings:
926-
- "DECODE_NODES=2"
927-
928-
# Mid curve (1 prefill worker at DEP4 and 1 decode worker at DEP48)
929-
- spec-decoding: "none"
930-
conc-list: [ 512, 1024, 2048, 4096, 8192 ]
931-
prefill:
932-
num-worker: 1
858+
num-worker: 6
859+
# tp, ep, and dp-attn do nothing because they are hardcoded in the following file:
860+
# https://github.com/Elnifio/dynamo/blob/update-result-file-name/components/backends/sglang/slurm_jobs/scripts/gb200-fp8.sh
933861
tp: 1
934862
ep: 1
935863
dp-attn: true
936864
additional-settings:
937-
- "PREFILL_NODES=1"
865+
- "PREFILL_NODES=12"
938866
- "N_ADDITIONAL_FRONTENDS=8"
939867
decode:
940-
num-worker: 2
941-
tp: 1
942-
ep: 1
943-
dp-attn: true
944-
additional-settings:
945-
- "DECODE_NODES=12"
946-
947-
# Top of curve (1 prefill worker at DEP4 and 1 decode worker at DEP32)
948-
- spec-decoding: "none"
949-
conc-list: [ 8192, 12000, 15000 ]
950-
prefill:
951868
num-worker: 1
952869
tp: 1
953870
ep: 1
954871
dp-attn: true
955872
additional-settings:
956-
- "PREFILL_NODES=1"
957-
- "N_ADDITIONAL_FRONTENDS=8"
958-
decode:
959-
num-worker: 2
960-
tp: 1
961-
ep: 1
962-
dp-attn: true
963-
additional-settings:
964-
- "DECODE_NODES=8"
873+
- "DECODE_NODES=6"

benchmarks/dsr1_fp4_gb200_dynamo-sglang_slurm.sh

Lines changed: 0 additions & 37 deletions
This file was deleted.

benchmarks/dsr1_fp8_gb200_dynamo-sglang_slurm.sh

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@ check_env_vars CONC_LIST ISL OSL IMAGE SPEC_DECODING MODEL_PATH \
1212

1313
# Always clone and setup Dynamo
1414
echo "Cloning Dynamo repository..."
15-
git clone --branch ishan/sa-1.1-sgl-dsr1 https://github.com/ai-dynamo/dynamo.git
15+
if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
16+
git clone --branch ishan/sa-1.1-sgl-dsr1-fp8 https://github.com/ai-dynamo/dynamo.git
17+
else
18+
git clone --branch update-result-file-name https://github.com/Elnifio/dynamo.git
19+
fi
1620

1721
cd "$SGL_SLURM_JOBS_PATH"
1822

@@ -21,7 +25,6 @@ export TIME_LIMIT="04:00:00"
2125
export MODEL_PATH=$MODEL_PATH
2226
export CONFIG_DIR=$CONFIG_DIR
2327
export CONTAINER_IMAGE=$IMAGE
24-
export GPU_TYPE="gb200-fp8"
2528

2629
# Launch jobs based on ISL/OSL
2730
# Replace ' ' in CONC_LIST with 'x' such that the concurrency list is represented
@@ -33,5 +36,4 @@ bash ./submit_disagg.sh $PREFILL_NODES \
3336
$DECODE_NUM_WORKERS \
3437
$N_ADDITIONAL_FRONTENDS \
3538
$ISL $OSL "${CONC_LIST// /x}" inf \
36-
$GPU_TYPE \
3739
$SCRIPT_MODE

0 commit comments

Comments
 (0)