@@ -852,22 +852,114 @@ dsr1-fp8-gb200-dynamo-sglang:
852852 - isl : 8192
853853 osl : 1024
854854 search-space :
855+ # Low latency (1 prefill worker at DEP4 and 1 decode worker at DEP4)
855856 - spec-decoding : " none"
856- conc-list : [ 128, 256, 384, 448, 512, 576, 1024, 2048, 4096 ]
857+ conc-list : [ 4, 8, 16, 32, 64, 128, 256, 512 ]
857858 prefill :
858- num-worker : 6
859+ num-worker : 1
859860 # tp, ep, and dp-attn do nothing because they are hardcoded in the following file:
860- # https://github.com/Elnifio/dynamo/blob/update-result-file-name/components/backends/sglang/slurm_jobs/scripts/gb200-fp8.sh
861861 tp : 1
862862 ep : 1
863863 dp-attn : true
864864 additional-settings :
865- - " PREFILL_NODES=12"
865+ - " PREFILL_NODES=1"
866+ - " N_ADDITIONAL_FRONTENDS=8"
867+ decode :
868+ num-worker : 1
869+ tp : 1
870+ ep : 1
871+ dp-attn : true
872+ additional-settings :
873+ - " DECODE_NODES=1"
874+
875+ # Middle and top of curve (5 prefill workers each at DEP8 and 1 decode worker at DEP32)
876+ - spec-decoding : " none"
877+ conc-list : [ 512, 1024, 2048, 6144 ]
878+ prefill :
879+ num-worker : 5
880+ # tp, ep, and dp-attn do nothing because they are hardcoded in the gb200-fp8.sh Slurm launch script
881+ tp : 1
882+ ep : 1
883+ dp-attn : true
884+ additional-settings :
885+ - " PREFILL_NODES=2"
866886 - " N_ADDITIONAL_FRONTENDS=8"
867887 decode :
868888 num-worker : 1
869889 tp : 1
870890 ep : 1
871891 dp-attn : true
872892 additional-settings :
873- - " DECODE_NODES=6"
893+ - " DECODE_NODES=8"
894+
# DeepSeek-R1 FP4 on GB200 with dynamo-sglang (multinode, disaggregated).
# The key carries the precision (fp4) so it does not collide with the
# dsr1-fp8-gb200-dynamo-sglang entry; a duplicate key would silently
# replace that entry in most YAML loaders.
dsr1-fp4-gb200-dynamo-sglang:
  # TODO: swap
  image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1
  # TODO: what is the right name?
  model: deepseek-ai/DeepSeek-R1-0528-fp4-v2
  model-prefix: dsr1
  runner: gb200
  precision: fp4
  framework: dynamo-sglang
  multinode: true
  disagg: true
  seq-len-configs:
    - isl: 1024
      osl: 1024
      search-space:
        # Low latency (1 prefill worker at DEP4 and 2 decode workers at DEP4)
        - spec-decoding: "none"
          # Each concurrency appears once, in ascending order.
          conc-list: [4, 8, 32, 64, 112, 128, 256]
          prefill:
            num-worker: 1
            tp: 1
            ep: 1
            dp-attn: true
            additional-settings:
              - "PREFILL_NODES=1"
              - "N_ADDITIONAL_FRONTENDS=8"
          decode:
            num-worker: 2
            tp: 1
            ep: 1
            dp-attn: true
            additional-settings:
              - "DECODE_NODES=2"

        # Mid curve (1 prefill worker at DEP4 and 1 decode worker at DEP48)
        # NOTE(review): decode num-worker below is 2, but this description
        # says 1 decode worker; confirm which is intended.
        - spec-decoding: "none"
          conc-list: [512, 1024, 2048, 4096, 8192]
          prefill:
            num-worker: 1
            tp: 1
            ep: 1
            dp-attn: true
            additional-settings:
              - "PREFILL_NODES=1"
              - "N_ADDITIONAL_FRONTENDS=8"
          decode:
            num-worker: 2
            tp: 1
            ep: 1
            dp-attn: true
            additional-settings:
              - "DECODE_NODES=12"

        # Top of curve (1 prefill worker at DEP4 and 1 decode worker at DEP32)
        # NOTE(review): decode num-worker below is 2, but this description
        # says 1 decode worker; confirm which is intended.
        - spec-decoding: "none"
          conc-list: [8192, 12000, 15000]
          prefill:
            num-worker: 1
            tp: 1
            ep: 1
            dp-attn: true
            additional-settings:
              - "PREFILL_NODES=1"
              - "N_ADDITIONAL_FRONTENDS=8"
          decode:
            num-worker: 2
            tp: 1
            ep: 1
            dp-attn: true
            additional-settings:
              - "DECODE_NODES=8"
0 commit comments