@@ -798,7 +798,6 @@ dsr1-fp8-gb200-dynamo-sglang:
798798 additional-settings :
799799 - " PREFILL_NODES=4"
800800 - " N_ADDITIONAL_FRONTENDS=9"
801- - " SCRIPT_MODE=max-tpt"
802801 decode :
803802 num-worker : 1
804803 tp : 1
@@ -853,112 +852,22 @@ dsr1-fp8-gb200-dynamo-sglang:
853852 - isl : 8192
854853 osl : 1024
855854 search-space :
856- # Low latency (1 prefill worker at DEP4 and 1 decode worker at DEP4)
857855 - spec-decoding : " none"
858- conc-list : [ 4, 8, 16, 32, 64, 128, 256, 512 ]
856+ conc-list : [ 128, 256, 384, 448, 512, 576, 1024, 2048, 4096 ]
859857 prefill :
860- num-worker : 1
861- tp : 1
862- ep : 1
863- dp-attn : true
864- additional-settings :
865- - " PREFILL_NODES=1"
866- - " N_ADDITIONAL_FRONTENDS=8"
867- decode :
868- num-worker : 1
869- tp : 1
870- ep : 1
871- dp-attn : true
872- additional-settings :
873- - " DECODE_NODES=1"
874-
875- # Middle and top of curve (5 prefill workers each at DEP8 and 1 decode worker at DEP32)
876- - spec-decoding : " none"
877- conc-list : [ 512, 1024, 2048, 6144 ]
878- prefill :
879- num-worker : 5
880- tp : 1
881- ep : 1
882- dp-attn : true
883- additional-settings :
884- - " PREFILL_NODES=2"
885- - " N_ADDITIONAL_FRONTENDS=8"
886- decode :
887- num-worker : 1
888- tp : 1
889- ep : 1
890- dp-attn : true
891- additional-settings :
892- - " DECODE_NODES=8"
893-
894- dsr1-fp4-gb200-dynamo-sglang :
895- # TODO: swap
896- image : nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1
897- # TODO: what is the right name?
898- model : deepseek-ai/DeepSeek-R1-0528-fp4-v2
899- model-prefix : dsr1
900- runner : gb200
901- precision : fp4
902- framework : dynamo-sglang
903- multinode : true
904- disagg : true
905- seq-len-configs :
906- - isl : 1024
907- osl : 1024
908- search-space :
909- # Low latency (1 prefill worker at DEP4 and 2 decode workers at DEP4)
910- - spec-decoding : " none"
911- conc-list : [ 4, 8, 32, 64, 128, 112, 128, 256 ]
912- prefill :
913- num-worker : 1
914- tp : 1
915- ep : 1
916- dp-attn : true
917- additional-settings :
918- - " PREFILL_NODES=1"
919- - " N_ADDITIONAL_FRONTENDS=8"
920- decode :
921- num-worker : 2
922- tp : 1
923- ep : 1
924- dp-attn : true
925- additional-settings :
926- - " DECODE_NODES=2"
927-
928- # Mid curve (1 prefill worker at DEP4 and 1 decode workers at DEP48)
929- - spec-decoding : " none"
930- conc-list : [ 512, 1024, 2048, 4096, 8192 ]
931- prefill :
932- num-worker : 1
858+ num-worker : 6
859+ # tp, ep, and dp-attn do nothing because they are hardcoded in the following file:
860+ # https://github.com/Elnifio/dynamo/blob/update-result-file-name/components/backends/sglang/slurm_jobs/scripts/gb200-fp8.sh
933861 tp : 1
934862 ep : 1
935863 dp-attn : true
936864 additional-settings :
937- - " PREFILL_NODES=1 "
865+ - " PREFILL_NODES=12 "
938866 - " N_ADDITIONAL_FRONTENDS=8"
939867 decode :
940- num-worker : 2
941- tp : 1
942- ep : 1
943- dp-attn : true
944- additional-settings :
945- - " DECODE_NODES=12"
946-
947- # Top of curve (1 prefill worker at DEP4 and 1 decode worker at DEP32)
948- - spec-decoding : " none"
949- conc-list : [ 8192, 12000, 15000 ]
950- prefill :
951868 num-worker : 1
952869 tp : 1
953870 ep : 1
954871 dp-attn : true
955872 additional-settings :
956- - " PREFILL_NODES=1"
957- - " N_ADDITIONAL_FRONTENDS=8"
958- decode :
959- num-worker : 2
960- tp : 1
961- ep : 1
962- dp-attn : true
963- additional-settings :
964- - " DECODE_NODES=8"
873+ - " DECODE_NODES=6"
0 commit comments