@@ -890,74 +890,74 @@ dsr1-fp8-gb200-dynamo-sglang:
890890 additional-settings :
891891 - " DECODE_NODES=8"
892892
893- dsr1-fp4-gb200-dynamo-sglang :
894- # TODO: swap
895- image : nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1
896- # TODO: what is the right name?
897- model : deepseek-ai/DeepSeek-R1-0528-fp4-v2
898- model-prefix : dsr1
899- runner : gb200
900- precision : fp4
901- framework : dynamo-sglang
902- multinode : true
903- disagg : true
904- seq-len-configs :
905- - isl : 1024
906- osl : 1024
907- search-space :
908- # Low latency (1 prefill worker at DEP4 and 2 decode workers at DEP4)
909- - spec-decoding : " none"
910- conc-list : [ 4, 8, 32, 64, 128, 112, 128, 256 ]
911- prefill :
912- num-worker : 1
913- tp : 1
914- ep : 1
915- dp-attn : true
916- additional-settings :
917- - " PREFILL_NODES=1"
918- - " N_ADDITIONAL_FRONTENDS=8"
919- decode :
920- num-worker : 2
921- tp : 1
922- ep : 1
923- dp-attn : true
924- additional-settings :
925- - " DECODE_NODES=2"
926-
927- # Mid curve (1 prefill worker at DEP4 and 1 decode workers at DEP48)
928- - spec-decoding : " none"
929- conc-list : [ 512, 1024, 2048, 4096, 8192 ]
930- prefill :
931- num-worker : 1
932- tp : 1
933- ep : 1
934- dp-attn : true
935- additional-settings :
936- - " PREFILL_NODES=1"
937- - " N_ADDITIONAL_FRONTENDS=8"
938- decode :
939- num-worker : 2
940- tp : 1
941- ep : 1
942- dp-attn : true
943- additional-settings :
944- - " DECODE_NODES=12"
945-
946- # Top of curve (1 prefill worker at DEP4 and 1 decode worker at DEP32)
947- - spec-decoding : " none"
948- conc-list : [ 8192, 12000, 15000 ]
949- prefill :
950- num-worker : 1
951- tp : 1
952- ep : 1
953- dp-attn : true
954- additional-settings :
955- - " PREFILL_NODES=1"
956- - " N_ADDITIONAL_FRONTENDS=8"
957- decode :
958- num-worker : 2
959- tp : 1
960- ep : 1
961- dp-attn : true
962- additional-settings :
963- - " DECODE_NODES=8"
893+ dsr1-fp4-gb200-dynamo-sglang :
894+ # TODO: swap this image (currently a pre-release rc tag)
895+ image : nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.5.1-rc0.pre1
896+ # TODO: confirm the correct model name
897+ model : deepseek-ai/DeepSeek-R1-0528-fp4-v2
898+ model-prefix : dsr1
899+ runner : gb200
900+ precision : fp4
901+ framework : dynamo-sglang
902+ multinode : true
903+ disagg : true
904+ seq-len-configs :
905+ - isl : 1024
906+ osl : 1024
907+ search-space :
908+ # Low latency (1 prefill worker at DEP4 and 2 decode workers at DEP4)
909+ - spec-decoding : " none"
910+ conc-list : [ 4, 8, 32, 64, 128, 112, 128, 256 ]
911+ prefill :
912+ num-worker : 1
913+ tp : 1
914+ ep : 1
915+ dp-attn : true
916+ additional-settings :
917+ - " PREFILL_NODES=1"
918+ - " N_ADDITIONAL_FRONTENDS=8"
919+ decode :
920+ num-worker : 2
921+ tp : 1
922+ ep : 1
923+ dp-attn : true
924+ additional-settings :
925+ - " DECODE_NODES=2"
926+
927+ # Mid curve (1 prefill worker at DEP4 and 1 decode worker at DEP48)
928+ - spec-decoding : " none"
929+ conc-list : [ 512, 1024, 2048, 4096, 8192 ]
930+ prefill :
931+ num-worker : 1
932+ tp : 1
933+ ep : 1
934+ dp-attn : true
935+ additional-settings :
936+ - " PREFILL_NODES=1"
937+ - " N_ADDITIONAL_FRONTENDS=8"
938+ decode :
939+ num-worker : 2
940+ tp : 1
941+ ep : 1
942+ dp-attn : true
943+ additional-settings :
944+ - " DECODE_NODES=12"
945+
946+ # Top of curve (1 prefill worker at DEP4 and 1 decode worker at DEP32)
947+ - spec-decoding : " none"
948+ conc-list : [ 8192, 12000, 15000 ]
949+ prefill :
950+ num-worker : 1
951+ tp : 1
952+ ep : 1
953+ dp-attn : true
954+ additional-settings :
955+ - " PREFILL_NODES=1"
956+ - " N_ADDITIONAL_FRONTENDS=8"
957+ decode :
958+ num-worker : 2
959+ tp : 1
960+ ep : 1
961+ dp-attn : true
962+ additional-settings :
963+ - " DECODE_NODES=8"
0 commit comments