@@ -8241,86 +8241,97 @@ dsv4-fp4-gb200-dynamo-sglang-mtp3:
82418241 - isl : 8192
82428242 osl : 256
82438243 search-space :
8244- # ===== DISAGG ENTRIES TEMPORARILY DISABLED =====
8245- # All 7 produce 0 output tokens due to dynamo multi-node prefill
8246- # registration bug on GB200. Restore by uncommenting once dynamo
8247- # is fixed upstream. Agg sweep below stays active.
8248- # # GB200 extrapolation of the GB300 SGLang MTP3 run 25761192586.
8249- # # TP4/DEP4 points are widened to TP8/DEP8 for GB200.
8250- # - spec-decoding: mtp
8251- # conc-list: [512]
8252- # prefill:
8253- # num-worker: 1
8254- # tp: 8
8255- # ep: 1
8256- # dp-attn: false
8257- # additional-settings:
8258- # - "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb200-low-latency-1p1d-tp8-tp8-mtp.yaml"
8259- # decode:
8260- # num-worker: 1
8261- # tp: 8
8262- # ep: 1
8263- # dp-attn: false
8264- # - spec-decoding: mtp
8265- # conc-list: [2048]
8266- # prefill:
8267- # num-worker: 1
8268- # tp: 8
8269- # ep: 8
8270- # dp-attn: true
8271- # additional-settings:
8272- # - "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb200-low-latency-1p6d-dep8-tp8-mtp.yaml"
8273- # decode:
8274- # num-worker: 6
8275- # tp: 8
8276- # ep: 1
8277- # dp-attn: false
8278- # - spec-decoding: mtp
8279- # conc-list: [4096]
8280- # prefill:
8281- # num-worker: 1
8282- # tp: 8
8283- # ep: 8
8284- # dp-attn: true
8285- # additional-settings:
8286- # - "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-1p1d-dep8-dep8-mtp.yaml"
8287- # decode:
8288- # num-worker: 1
8289- # tp: 8
8290- # ep: 8
8291- # dp-attn: true
8292- # - spec-decoding: mtp
8293- # conc-list: [4096]
8294- # prefill:
8295- # num-worker: 1
8296- # tp: 8
8297- # ep: 8
8298- # dp-attn: true
8299- # additional-settings:
8300- # - "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-1p1d-dep8-dep16-mtp.yaml"
8301- # decode:
8302- # num-worker: 1
8303- # tp: 16
8304- # ep: 16
8305- # dp-attn: true
8306- # - spec-decoding: mtp
8307- # conc-list: [12288]
8308- # prefill:
8309- # num-worker: 5
8310- # tp: 8
8311- # ep: 8
8312- # dp-attn: true
8313- # additional-settings:
8314- # - "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-5p1d-dep8-dep8-mtp-c12288.yaml"
8315- # decode:
8316- # num-worker: 1
8317- # tp: 8
8318- # ep: 8
8319- # dp-attn: true
8320- # Single conc-16384 disagg entry re-enabled to validate the
8321- # bootstrap-port pin (fix 1 in recipes) + srt-slurm main branch
8322- # switch (fix 2 in launch_gb200-nv.sh). Other disagg entries stay
8323- # commented until this one validates.
8244+ # GB200 extrapolation of the GB300 SGLang MTP3 run 25761192586.
8245+ # TP4/DEP4 points are widened to TP8/DEP8 for GB200. All 7 disagg
8246+ # entries were validated by run 25843677684 after the recipe-level
8247+ # `disaggregation-bootstrap-port: 30001` pin landed. Before the pin
8248+ # they produced 0 output tokens, because srt-slurm sa-submission-q2-2026
8249+ # omits the bootstrap-port arg and each prefill node picked its own
8250+ # random port.
8251+ - spec-decoding : mtp
8252+ conc-list : [512]
8253+ prefill :
8254+ num-worker : 1
8255+ tp : 8
8256+ ep : 1
8257+ dp-attn : false
8258+ additional-settings :
8259+ - " CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb200-low-latency-1p1d-tp8-tp8-mtp.yaml"
8260+ decode :
8261+ num-worker : 1
8262+ tp : 8
8263+ ep : 1
8264+ dp-attn : false
8265+ - spec-decoding : mtp
8266+ conc-list : [2048]
8267+ prefill :
8268+ num-worker : 1
8269+ tp : 8
8270+ ep : 8
8271+ dp-attn : true
8272+ additional-settings :
8273+ - " CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb200-low-latency-1p6d-dep8-tp8-mtp.yaml"
8274+ decode :
8275+ num-worker : 6
8276+ tp : 8
8277+ ep : 1
8278+ dp-attn : false
8279+ - spec-decoding : mtp
8280+ conc-list : [4096]
8281+ prefill :
8282+ num-worker : 1
8283+ tp : 8
8284+ ep : 8
8285+ dp-attn : true
8286+ additional-settings :
8287+ - " CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-1p1d-dep8-dep8-mtp.yaml"
8288+ decode :
8289+ num-worker : 1
8290+ tp : 8
8291+ ep : 8
8292+ dp-attn : true
8293+ - spec-decoding : mtp
8294+ conc-list : [4096]
8295+ prefill :
8296+ num-worker : 1
8297+ tp : 8
8298+ ep : 8
8299+ dp-attn : true
8300+ additional-settings :
8301+ - " CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-1p1d-dep8-dep16-mtp.yaml"
8302+ decode :
8303+ num-worker : 1
8304+ tp : 16
8305+ ep : 16
8306+ dp-attn : true
8307+ - spec-decoding : mtp
8308+ conc-list : [8192]
8309+ prefill :
8310+ num-worker : 4
8311+ tp : 8
8312+ ep : 8
8313+ dp-attn : true
8314+ additional-settings :
8315+ - " CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-4p1d-dep8-dep8-mtp-c8192.yaml"
8316+ decode :
8317+ num-worker : 1
8318+ tp : 8
8319+ ep : 8
8320+ dp-attn : true
8321+ - spec-decoding : mtp
8322+ conc-list : [12288]
8323+ prefill :
8324+ num-worker : 5
8325+ tp : 8
8326+ ep : 8
8327+ dp-attn : true
8328+ additional-settings :
8329+ - " CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-5p1d-dep8-dep8-mtp-c12288.yaml"
8330+ decode :
8331+ num-worker : 1
8332+ tp : 8
8333+ ep : 8
8334+ dp-attn : true
83248335 - spec-decoding : mtp
83258336 conc-list : [16384]
83268337 prefill :
@@ -8335,112 +8346,96 @@ dsv4-fp4-gb200-dynamo-sglang-mtp3:
83358346 tp : 8
83368347 ep : 8
83378348 dp-attn : true
8338- # - spec-decoding: mtp
8339- # conc-list: [8192]
8340- # prefill:
8341- # num-worker: 4
8342- # tp: 8
8343- # ep: 8
8344- # dp-attn: true
8345- # additional-settings:
8346- # - "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/disagg-gb200-mid-curve-4p1d-dep8-dep8-mtp-c8192.yaml"
8347- # decode:
8348- # num-worker: 1
8349- # tp: 8
8350- # ep: 8
8351- # dp-attn: true
8352- # ===== END DISABLED BLOCK =====
8353-
8354- # # Aggregated fallback sweep: single agg worker, TP=8 across 2 nodes,
8355- # # DP attention + EAGLE MTP. Sidesteps the dynamo multi-node prefill
8356- # # registration bug that makes the disagg entries above produce 0
8357- # # output tokens on GB200 (see runs 25785003012, 25812320128). decode
8358- # # num-worker=0 signals aggregated. High-conc points queue server-side
8359- # # since there's only one worker pool.
8360- # - spec-decoding: mtp
8361- # conc-list: [512]
8362- # prefill:
8363- # num-worker: 1
8364- # tp: 8
8365- # ep: 8
8366- # dp-attn: true
8367- # additional-settings:
8368- # - "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/agg-gb200-tp8-dep8-mtp-c512.yaml"
8369- # decode:
8370- # num-worker: 0
8371- # tp: 8
8372- # ep: 8
8373- # dp-attn: true
8374- # - spec-decoding: mtp
8375- # conc-list: [2048]
8376- # prefill:
8377- # num-worker: 1
8378- # tp: 8
8379- # ep: 8
8380- # dp-attn: true
8381- # additional-settings:
8382- # - "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/agg-gb200-tp8-dep8-mtp-c2048.yaml"
8383- # decode:
8384- # num-worker: 0
8385- # tp: 8
8386- # ep: 8
8387- # dp-attn: true
8388- # - spec-decoding: mtp
8389- # conc-list: [4096]
8390- # prefill:
8391- # num-worker: 1
8392- # tp: 8
8393- # ep: 8
8394- # dp-attn: true
8395- # additional-settings:
8396- # - "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/agg-gb200-tp8-dep8-mtp-c4096.yaml"
8397- # decode:
8398- # num-worker: 0
8399- # tp: 8
8400- # ep: 8
8401- # dp-attn: true
8402- # - spec-decoding: mtp
8403- # conc-list: [8192]
8404- # prefill:
8405- # num-worker: 1
8406- # tp: 8
8407- # ep: 8
8408- # dp-attn: true
8409- # additional-settings:
8410- # - "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/agg-gb200-tp8-dep8-mtp-c8192.yaml"
8411- # decode:
8412- # num-worker: 0
8413- # tp: 8
8414- # ep: 8
8415- # dp-attn: true
8416- # - spec-decoding: mtp
8417- # conc-list: [12288]
8418- # prefill:
8419- # num-worker: 1
8420- # tp: 8
8421- # ep: 8
8422- # dp-attn: true
8423- # additional-settings:
8424- # - "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/agg-gb200-tp8-dep8-mtp-c12288.yaml"
8425- # decode:
8426- # num-worker: 0
8427- # tp: 8
8428- # ep: 8
8429- # dp-attn: true
8430- # - spec-decoding: mtp
8431- # conc-list: [16384]
8432- # prefill:
8433- # num-worker: 1
8434- # tp: 8
8435- # ep: 8
8436- # dp-attn: true
8437- # additional-settings:
8438- # - "CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/agg-gb200-tp8-dep8-mtp-c16384.yaml"
8439- # decode:
8440- # num-worker: 0
8441- # tp: 8
8442- # ep: 8
8443- # dp-attn: true
8349+
8350+ # Aggregated comparison sweep: single agg worker, TP=8 across 2
8351+ # nodes, DP attention + EAGLE MTP. Pairs with the disagg curve
8352+ # above to show the disagg-vs-agg delta at matching concurrencies.
8353+ # A decode num-worker of 0 marks an entry as aggregated.
8354+ # High-concurrency points queue server-side: there is only one worker pool.
8355+ - spec-decoding : mtp
8356+ conc-list : [512]
8357+ prefill :
8358+ num-worker : 1
8359+ tp : 8
8360+ ep : 8
8361+ dp-attn : true
8362+ additional-settings :
8363+ - " CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/agg-gb200-tp8-dep8-mtp-c512.yaml"
8364+ decode :
8365+ num-worker : 0
8366+ tp : 8
8367+ ep : 8
8368+ dp-attn : true
8369+ - spec-decoding : mtp
8370+ conc-list : [2048]
8371+ prefill :
8372+ num-worker : 1
8373+ tp : 8
8374+ ep : 8
8375+ dp-attn : true
8376+ additional-settings :
8377+ - " CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/agg-gb200-tp8-dep8-mtp-c2048.yaml"
8378+ decode :
8379+ num-worker : 0
8380+ tp : 8
8381+ ep : 8
8382+ dp-attn : true
8383+ - spec-decoding : mtp
8384+ conc-list : [4096]
8385+ prefill :
8386+ num-worker : 1
8387+ tp : 8
8388+ ep : 8
8389+ dp-attn : true
8390+ additional-settings :
8391+ - " CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/agg-gb200-tp8-dep8-mtp-c4096.yaml"
8392+ decode :
8393+ num-worker : 0
8394+ tp : 8
8395+ ep : 8
8396+ dp-attn : true
8397+ - spec-decoding : mtp
8398+ conc-list : [8192]
8399+ prefill :
8400+ num-worker : 1
8401+ tp : 8
8402+ ep : 8
8403+ dp-attn : true
8404+ additional-settings :
8405+ - " CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/agg-gb200-tp8-dep8-mtp-c8192.yaml"
8406+ decode :
8407+ num-worker : 0
8408+ tp : 8
8409+ ep : 8
8410+ dp-attn : true
8411+ - spec-decoding : mtp
8412+ conc-list : [12288]
8413+ prefill :
8414+ num-worker : 1
8415+ tp : 8
8416+ ep : 8
8417+ dp-attn : true
8418+ additional-settings :
8419+ - " CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/agg-gb200-tp8-dep8-mtp-c12288.yaml"
8420+ decode :
8421+ num-worker : 0
8422+ tp : 8
8423+ ep : 8
8424+ dp-attn : true
8425+ - spec-decoding : mtp
8426+ conc-list : [16384]
8427+ prefill :
8428+ num-worker : 1
8429+ tp : 8
8430+ ep : 8
8431+ dp-attn : true
8432+ additional-settings :
8433+ - " CONFIG_FILE=recipes/sglang/deepseek-v4/8k1k/agg-gb200-tp8-dep8-mtp-c16384.yaml"
8434+ decode :
8435+ num-worker : 0
8436+ tp : 8
8437+ ep : 8
8438+ dp-attn : true
84448439
84458440# MTP variant of dsv4-fp4-gb200-dynamo-vllm. Uses the vLLM 0.20.1 image
84468441# and hand-picked 8k/1k Pareto points mirrored from NVIDIA/srt-slurm.
0 commit comments