@@ -2390,6 +2390,306 @@ glm5-fp4-b300-sglang-mtp:
23902390 - { tp: 8, ep: 1, conc-start: 4, conc-end: 4, spec-decoding: mtp }
23912391 - { tp: 4, ep: 1, conc-start: 4, conc-end: 256, spec-decoding: mtp }
23922392
2393+ glm5-fp4-gb300-dynamo-trt :
2394+ image : nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:1.3.0-dev.1-cuda13
2395+ model : nvidia/GLM-5-NVFP4
2396+ model-prefix : glm5
2397+ runner : gb300-nv
2398+ precision : fp4
2399+ framework : dynamo-trt
2400+ multinode : true
2401+ disagg : true
2402+ scenarios :
2403+ fixed-seq-len :
2404+ - isl : 1024
2405+ osl : 1024
2406+ search-space :
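2407+ # STP configurations for isl 1024 / osl 1024: every recipe below comes from ISL1K_OSL1K/STP/ and carries the _mtp0 suffix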
2408+ - conc-list : [ 4 ]
2409+ prefill :
2410+ num-worker : 1
2411+ tp : 2
2412+ ep : 2
2413+ dp-attn : true
2414+ additional-settings :
2415+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen4tep8_batch1_eplb0_mtp0.yaml
2416+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen4tep8_batch1_eplb0_mtp0.yaml"
2417+ decode :
2418+ num-worker : 4
2419+ tp : 8
2420+ ep : 8
2421+ dp-attn : false
2422+ - conc-list : [ 5 ]
2423+ prefill :
2424+ num-worker : 1
2425+ tp : 2
2426+ ep : 2
2427+ dp-attn : true
2428+ additional-settings :
2429+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen5tep4_batch1_eplb0_mtp0.yaml
2430+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen5tep4_batch1_eplb0_mtp0.yaml"
2431+ decode :
2432+ num-worker : 5
2433+ tp : 4
2434+ ep : 4
2435+ dp-attn : false
2436+ - conc-list : [ 24 ]
2437+ prefill :
2438+ num-worker : 1
2439+ tp : 2
2440+ ep : 2
2441+ dp-attn : true
2442+ additional-settings :
2443+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen4tep8_batch4_eplb0_mtp0.yaml
2444+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen4tep8_batch4_eplb0_mtp0.yaml"
2445+ decode :
2446+ num-worker : 4
2447+ tp : 8
2448+ ep : 8
2449+ dp-attn : false
2450+ - conc-list : [ 92 ]
2451+ prefill :
2452+ num-worker : 1
2453+ tp : 2
2454+ ep : 2
2455+ dp-attn : true
2456+ additional-settings :
2457+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen4tep8_batch16_eplb0_mtp0.yaml
2458+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen4tep8_batch16_eplb0_mtp0.yaml"
2459+ decode :
2460+ num-worker : 4
2461+ tp : 8
2462+ ep : 8
2463+ dp-attn : false
2464+ - conc-list : [ 105 ]
2465+ prefill :
2466+ num-worker : 1
2467+ tp : 2
2468+ ep : 2
2469+ dp-attn : true
2470+ additional-settings :
2471+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen5tep4_batch16_eplb0_mtp0.yaml
2472+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen5tep4_batch16_eplb0_mtp0.yaml"
2473+ decode :
2474+ num-worker : 5
2475+ tp : 4
2476+ ep : 4
2477+ dp-attn : false
2478+ - conc-list : [ 336 ]
2479+ prefill :
2480+ num-worker : 1
2481+ tp : 2
2482+ ep : 2
2483+ dp-attn : true
2484+ additional-settings :
2485+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen4tep8_batch64_eplb0_mtp0.yaml
2486+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen4tep8_batch64_eplb0_mtp0.yaml"
2487+ decode :
2488+ num-worker : 4
2489+ tp : 8
2490+ ep : 8
2491+ dp-attn : false
2492+ - conc-list : [ 666 ]
2493+ prefill :
2494+ num-worker : 1
2495+ tp : 2
2496+ ep : 2
2497+ dp-attn : true
2498+ additional-settings :
2499+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen1dep32_batch16_eplb0_mtp0.yaml
2500+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx1dep2_gen1dep32_batch16_eplb0_mtp0.yaml"
2501+ decode :
2502+ num-worker : 1
2503+ tp : 32
2504+ ep : 32
2505+ dp-attn : true
2506+ - conc-list : [ 2253 ]
2507+ prefill :
2508+ num-worker : 2
2509+ tp : 2
2510+ ep : 2
2511+ dp-attn : true
2512+ additional-settings :
2513+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx2dep2_gen1dep16_batch128_eplb0_mtp0.yaml
2514+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx2dep2_gen1dep16_batch128_eplb0_mtp0.yaml"
2515+ decode :
2516+ num-worker : 1
2517+ tp : 16
2518+ ep : 16
2519+ dp-attn : true
2520+ - conc-list : [ 2253 ]
2521+ prefill :
2522+ num-worker : 3
2523+ tp : 2
2524+ ep : 2
2525+ dp-attn : true
2526+ additional-settings :
2527+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx3dep2_gen1dep32_batch64_eplb0_mtp0.yaml
2528+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx3dep2_gen1dep32_batch64_eplb0_mtp0.yaml"
2529+ decode :
2530+ num-worker : 1
2531+ tp : 32
2532+ ep : 32
2533+ dp-attn : true
2534+ - conc-list : [ 4301 ]
2535+ prefill :
2536+ num-worker : 3
2537+ tp : 2
2538+ ep : 2
2539+ dp-attn : true
2540+ additional-settings :
2541+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx3dep2_gen1dep16_batch256_eplb0_mtp0.yaml
2542+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx3dep2_gen1dep16_batch256_eplb0_mtp0.yaml"
2543+ decode :
2544+ num-worker : 1
2545+ tp : 16
2546+ ep : 16
2547+ dp-attn : true
2548+ - conc-list : [ 8192 ]
2549+ prefill :
2550+ num-worker : 4
2551+ tp : 2
2552+ ep : 2
2553+ dp-attn : true
2554+ additional-settings :
2555+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx4dep2_gen1dep16_batch512_eplb256_mtp0.yaml
2556+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL1K_OSL1K/STP/ctx4dep2_gen1dep16_batch512_eplb256_mtp0.yaml"
2557+ decode :
2558+ num-worker : 1
2559+ tp : 16
2560+ ep : 16
2561+ dp-attn : true
2562+ - isl : 8192
2563+ osl : 1024
2564+ search-space :
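2565+ # STP configurations for isl 8192 / osl 1024: every recipe below comes from ISL8K_OSL1K/STP/ and carries the _mtp0 suffix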
2566+ - conc-list : [ 10 ]
2567+ prefill :
2568+ num-worker : 1
2569+ tp : 2
2570+ ep : 2
2571+ dp-attn : true
2572+ additional-settings :
2573+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx1dep2_gen5tep4_batch2_eplb0_mtp0.yaml
2574+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx1dep2_gen5tep4_batch2_eplb0_mtp0.yaml"
2575+ decode :
2576+ num-worker : 5
2577+ tp : 4
2578+ ep : 4
2579+ dp-attn : false
2580+ - conc-list : [ 25 ]
2581+ prefill :
2582+ num-worker : 1
2583+ tp : 2
2584+ ep : 2
2585+ dp-attn : true
2586+ additional-settings :
2587+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx1dep2_gen5tep4_batch4_eplb0_mtp0.yaml
2588+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx1dep2_gen5tep4_batch4_eplb0_mtp0.yaml"
2589+ decode :
2590+ num-worker : 5
2591+ tp : 4
2592+ ep : 4
2593+ dp-attn : false
2594+ - conc-list : [ 50 ]
2595+ prefill :
2596+ num-worker : 1
2597+ tp : 2
2598+ ep : 2
2599+ dp-attn : true
2600+ additional-settings :
2601+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx1dep2_gen5tep4_batch8_eplb0_mtp0.yaml
2602+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx1dep2_gen5tep4_batch8_eplb0_mtp0.yaml"
2603+ decode :
2604+ num-worker : 5
2605+ tp : 4
2606+ ep : 4
2607+ dp-attn : false
2608+ - conc-list : [ 100 ]
2609+ prefill :
2610+ num-worker : 1
2611+ tp : 2
2612+ ep : 2
2613+ dp-attn : true
2614+ additional-settings :
2615+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx1dep2_gen5tep4_batch16_eplb0_mtp0.yaml
2616+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx1dep2_gen5tep4_batch16_eplb0_mtp0.yaml"
2617+ decode :
2618+ num-worker : 5
2619+ tp : 4
2620+ ep : 4
2621+ dp-attn : false
2622+ - conc-list : [ 308 ]
2623+ prefill :
2624+ num-worker : 3
2625+ tp : 2
2626+ ep : 2
2627+ dp-attn : true
2628+ additional-settings :
2629+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx3dep2_gen1dep32_batch8_eplb0_mtp0.yaml
2630+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx3dep2_gen1dep32_batch8_eplb0_mtp0.yaml"
2631+ decode :
2632+ num-worker : 1
2633+ tp : 32
2634+ ep : 32
2635+ dp-attn : true
2636+ - conc-list : [ 615 ]
2637+ prefill :
2638+ num-worker : 6
2639+ tp : 2
2640+ ep : 2
2641+ dp-attn : true
2642+ additional-settings :
2643+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx6dep2_gen1dep32_batch16_eplb0_mtp0.yaml
2644+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx6dep2_gen1dep32_batch16_eplb0_mtp0.yaml"
2645+ decode :
2646+ num-worker : 1
2647+ tp : 32
2648+ ep : 32
2649+ dp-attn : true
2650+ - conc-list : [ 1076 ]
2651+ prefill :
2652+ num-worker : 9
2653+ tp : 2
2654+ ep : 2
2655+ dp-attn : true
2656+ additional-settings :
2657+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx9dep2_gen1dep16_batch64_eplb0_mtp0.yaml
2658+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx9dep2_gen1dep16_batch64_eplb0_mtp0.yaml"
2659+ decode :
2660+ num-worker : 1
2661+ tp : 16
2662+ ep : 16
2663+ dp-attn : true
2664+ - conc-list : [ 1229 ]
2665+ prefill :
2666+ num-worker : 11
2667+ tp : 2
2668+ ep : 2
2669+ dp-attn : true
2670+ additional-settings :
2671+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx11dep2_gen1dep32_batch32_eplb0_mtp0.yaml
2672+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx11dep2_gen1dep32_batch32_eplb0_mtp0.yaml"
2673+ decode :
2674+ num-worker : 1
2675+ tp : 32
2676+ ep : 32
2677+ dp-attn : true
2678+ - conc-list : [ 2151 ]
2679+ prefill :
2680+ num-worker : 15
2681+ tp : 2
2682+ ep : 2
2683+ dp-attn : true
2684+ additional-settings :
2685+ # https://github.com/NVIDIA/srt-slurm/blob/sa-submission-q2-2026/recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx15dep2_gen1dep16_batch128_eplb0_mtp0.yaml
2686+ - " CONFIG_FILE=recipes/GLM5/disagg/trtllm_dynamo/gb300_nvfp4/ISL8K_OSL1K/STP/ctx15dep2_gen1dep16_batch128_eplb0_mtp0.yaml"
2687+ decode :
2688+ num-worker : 1
2689+ tp : 16
2690+ ep : 16
2691+ dp-attn : true
2692+
23932693qwen3.5-fp8-b200-sglang-mtp :
23942694 image : lmsysorg/sglang:v0.5.12-cu130
23952695 model : Qwen/Qwen3.5-397B-A17B-FP8
0 commit comments