@@ -33,7 +33,7 @@ name: "disagg-gb300-12p1d-dep4-dep12-15-c21504"
3333
3434model :
3535 path : " deepseek-v4-pro"
36- container : " lmsysorg/sglang:nightly-dev-cu13-20260519-dbac4647 "
36+ container : " lmsysorg/sglang-staging:deepseek-v4-grace-blackwell-dev "
3737 precision : " fp4"
3838
3939dynamo :
@@ -74,14 +74,24 @@ backend:
7474
7575 prefill_environment :
7676 PYTHONUNBUFFERED : " 1"
77- SGLANG_RADIX_FORCE_MISS : " 1"
77+ SGLANG_RADIX_DISABLE_REUSE : " 1"
7878 SGLANG_JIT_DEEPGEMM_FAST_WARMUP : " 1"
79- SGLANG_DEFAULT_THINKING : " 1"
80- SGLANG_DSV4_REASONING_EFFORT : " max"
79+ SGLANG_ENABLE_THINKING : " 1"
80+ SGLANG_REASONING_EFFORT : " max"
8181 SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT : " 1"
8282 SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN : " 1"
83+ SGLANG_OPT_USE_JIT_NORM : " 1"
84+ SGLANG_OPT_USE_JIT_INDEXER_METADATA : " 1"
85+ SGLANG_OPT_USE_TOPK_V2 : " 1"
86+ SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2 : " 1"
87+ SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE : " 1"
88+ SGLANG_OPT_FIX_HASH_MEGA_MOE : " 1"
89+ SGLANG_OPT_USE_FAST_MASK_EP : " 1"
90+ SGLANG_OPT_FIX_MEGA_MOE_MEMORY : " 1"
8391 SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK : " 8192"
92+ SGLANG_OPT_FIX_NEXTN_MEGA_MOE : " 1"
8493 SGLANG_OPT_USE_ONLINE_COMPRESS : " 1"
94+ SGLANG_OPT_FP8_WO_A_GEMM : " 1"
8595 SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK : " 0"
8696 NCCL_MNNVL_ENABLE : " 1"
8797 NCCL_CUMEM_ENABLE : " 1"
@@ -94,17 +104,24 @@ backend:
94104 SGLANG_LOG_FORWARD_ITERS : " 1"
95105 SGLANG_LOG_MS : " 1"
96106 SGLANG_REQUEST_STATE_WAIT_TIMEOUT : " 60"
97- SGLANG_OPT_FP8_WO_A_GEMM : " 0"
98107
99108 decode_environment :
100109 PYTHONUNBUFFERED : " 1"
101- SGLANG_RADIX_FORCE_MISS : " 1"
110+ SGLANG_RADIX_DISABLE_REUSE : " 1"
102111 SGLANG_JIT_DEEPGEMM_FAST_WARMUP : " 1"
103- SGLANG_DEFAULT_THINKING : " 1"
104- SGLANG_DSV4_REASONING_EFFORT : " max"
112+ SGLANG_ENABLE_THINKING : " 1"
113+ SGLANG_REASONING_EFFORT : " max"
105114 SGLANG_OPT_SWA_SPLIT_LEAF_ON_INSERT : " 1"
106115 SGLANG_OPT_SWA_EVICT_DROP_PAGE_MARGIN : " 1"
116+ SGLANG_OPT_USE_JIT_NORM : " 1"
117+ SGLANG_OPT_USE_JIT_INDEXER_METADATA : " 1"
118+ SGLANG_OPT_USE_TOPK_V2 : " 1"
119+ SGLANG_OPT_USE_DEEPGEMM_MEGA_MOE : " 1"
120+ SGLANG_OPT_FIX_HASH_MEGA_MOE : " 1"
121+ SGLANG_OPT_USE_FAST_MASK_EP : " 1"
122+ SGLANG_OPT_FIX_MEGA_MOE_MEMORY : " 1"
107123 SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK : " 1280"
124+ SGLANG_OPT_FIX_NEXTN_MEGA_MOE : " 1"
108125 SGLANG_OPT_USE_ONLINE_COMPRESS : " 1"
109126 SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK : " 0"
110127 NCCL_MNNVL_ENABLE : " 1"
@@ -119,7 +136,7 @@ backend:
119136 SGLANG_LOG_FORWARD_ITERS : " 1"
120137 SGLANG_LOG_MS : " 1"
121138 SGLANG_REQUEST_STATE_WAIT_TIMEOUT : " 60"
122- SGLANG_OPT_FP8_WO_A_GEMM : " 0 "
139+ # SGLANG_OPT_USE_CUSTOM_ALL_REDUCE_V2 is intentionally NOT set for decode: custom all-reduce v2 (CAR_V2)
123140 # is single-node only and corrupts results in 2-node decode setups.
124141
125142 sglang_config :
@@ -135,7 +152,7 @@ backend:
135152 expert-parallel-size : 4
136153
137154 enable-dp-attention : true
138- moe-a2a-backend : " megamoe "
155+ moe-a2a-backend : " deepep "
139156 deepep-config : ' {"normal_dispatch":{"num_sms":88,"num_max_nvl_chunked_send_tokens":28,"num_max_nvl_chunked_recv_tokens":512},"normal_combine": {"num_sms":88,"num_max_nvl_chunked_send_tokens":16,"num_max_nvl_chunked_recv_tokens":512}}'
140157 moe-dense-tp-size : 1
141158
@@ -154,7 +171,7 @@ backend:
154171 stream-interval : 60
155172
156173 load-balance-method : " total_requests"
157- moe-a2a-backend : " megamoe "
174+ moe-a2a-backend : " deepep "
158175
159176 disaggregation-mode : " decode"
160177 disaggregation-transfer-backend : mooncake
@@ -179,4 +196,3 @@ benchmark:
179196 concurrencies : " 21504"
180197 req_rate : " inf"
181198 use_chat_template : false
182- custom_tokenizer : " sa_bench_tokenizers.sglang_deepseek_v4.SGLangDeepseekV4Tokenizer"
0 commit comments