Skip to content

Commit 5e87c8c

Browse files
committed
feat(profile): add Flash DEP MTP3 profile
1 parent e2639e5 commit 5e87c8c

2 files changed

Lines changed: 26 additions & 2 deletions

File tree

.github/configs/nvidia-master.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2056,6 +2056,24 @@ dsv4-flash-fp4-b300-sglang:
20562056
search-space:
20572057
- { tp: 4, ep: 1, conc-start: 64, conc-end: 64 }
20582058

2059+
# Targeted Flash MTP profile: DEP4 (tp=4, ep=4, dp-attn: true) at the same 1k1k conc=64 point as the
2060+
# non-MTP Flash profile above. The shared SGLang MTP launcher selects the
2061+
# Flash-only (steps=2, draft-tokens=3) speculative settings for this model.
2062+
dsv4-flash-fp4-b300-sglang-mtp:
2063+
image: lmsysorg/sglang:deepseek-v4-b300@sha256:26e116bd211e300dbb76924d56c5cbe6cc3ee5ee2fe314859cb8774f5bc070f3
2064+
model: deepseek-ai/DeepSeek-V4-Flash
2065+
model-prefix: dsv4
2066+
runner: b300
2067+
precision: fp4
2068+
framework: sglang
2069+
multinode: false
2070+
scenarios:
2071+
fixed-seq-len:
2072+
- isl: 1024
2073+
osl: 1024
2074+
search-space:
2075+
- { tp: 4, ep: 4, dp-attn: true, conc-start: 64, conc-end: 64, spec-decoding: mtp }
2076+
20592077
# DeepSeek-V4-Pro on B300 with EAGLE/MTP speculative decoding. Recipe is
20602078
# selected inside benchmarks/single_node/dsv4_fp4_b300_sglang_mtp.sh by
20612079
# DP_ATTENTION:

benchmarks/single_node/dsv4_fp4_b300_sglang_mtp.sh

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,17 @@ if [ "${DP_ATTENTION}" = "true" ]; then
7777
export SGLANG_OPT_DEEPGEMM_MEGA_MOE_NUM_MAX_TOKENS_PER_RANK=4096
7878
export SGLANG_OPT_FIX_NEXTN_MEGA_MOE=1
7979
export SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=0
80+
SPECULATIVE_NUM_STEPS=1
81+
SPECULATIVE_NUM_DRAFT_TOKENS=2
82+
if [[ "$MODEL" == "deepseek-ai/DeepSeek-V4-Flash" ]]; then
83+
SPECULATIVE_NUM_STEPS=2
84+
SPECULATIVE_NUM_DRAFT_TOKENS=3
85+
fi
8086
SPEC_FLAGS=(
8187
--speculative-algorithm EAGLE
82-
--speculative-num-steps 1
88+
--speculative-num-steps "$SPECULATIVE_NUM_STEPS"
8389
--speculative-eagle-topk 1
84-
--speculative-num-draft-tokens 2
90+
--speculative-num-draft-tokens "$SPECULATIVE_NUM_DRAFT_TOKENS"
8591
)
8692
PARALLEL_ARGS=(
8793
--dp-size "$TP"

0 commit comments

Comments
 (0)