@@ -96,25 +96,25 @@ def scan_log_dir(log_dir):
9696
9797 for root , dirs , files in os .walk (log_dir ):
9898 for fname in files :
99- if not fname .endswith (' .txt' ):
99+ if not fname .endswith (" .txt" ):
100100 continue
101101 filepath = os .path .join (root , fname )
102102
103103 # 尝试从文件名解析场景信息
104104 # 格式: *_bs<N>_[<quant>_]<fd|sg>.txt
105- m = re .search (r' _bs(\d+)_(?:(fp8|bf16|wint4|wint8)_)?(fd|sg)\.txt$' , fname , re .IGNORECASE )
105+ m = re .search (r" _bs(\d+)_(?:(fp8|bf16|wint4|wint8)_)?(fd|sg)\.txt$" , fname , re .IGNORECASE )
106106 if not m :
107107 # 也尝试无 quant 的模式 (默认 bf16)
108- m = re .search (r' _bs(\d+)_(fd|sg)\.txt$' , fname , re .IGNORECASE )
108+ m = re .search (r" _bs(\d+)_(fd|sg)\.txt$" , fname , re .IGNORECASE )
109109 if m :
110110 bs = m .group (1 )
111- quant = ' bf16'
111+ quant = " bf16"
112112 framework = m .group (2 ).lower ()
113113 else :
114114 continue
115115 else :
116116 bs = m .group (1 )
117- quant = (m .group (2 ) or ' bf16' ).lower ()
117+ quant = (m .group (2 ) or " bf16" ).lower ()
118118 framework = m .group (3 ).lower ()
119119
120120 key = f"{ quant } _bs{ bs } "
@@ -133,49 +133,51 @@ def generate_html(benchmark_data, config):
133133 """生成完整的多模式 HTML 报告"""
134134
135135 # 确定可用的量化方式和并发数
136- quants = sorted (set (k .split ('_bs' )[0 ] for k in benchmark_data .keys ()))
137- bs_values = sorted (set (k .split ('_bs' )[1 ] for k in benchmark_data .keys ()), key = int )
138-
139- model_name = config .get ('model_name' , 'Unknown Model' )
140- default_quant = config .get ('default_quant' , quants [0 ] if quants else 'bf16' )
141- default_bs = config .get ('default_bs' , bs_values [- 1 ] if bs_values else '32' )
142- gpu_type = config .get ('gpu_type' , 'H800' )
143- tp_size = config .get ('tp_size' , 1 )
144- fd_attention = config .get ('fd_attention' , 'MLA_ATTN (FlashAttn v3)' )
145- sg_attention = config .get ('sg_attention' , 'flashmla' )
146- sg_version = config .get ('sg_version' , '0.5.10.post1' )
147- fd_commit_date = config .get ('fd_commit_date' , '' )
148- fd_commit_short = config .get ('fd_commit_short' , '' )
149- fd_commit_full = config .get ('fd_commit_full' , '' )
150- max_model_len = config .get ('max_model_len' , 65536 )
151- dataset_url = config .get ('dataset_url' , '' )
152- dataset_desc = config .get ('dataset_desc' , '' )
153- test_date = config .get ('test_date' , '' )
154- model_type = config .get ('model_type' , '' )
155- model_size = config .get ('model_size' , '' )
156- model_experts = config .get ('model_experts' , '' )
157- model_layers_hidden = config .get ('model_layers_hidden' , '' )
136+ quants = sorted (set (k .split ("_bs" )[0 ] for k in benchmark_data .keys ()))
137+ bs_values = sorted (set (k .split ("_bs" )[1 ] for k in benchmark_data .keys ()), key = int )
138+
139+ model_name = config .get ("model_name" , "Unknown Model" )
140+ default_quant = config .get ("default_quant" , quants [0 ] if quants else "bf16" )
141+ default_bs = config .get ("default_bs" , bs_values [- 1 ] if bs_values else "32" )
142+ gpu_type = config .get ("gpu_type" , "H800" )
143+ tp_size = config .get ("tp_size" , 1 )
144+ dp_size = config .get ("dp_size" , 1 )
145+ ep_size = config .get ("ep_size" , 0 )
146+ fd_attention = config .get ("fd_attention" , "MLA_ATTN (FlashAttn v3)" )
147+ sg_attention = config .get ("sg_attention" , "flashmla" )
148+ sg_version = config .get ("sg_version" , "0.5.10.post1" )
149+ fd_commit_date = config .get ("fd_commit_date" , "" )
150+ fd_commit_short = config .get ("fd_commit_short" , "" )
151+ fd_commit_full = config .get ("fd_commit_full" , "" )
152+ max_model_len = config .get ("max_model_len" , 65536 )
153+ dataset_url = config .get ("dataset_url" , "" )
154+ dataset_desc = config .get ("dataset_desc" , "" )
155+ test_date = config .get ("test_date" , "" )
156+ model_type = config .get ("model_type" , "" )
157+ model_size = config .get ("model_size" , "" )
158+ model_experts = config .get ("model_experts" , "" )
159+ model_layers_hidden = config .get ("model_layers_hidden" , "" )
158160
159161 # 生成量化选择器按钮
160162 def quant_btn_label (q ):
161- if q == ' fp8' :
162- return ' FP8 (Block-Wise)'
163+ if q == " fp8" :
164+ return " FP8 (Block-Wise)"
163165 return q .upper ()
164166
165- quant_buttons = ' \n ' .join (
167+ quant_buttons = " \n " .join (
166168 f' <div class="seg-btn" data-val="{ q } " onclick="setQuant(\' { q } \' )" title="{ "FD: block_wise_fp8 / SG: fp8" if q == "fp8" else "" } ">{ quant_btn_label (q )} </div>'
167169 for q in quants
168170 )
169171
170172 # 生成并发选择器按钮
171- bs_buttons = ' \n ' .join (
173+ bs_buttons = " \n " .join (
172174 f' <div class="seg-btn" data-val="{ bs } " onclick="setBS(\' { bs } \' )">{ bs } </div>'
173175 for bs in bs_values
174176 )
175177
176178 data_json = json .dumps (benchmark_data , ensure_ascii = False )
177179
178- html = f''' <!DOCTYPE html>
180+ html = f""" <!DOCTYPE html>
179181<html lang="zh-CN" data-theme="light">
180182<head>
181183 <meta charset="UTF-8">
@@ -394,8 +396,8 @@ def quant_btn_label(q):
394396 <h1>FastDeploy vs SGLang</h1>
395397 <p class="subtitle">{ model_name } 推理性能基准测试报告</p>
396398 <div class="badge-row">
397- <span class="badge">{ gpu_type } x{ tp_size } </span>
398- <span class="badge">TP={ tp_size } </span>
399+ <span class="badge">{ gpu_type } x{ tp_size * dp_size } </span>
400+ <span class="badge">TP={ tp_size } { f' DP= { dp_size } ' if dp_size > 1 else '' } { f' EP= { ep_size } ' if ep_size > 0 else '' } </span>
399401 <span class="badge" id="badge-quant">{ default_quant .upper ()} </span>
400402 <span class="badge" id="badge-bs">并发 { default_bs } </span>
401403 { f'<span class="badge">{ test_date } </span>' if test_date else '' }
@@ -433,8 +435,8 @@ def quant_btn_label(q):
433435 <div class="config-card fd">
434436 <div class="card-tag">FastDeploy</div>
435437 <div class="config-grid-inner">
436- <div class="config-item"><div class="label">GPU</div><div class="value">{ gpu_type } x{ tp_size } </div></div>
437- <div class="config-item"><div class="label">TP </div><div class="value">{ tp_size } </div></div>
438+ <div class="config-item"><div class="label">GPU</div><div class="value">{ gpu_type } x{ tp_size * dp_size } </div></div>
439+ <div class="config-item"><div class="label">部署方式 </div><div class="value">TP= { tp_size } { f' DP= { dp_size } ' if dp_size > 1 else '' } { ' EP' if ep_size > 0 else '' } </div></div>
438440 <div class="config-item"><div class="label">并发</div><div class="value" id="cfg-fd-bs">{ default_bs } </div></div>
439441 <div class="config-item"><div class="label">Max Len</div><div class="value">{ max_model_len } </div></div>
440442 <div class="config-item"><div class="label">Attention</div><div class="value">{ fd_attention } </div></div>
@@ -446,8 +448,8 @@ def quant_btn_label(q):
446448 <div class="config-card sg">
447449 <div class="card-tag">SGLang</div>
448450 <div class="config-grid-inner">
449- <div class="config-item"><div class="label">GPU</div><div class="value">{ gpu_type } x{ tp_size } </div></div>
450- <div class="config-item"><div class="label">TP </div><div class="value">{ tp_size } </div></div>
451+ <div class="config-item"><div class="label">GPU</div><div class="value">{ gpu_type } x{ tp_size * dp_size } </div></div>
452+ <div class="config-item"><div class="label">部署方式 </div><div class="value">TP= { tp_size } { f' DP= { dp_size } ' if dp_size > 1 else '' } { f' EP= { ep_size } ' if ep_size > 0 else '' } </div></div>
451453 <div class="config-item"><div class="label">并发</div><div class="value" id="cfg-sg-bs">{ default_bs } </div></div>
452454 <div class="config-item"><div class="label">Context Len</div><div class="value">{ max_model_len } </div></div>
453455 <div class="config-item"><div class="label">Attention</div><div class="value">{ sg_attention } </div></div>
@@ -775,7 +777,7 @@ def quant_btn_label(q):
775777updateAll();
776778</script>
777779</body>
778- </html>'''
780+ </html>"""
779781
780782 return html
781783
@@ -798,7 +800,8 @@ def main():
798800 --default-quant bf16 --default-bs 512 \\
799801 --fd-attention "MLA_ATTN (FlashAttn v3)" --sg-attention flashmla \\
800802 --sg-version 0.5.10.post1
801- """ )
803+ """ ,
804+ )
802805
803806 # 数据来源(二选一)
804807 source = parser .add_mutually_exclusive_group (required = True )
@@ -818,6 +821,8 @@ def main():
818821 # 部署配置
819822 parser .add_argument ("--gpu-type" , default = "H800" , help = "GPU 型号" )
820823 parser .add_argument ("--tp" , type = int , default = 1 , help = "TP 大小" )
824+ parser .add_argument ("--dp" , type = int , default = 1 , help = "DP 大小" )
825+ parser .add_argument ("--ep" , type = int , default = 0 , help = "EP 大小 (0=不启用)" )
821826 parser .add_argument ("--max-model-len" , type = int , default = 65536 , help = "最大模型长度" )
822827 parser .add_argument ("--fd-attention" , default = "MLA_ATTN (FlashAttn v3)" , help = "FD Attention Backend" )
823828 parser .add_argument ("--sg-attention" , default = "flashmla" , help = "SG Attention Backend" )
@@ -851,7 +856,7 @@ def main():
851856 # 过滤掉不完整的场景(缺少 fd 或 sg)
852857 valid_data = {}
853858 for key , val in benchmark_data .items ():
854- if 'fd' in val and 'sg' in val and val ['fd' ] and val ['sg' ]:
859+ if "fd" in val and "sg" in val and val ["fd" ] and val ["sg" ]:
855860 valid_data [key ] = val
856861 else :
857862 print (f"[WARN] 场景 { key } 数据不完整,跳过" , file = sys .stderr )
@@ -864,25 +869,27 @@ def main():
864869
865870 # 构建配置
866871 config = {
867- 'model_name' : args .model_name ,
868- 'model_type' : args .model_type ,
869- 'model_size' : args .model_size ,
870- 'model_experts' : args .model_experts ,
871- 'model_layers_hidden' : args .model_layers_hidden ,
872- 'gpu_type' : args .gpu_type ,
873- 'tp_size' : args .tp ,
874- 'max_model_len' : args .max_model_len ,
875- 'fd_attention' : args .fd_attention ,
876- 'sg_attention' : args .sg_attention ,
877- 'sg_version' : args .sg_version ,
878- 'fd_commit_date' : args .fd_commit_date ,
879- 'fd_commit_short' : args .fd_commit_short ,
880- 'fd_commit_full' : args .fd_commit_full ,
881- 'default_quant' : args .default_quant ,
882- 'default_bs' : args .default_bs ,
883- 'test_date' : args .test_date ,
884- 'dataset_url' : args .dataset_url ,
885- 'dataset_desc' : args .dataset_desc ,
872+ "model_name" : args .model_name ,
873+ "model_type" : args .model_type ,
874+ "model_size" : args .model_size ,
875+ "model_experts" : args .model_experts ,
876+ "model_layers_hidden" : args .model_layers_hidden ,
877+ "gpu_type" : args .gpu_type ,
878+ "tp_size" : args .tp ,
879+ "dp_size" : args .dp ,
880+ "ep_size" : args .ep ,
881+ "max_model_len" : args .max_model_len ,
882+ "fd_attention" : args .fd_attention ,
883+ "sg_attention" : args .sg_attention ,
884+ "sg_version" : args .sg_version ,
885+ "fd_commit_date" : args .fd_commit_date ,
886+ "fd_commit_short" : args .fd_commit_short ,
887+ "fd_commit_full" : args .fd_commit_full ,
888+ "default_quant" : args .default_quant ,
889+ "default_bs" : args .default_bs ,
890+ "test_date" : args .test_date ,
891+ "dataset_url" : args .dataset_url ,
892+ "dataset_desc" : args .dataset_desc ,
886893 }
887894
888895 # 生成 HTML
0 commit comments