11dsr1-fp4-b200-sglang :
2- image : lmsysorg/sglang:v0.5.3rc1-cu129-b200
2+ image : lmsysorg/sglang:v0.5.5-cu129-amd64
33 model : nvidia/DeepSeek-R1-0528-FP4-V2
44 model-prefix : dsr1
55 runner : b200
@@ -9,18 +9,18 @@ dsr1-fp4-b200-sglang:
99 - isl : 1024
1010 osl : 1024
1111 search-space :
12- - { tp: 4, conc-start: 4, conc-end: 128 }
13- - { tp: 8, conc-start: 4, conc-end: 128 }
12+ - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 }
13+ - { tp: 8, ep: 8, conc-start: 4, conc-end: 128 }
1414 - isl : 1024
1515 osl : 8192
1616 search-space :
17- - { tp: 4, conc-start: 4, conc-end: 128 }
18- - { tp: 8, conc-start: 4, conc-end: 128 }
17+ - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 }
18+ - { tp: 8, ep: 8, conc-start: 4, conc-end: 128 }
1919 - isl : 8192
2020 osl : 1024
2121 search-space :
22- - { tp: 4, conc-start: 4, conc-end: 128 }
23- - { tp: 8, conc-start: 4, conc-end: 16 }
22+ - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 }
23+ - { tp: 8, ep: 8, conc-start: 4, conc-end: 16 }
2424
2525dsr1-fp4-b200-trt :
2626 image : nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc2.post2
@@ -73,7 +73,7 @@ dsr1-fp4-b200-trt:
7373 - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 256 }
7474
7575dsr1-fp8-b200-sglang :
76- image : lmsysorg/sglang:v0.5.3rc1-cu129-b200
76+ image : lmsysorg/sglang:v0.5.5-cu129-amd64
7777 model : deepseek-ai/DeepSeek-R1-0528
7878 model-prefix : dsr1
7979 runner : b200
@@ -83,15 +83,15 @@ dsr1-fp8-b200-sglang:
8383 - isl : 1024
8484 osl : 1024
8585 search-space :
86- - { tp: 8, conc-start: 4, conc-end: 64 }
86+ - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 }
8787 - isl : 1024
8888 osl : 8192
8989 search-space :
90- - { tp: 8, conc-start: 4, conc-end: 64 }
90+ - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 }
9191 - isl : 8192
9292 osl : 1024
9393 search-space :
94- - { tp: 8, conc-start: 4, conc-end: 64 }
94+ - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 }
9595
9696dsr1-fp8-b200-trt :
9797 image : nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc2.post2
@@ -120,7 +120,7 @@ dsr1-fp8-b200-trt:
120120 - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 }
121121
122122dsr1-fp8-h200-sglang :
123- image : lmsysorg/sglang:v0.5.2rc2-cu126
123+ image : lmsysorg/sglang:v0.5.5-cu129-amd64
124124 model : deepseek-ai/DeepSeek-R1-0528
125125 model-prefix : dsr1
126126 runner : h200
0 commit comments