File tree Expand file tree Collapse file tree
examples/inference/sglang Expand file tree Collapse file tree Original file line number Diff line number Diff line change 11type : service
2- name : prefill-decode-test
3- https : false
2+ name : prefill-decode
43image : lmsysorg/sglang:latest
54
65env :
76 - HF_TOKEN
87 - MODEL_ID=zai-org/GLM-4.5-Air-FP8
98
109replicas :
11- - count : 1..2
10+ - count : 1..4
1211 scaling :
1312 metric : rps
1413 target : 3
1514 commands :
16- - echo "Group Prefill" > /tmp/version.txt
1715 - |
1816 python -m sglang.launch_server \
1917 --model-path $MODEL_ID \
@@ -25,9 +23,11 @@ replicas:
2523 resources :
2624 gpu : 1
2725
28- - count : 1
26+ - count : 1..8
27+ scaling :
28+ metric : rps
29+ target : 2
2930 commands :
30- - echo "Group Decode" > /tmp/version.txt
3131 - |
3232 python -m sglang.launch_server \
3333 --model-path $MODEL_ID \
@@ -48,5 +48,4 @@ probes:
4848
4949router :
5050 type : sglang
51- policy : round_robin
5251 pd_disaggregation : true
You can’t perform that action at this time.
0 commit comments