Skip to content

Commit ef5bade

Browse files
committed
add start scripts
1 parent cccbd2a commit ef5bade

15 files changed

Lines changed: 214 additions & 0 deletions
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# H200 multi node deepseek R1 ep mode node 0
# nccl_host: the ip of the nccl host
# Usage: sh multi_node_ep_node0.sh <nccl_host>

# Fail fast with a usage hint when the required argument is missing or empty.
nccl_host="${1:?usage: sh multi_node_ep_node0.sh <nccl_host>}"
export nccl_host

# Collect the server flags in the positional parameters (POSIX sh has no
# arrays) so that optional flags can be appended below before the launch.
set -- \
  --port 8088 \
  --model_dir /path/DeepSeek-R1 \
  --tp 16 \
  --dp 16 \
  --enable_fa3 \
  --nnodes 2 \
  --node_rank 0 \
  --nccl_host "$nccl_host" \
  --nccl_port 2732

# if you want to enable microbatch overlap, uncomment the following lines
# set -- "$@" --enable_prefill_microbatch_overlap
# set -- "$@" --enable_decode_microbatch_overlap

MOE_MODE=EP LOADWORKER=18 python -m lightllm.server.api_server "$@"
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# H200 multi node deepseek R1 ep mode node 1
# nccl_host: the ip of the nccl host
# Usage: sh multi_node_ep_node1.sh <nccl_host>

# Fail fast with a usage hint when the required argument is missing or empty.
nccl_host="${1:?usage: sh multi_node_ep_node1.sh <nccl_host>}"
export nccl_host

# Collect the server flags in the positional parameters (POSIX sh has no
# arrays) so that optional flags can be appended below before the launch.
set -- \
  --port 8088 \
  --model_dir /path/DeepSeek-R1 \
  --tp 16 \
  --dp 16 \
  --enable_fa3 \
  --nnodes 2 \
  --node_rank 1 \
  --nccl_host "$nccl_host" \
  --nccl_port 2732

# if you want to enable microbatch overlap, uncomment the following lines
# set -- "$@" --enable_prefill_microbatch_overlap
# set -- "$@" --enable_decode_microbatch_overlap

MOE_MODE=EP LOADWORKER=18 python -m lightllm.server.api_server "$@"
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# H200/H100 multi node deepseek R1 tp mode node 0
# nccl_host: the ip of the nccl host
# Usage: sh multi_node_tp_node0.sh <nccl_host>

# Fail fast with a usage hint when the required argument is missing or empty.
nccl_host="${1:?usage: sh multi_node_tp_node0.sh <nccl_host>}"
export nccl_host

LOADWORKER=18 python -m lightllm.server.api_server --port 8088 \
  --model_dir /path/DeepSeek-R1 \
  --tp 16 \
  --enable_fa3 \
  --nnodes 2 \
  --node_rank 0 \
  --nccl_host "$nccl_host" \
  --nccl_port 2732
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# H200/H100 multi node deepseek R1 tp mode node 1
# nccl_host: the ip of the nccl host
# Usage: sh multi_node_tp_node1.sh <nccl_host>

# Fail fast with a usage hint when the required argument is missing or empty.
nccl_host="${1:?usage: sh multi_node_tp_node1.sh <nccl_host>}"
export nccl_host

LOADWORKER=18 python -m lightllm.server.api_server --port 8088 \
  --model_dir /path/DeepSeek-R1 \
  --tp 16 \
  --enable_fa3 \
  --nnodes 2 \
  --node_rank 1 \
  --nccl_host "$nccl_host" \
  --nccl_port 2732
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Deployment example with multiple pd_master nodes.
# NOTE(review): each api_server command below presumably stays in the
# foreground, so this reads as a list of commands to start separately (one
# per terminal/host) rather than a script to run top to bottom -- confirm.

# config_server
python -m lightllm.server.api_server \
  --run_mode "config_server" \
  --config_server_host 10.120.114.74 \
  --config_server_port 60088

# pd_master listening on port 60011
python -m lightllm.server.api_server \
  --model_dir /mtc/models/DeepSeek-V2-Lite-Chat \
  --run_mode "pd_master" \
  --host 10.120.114.74 \
  --port 60011 \
  --config_server_host 10.120.114.74 \
  --config_server_port 60088

# pd_master listening on port 60012
python -m lightllm.server.api_server \
  --model_dir /mtc/models/DeepSeek-V2-Lite-Chat \
  --run_mode "pd_master" \
  --host 10.120.114.74 \
  --port 60012 \
  --config_server_host 10.120.114.74 \
  --config_server_port 60088

# Start the CUDA MPS control daemon before the prefill/decode servers.
nvidia-cuda-mps-control -d

# prefill server on GPU 0
CUDA_VISIBLE_DEVICES=0 KV_TRANS_USE_P2P=1 LOADWORKER=1 python -m lightllm.server.api_server \
  --model_dir /mtc/models/DeepSeek-V2-Lite-Chat \
  --run_mode "prefill" \
  --host 10.120.178.74 \
  --port 8019 \
  --tp 1 \
  --nccl_port 2732 \
  --max_total_token_num 40000 \
  --tokenizer_mode fast \
  --max_req_total_len 16000 \
  --running_max_req_size 128 \
  --disable_cudagraph \
  --config_server_host 10.120.114.74 \
  --config_server_port 60088

# decode server on GPU 1
CUDA_VISIBLE_DEVICES=1 KV_TRANS_USE_P2P=1 LOADWORKER=10 python -m lightllm.server.api_server \
  --model_dir /mtc/models/DeepSeek-V2-Lite-Chat \
  --run_mode "decode" \
  --host 10.120.178.74 \
  --port 8121 \
  --nccl_port 12322 \
  --tp 1 \
  --max_total_token_num 40000 \
  --graph_max_len_in_batch 2048 \
  --graph_max_batch_size 16 \
  --tokenizer_mode fast \
  --config_server_host 10.120.114.74 \
  --config_server_port 60088
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# config_server
# config_server_host: the host of the config server
# Usage: sh config_server.sh <config_server_host>

# Fail fast with a usage hint when the required argument is missing or empty.
config_server_host="${1:?usage: sh config_server.sh <config_server_host>}"
export config_server_host

python -m lightllm.server.api_server \
  --run_mode "config_server" \
  --config_server_host "$config_server_host" \
  --config_server_port 60088
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# decode
# host: the host of the decode server
# config_server_host: the host of the config server
# Usage: sh decode.sh <host> <config_server_host>

# Fail fast with a usage hint when a required argument is missing or empty.
host="${1:?usage: sh decode.sh <host> <config_server_host>}"
config_server_host="${2:?usage: sh decode.sh <host> <config_server_host>}"
export host config_server_host

nvidia-cuda-mps-control -d

# Collect the server flags in the positional parameters (POSIX sh has no
# arrays) so that optional flags can be appended below before the launch.
set -- \
  --model_dir /path/DeepSeek-R1 \
  --run_mode "decode" \
  --host "$host" \
  --port 8121 \
  --nccl_port 12322 \
  --tp 8 \
  --dp 8 \
  --enable_fa3 \
  --config_server_host "$config_server_host" \
  --config_server_port 60088

# if you want to enable microbatch overlap, uncomment the following line
# set -- "$@" --enable_decode_microbatch_overlap

MOE_MODE=EP LOADWORKER=18 python -m lightllm.server.api_server "$@"
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# pd_master 1
# host: the host of the pd master
# config_server_host: the host of the config server
# Usage: sh pd_master_1.sh <host> <config_server_host>

# Fail fast with a usage hint when a required argument is missing or empty.
host="${1:?usage: sh pd_master_1.sh <host> <config_server_host>}"
config_server_host="${2:?usage: sh pd_master_1.sh <host> <config_server_host>}"
export host config_server_host

python -m lightllm.server.api_server \
  --model_dir /path/DeepSeek-R1 \
  --run_mode "pd_master" \
  --host "$host" \
  --port 60011 \
  --config_server_host "$config_server_host" \
  --config_server_port 60088
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# pd_master 2
# host: the host of the pd master
# config_server_host: the host of the config server
# Usage: sh pd_master_2.sh <host> <config_server_host>

# Fail fast with a usage hint when a required argument is missing or empty.
host="${1:?usage: sh pd_master_2.sh <host> <config_server_host>}"
config_server_host="${2:?usage: sh pd_master_2.sh <host> <config_server_host>}"
export host config_server_host

python -m lightllm.server.api_server \
  --model_dir /path/DeepSeek-R1 \
  --run_mode "pd_master" \
  --host "$host" \
  --port 60012 \
  --config_server_host "$config_server_host" \
  --config_server_port 60088
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# prefill
# host: the host of the prefill server
# config_server_host: the host of the config server
# Usage: sh pd_prefill.sh <host> <config_server_host>

# Fail fast with a usage hint when a required argument is missing or empty.
host="${1:?usage: sh pd_prefill.sh <host> <config_server_host>}"
config_server_host="${2:?usage: sh pd_prefill.sh <host> <config_server_host>}"
export host config_server_host

nvidia-cuda-mps-control -d

# Collect the server flags in the positional parameters (POSIX sh has no
# arrays) so that optional flags can be appended below before the launch.
set -- \
  --model_dir /path/DeepSeek-R1 \
  --run_mode "prefill" \
  --host "$host" \
  --port 8019 \
  --tp 8 \
  --dp 8 \
  --nccl_port 2732 \
  --enable_fa3 \
  --disable_cudagraph \
  --config_server_host "$config_server_host" \
  --config_server_port 60088

# if you want to enable microbatch overlap, uncomment the following line
# set -- "$@" --enable_prefill_microbatch_overlap

MOE_MODE=EP LOADWORKER=18 python -m lightllm.server.api_server "$@"

0 commit comments

Comments
 (0)