File tree Expand file tree Collapse file tree
# H200 multi node deepseek R1 ep mode node 0
# nccl_host: the ip of the nccl host
# usage: sh multi_node_ep_node0.sh <nccl_host>

# Fail fast with a usage message when <nccl_host> is missing or empty;
# otherwise --nccl_host below would be passed without a value.
: "${1:?usage: sh multi_node_ep_node0.sh <nccl_host>}"
nccl_host=$1
export nccl_host

# Launch node 0 (--node_rank 0) of a 2-node deployment (--nnodes 2).
# MOE_MODE=EP matches the "ep mode" named in the header.
# NOTE(review): LOADWORKER presumably controls weight-loading workers - confirm.
MOE_MODE=EP LOADWORKER=18 python -m lightllm.server.api_server --port 8088 \
  --model_dir /path/DeepSeek-R1 \
  --tp 16 \
  --dp 16 \
  --enable_fa3 \
  --nnodes 2 \
  --node_rank 0 \
  --nccl_host "$nccl_host" \
  --nccl_port 2732
# To enable microbatch overlap, append " \" to the --nccl_port line above and
# add the flags below to the command (uncommented, each but the last ending
# in " \"); simply uncommenting them here would run them as separate commands.
# --enable_prefill_microbatch_overlap
# --enable_decode_microbatch_overlap
# H200 multi node deepseek R1 ep mode node 1
# nccl_host: the ip of the nccl host
# usage: sh multi_node_ep_node1.sh <nccl_host>

# Fail fast with a usage message when <nccl_host> is missing or empty;
# otherwise --nccl_host below would be passed without a value.
: "${1:?usage: sh multi_node_ep_node1.sh <nccl_host>}"
nccl_host=$1
export nccl_host

# Launch node 1 (--node_rank 1) of a 2-node deployment (--nnodes 2).
# MOE_MODE=EP matches the "ep mode" named in the header.
# NOTE(review): LOADWORKER presumably controls weight-loading workers - confirm.
MOE_MODE=EP LOADWORKER=18 python -m lightllm.server.api_server --port 8088 \
  --model_dir /path/DeepSeek-R1 \
  --tp 16 \
  --dp 16 \
  --enable_fa3 \
  --nnodes 2 \
  --node_rank 1 \
  --nccl_host "$nccl_host" \
  --nccl_port 2732
# To enable microbatch overlap, append " \" to the --nccl_port line above and
# add the flags below to the command (uncommented, each but the last ending
# in " \"); simply uncommenting them here would run them as separate commands.
# --enable_prefill_microbatch_overlap
# --enable_decode_microbatch_overlap
# H200/H100 multi node deepseek R1 tp mode node 0
# nccl_host: the ip of the nccl host
# usage: sh multi_node_tp_node0.sh <nccl_host>

# Fail fast with a usage message when <nccl_host> is missing or empty;
# otherwise --nccl_host below would be passed without a value.
: "${1:?usage: sh multi_node_tp_node0.sh <nccl_host>}"
nccl_host=$1
export nccl_host

# Launch node 0 (--node_rank 0) of a 2-node deployment (--nnodes 2) with --tp 16.
# NOTE(review): LOADWORKER presumably controls weight-loading workers - confirm.
LOADWORKER=18 python -m lightllm.server.api_server --port 8088 \
  --model_dir /path/DeepSeek-R1 \
  --tp 16 \
  --enable_fa3 \
  --nnodes 2 \
  --node_rank 0 \
  --nccl_host "$nccl_host" \
  --nccl_port 2732
# H200/H100 multi node deepseek R1 tp mode node 1
# nccl_host: the ip of the nccl host
# usage: sh multi_node_tp_node1.sh <nccl_host>

# Fail fast with a usage message when <nccl_host> is missing or empty;
# otherwise --nccl_host below would be passed without a value.
: "${1:?usage: sh multi_node_tp_node1.sh <nccl_host>}"
nccl_host=$1
export nccl_host

# Launch node 1 (--node_rank 1) of a 2-node deployment (--nnodes 2) with --tp 16.
# NOTE(review): LOADWORKER presumably controls weight-loading workers - confirm.
LOADWORKER=18 python -m lightllm.server.api_server --port 8088 \
  --model_dir /path/DeepSeek-R1 \
  --tp 16 \
  --enable_fa3 \
  --nnodes 2 \
  --node_rank 1 \
  --nccl_host "$nccl_host" \
  --nccl_port 2732
# Multi pd_master node deployment example.
# Each command below starts a separate lightllm api_server process in a
# different --run_mode, all pointing at the same config server
# (10.120.114.74:60088).
# NOTE(review): presumably each server keeps running in the foreground, so
# launch the commands in separate shells / on their own hosts rather than
# executing this file top to bottom - confirm.

# 1. config server
python -m lightllm.server.api_server --run_mode "config_server" --config_server_host 10.120.114.74 --config_server_port 60088

# 2. first pd_master (port 60011)
python -m lightllm.server.api_server --model_dir /mtc/models/DeepSeek-V2-Lite-Chat --run_mode "pd_master" --host 10.120.114.74 --port 60011 --config_server_host 10.120.114.74 --config_server_port 60088

# 3. second pd_master (port 60012)
python -m lightllm.server.api_server --model_dir /mtc/models/DeepSeek-V2-Lite-Chat --run_mode "pd_master" --host 10.120.114.74 --port 60012 --config_server_host 10.120.114.74 --config_server_port 60088

# 4. prefill server on GPU 0; the CUDA MPS control daemon is started first (-d)
nvidia-cuda-mps-control -d
CUDA_VISIBLE_DEVICES=0 KV_TRANS_USE_P2P=1 LOADWORKER=1 python -m lightllm.server.api_server --model_dir /mtc/models/DeepSeek-V2-Lite-Chat \
  --run_mode "prefill" \
  --host 10.120.178.74 \
  --port 8019 \
  --tp 1 \
  --nccl_port 2732 \
  --max_total_token_num 40000 \
  --tokenizer_mode fast \
  --max_req_total_len 16000 \
  --running_max_req_size 128 \
  --disable_cudagraph \
  --config_server_host 10.120.114.74 \
  --config_server_port 60088

# 5. decode server on GPU 1 (uses a different --nccl_port than the prefill server)
CUDA_VISIBLE_DEVICES=1 KV_TRANS_USE_P2P=1 LOADWORKER=10 python -m lightllm.server.api_server --model_dir /mtc/models/DeepSeek-V2-Lite-Chat \
  --run_mode "decode" \
  --host 10.120.178.74 \
  --port 8121 \
  --nccl_port 12322 \
  --tp 1 \
  --max_total_token_num 40000 \
  --graph_max_len_in_batch 2048 \
  --graph_max_batch_size 16 \
  --tokenizer_mode fast \
  --config_server_host 10.120.114.74 \
  --config_server_port 60088
# config_server
# config_server_host: the host of the config server
# usage: sh config_server.sh <config_server_host>

# Fail fast with a usage message when <config_server_host> is missing or empty;
# otherwise --config_server_host below would be passed without a value.
: "${1:?usage: sh config_server.sh <config_server_host>}"
config_server_host=$1
export config_server_host

# Start the api_server in config_server mode on port 60088.
python -m lightllm.server.api_server \
  --run_mode "config_server" \
  --config_server_host "$config_server_host" \
  --config_server_port 60088
# decode
# host: the host of the decode server
# config_server_host: the host of the config server
# usage: sh decode.sh <host> <config_server_host>

# Fail fast with a usage message when either argument is missing or empty;
# otherwise the corresponding flag below would be passed without a value.
: "${1:?usage: sh decode.sh <host> <config_server_host>}"
: "${2:?usage: sh decode.sh <host> <config_server_host>}"
host=$1
config_server_host=$2
export host config_server_host

# Start the CUDA MPS control daemon (-d) before launching the server.
nvidia-cuda-mps-control -d

# Start the api_server in decode mode, pointing at the config server on port 60088.
# NOTE(review): LOADWORKER presumably controls weight-loading workers - confirm.
MOE_MODE=EP LOADWORKER=18 python -m lightllm.server.api_server \
  --model_dir /path/DeepSeek-R1 \
  --run_mode "decode" \
  --host "$host" \
  --port 8121 \
  --nccl_port 12322 \
  --tp 8 \
  --dp 8 \
  --enable_fa3 \
  --config_server_host "$config_server_host" \
  --config_server_port 60088
# To enable microbatch overlap, append " \" to the --config_server_port line
# above and add the flag below to the command (uncommented); simply
# uncommenting it here would run it as a separate command.
# --enable_decode_microbatch_overlap
# pd_master 1
# host: the host of the pd master
# config_server_host: the host of the config server
# usage: sh pd_master_1.sh <host> <config_server_host>

# Fail fast with a usage message when either argument is missing or empty;
# otherwise the corresponding flag below would be passed without a value.
: "${1:?usage: sh pd_master_1.sh <host> <config_server_host>}"
: "${2:?usage: sh pd_master_1.sh <host> <config_server_host>}"
host=$1
config_server_host=$2
export host config_server_host

# Start the api_server in pd_master mode on port 60011.
python -m lightllm.server.api_server \
  --model_dir /path/DeepSeek-R1 \
  --run_mode "pd_master" \
  --host "$host" \
  --port 60011 \
  --config_server_host "$config_server_host" \
  --config_server_port 60088
# pd_master 2
# host: the host of the pd master
# config_server_host: the host of the config server
# usage: sh pd_master_2.sh <host> <config_server_host>

# Fail fast with a usage message when either argument is missing or empty;
# otherwise the corresponding flag below would be passed without a value.
: "${1:?usage: sh pd_master_2.sh <host> <config_server_host>}"
: "${2:?usage: sh pd_master_2.sh <host> <config_server_host>}"
host=$1
config_server_host=$2
export host config_server_host

# Start the api_server in pd_master mode on port 60012.
python -m lightllm.server.api_server \
  --model_dir /path/DeepSeek-R1 \
  --run_mode "pd_master" \
  --host "$host" \
  --port 60012 \
  --config_server_host "$config_server_host" \
  --config_server_port 60088
# prefill
# host: the host of the prefill server
# config_server_host: the host of the config server
# usage: sh pd_prefill.sh <host> <config_server_host>

# Fail fast with a usage message when either argument is missing or empty;
# otherwise the corresponding flag below would be passed without a value.
: "${1:?usage: sh pd_prefill.sh <host> <config_server_host>}"
: "${2:?usage: sh pd_prefill.sh <host> <config_server_host>}"
host=$1
config_server_host=$2
export host config_server_host

# Start the CUDA MPS control daemon (-d) before launching the server.
nvidia-cuda-mps-control -d

# Start the api_server in prefill mode, pointing at the config server on port 60088.
# NOTE(review): LOADWORKER presumably controls weight-loading workers - confirm.
MOE_MODE=EP LOADWORKER=18 python -m lightllm.server.api_server \
  --model_dir /path/DeepSeek-R1 \
  --run_mode "prefill" \
  --host "$host" \
  --port 8019 \
  --tp 8 \
  --dp 8 \
  --nccl_port 2732 \
  --enable_fa3 \
  --disable_cudagraph \
  --config_server_host "$config_server_host" \
  --config_server_port 60088
# To enable microbatch overlap, append " \" to the --config_server_port line
# above and add the flag below to the command (uncommented); simply
# uncommenting it here would run it as a separate command.
# --enable_prefill_microbatch_overlap
You can’t perform that action at this time.
0 commit comments