Multi-node swift examples: set the launch environment with `export`.

In both train_node1.sh and train_node2.sh the six backslash-continued
VAR=value assignments that prefixed `swift sft` are replaced by six
standalone `export` statements, one per variable. The only values that
differ between the two scripts are NODE_RANK (0 vs 1) and MASTER_ADDR;
train_node2.sh keeps the xxx.xxx.xxx.xxx placeholder and gains a FIXME
asking for the master node's IP address.

NOTE(review): each hunk header declares 12 lines per side, so a blank
context line has been restored after `nproc_per_node=4`. That blank line
and the four-space indentation of the `--model` / `--tuner_type` context
lines are reconstructed -- confirm against the target files before applying.

diff --git a/examples/train/multi-node/swift/train_node1.sh b/examples/train/multi-node/swift/train_node1.sh
index 76d5dc9581..1d652ad907 100644
--- a/examples/train/multi-node/swift/train_node1.sh
+++ b/examples/train/multi-node/swift/train_node1.sh
@@ -1,12 +1,12 @@
 nnodes=2
 nproc_per_node=4
 
-CUDA_VISIBLE_DEVICES=0,1,2,3 \
-NNODES=$nnodes \
-NODE_RANK=0 \
-MASTER_ADDR=127.0.0.1 \
-MASTER_PORT=29500 \
-NPROC_PER_NODE=$nproc_per_node \
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+export NNODES=$nnodes
+export NODE_RANK=0
+export MASTER_ADDR=127.0.0.1
+export MASTER_PORT=29500
+export NPROC_PER_NODE=$nproc_per_node
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --tuner_type full \
diff --git a/examples/train/multi-node/swift/train_node2.sh b/examples/train/multi-node/swift/train_node2.sh
index 4a64b23bdf..b2a9a80230 100644
--- a/examples/train/multi-node/swift/train_node2.sh
+++ b/examples/train/multi-node/swift/train_node2.sh
@@ -1,12 +1,12 @@
 nnodes=2
 nproc_per_node=4
 
-CUDA_VISIBLE_DEVICES=0,1,2,3 \
-NNODES=$nnodes \
-NODE_RANK=1 \
-MASTER_ADDR=xxx.xxx.xxx.xxx \
-MASTER_PORT=29500 \
-NPROC_PER_NODE=$nproc_per_node \
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+export NNODES=$nnodes
+export NODE_RANK=1
+export MASTER_ADDR=xxx.xxx.xxx.xxx # FIXME: Replace with the IP address of the master node (node 1)
+export MASTER_PORT=29500
+export NPROC_PER_NODE=$nproc_per_node
 swift sft \
     --model Qwen/Qwen2.5-7B-Instruct \
     --tuner_type full \