File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -41,11 +41,23 @@ script_args:
4141 tp : 1
4242 dfpnl : " "
4343script : |-
44+ COPY_FLAG="/tmp/copy_done_${{SLURMD_NODENAME}}";
45+ NEW_DATA_PATH="/dev/shm/data_path_${{SLURMD_NODENAME}}";
46+ if [ "$SLURM_LOCALID" = "0" ]; then
47+ df -h;
48+ echo $NEW_DATA_PATH;
49+ time cp -r ${data_path}/ $NEW_DATA_PATH;
50+ touch $COPY_FLAG
51+ fi
52+ # All ranks wait until the copy-done flag file (touched by local rank 0 after the copy) appears
53+ while [ ! -f $COPY_FLAG ]; do
54+ sleep 1
55+ done
4456 WANDB_API_KEY=$BIONEMO_WANDB_API_KEY ${variant}_${model} \
45- --train-cluster-path=${data_path} /train_clusters.parquet \
46- --train-database-path=${data_path} /train.db \
47- --valid-cluster-path=${data_path} /valid_clusters.parquet \
48- --valid-database-path=${data_path} /validation.db \
57+ --train-cluster-path=$NEW_DATA_PATH /train_clusters.parquet \
58+ --train-database-path=$NEW_DATA_PATH /train.db \
59+ --valid-cluster-path=$NEW_DATA_PATH /valid_clusters.parquet \
60+ --valid-database-path=$NEW_DATA_PATH /validation.db \
4961 --micro-batch-size=${batch_size} \
5062 --num-nodes=${nodes} \
5163 --num-gpus=${gpus} \
You can’t perform that action at this time.
0 commit comments