File tree Expand file tree Collapse file tree
docs/tutorials/posttraining Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -83,6 +83,7 @@ export MAXTEXT_CKPT_PATH=${BASE_OUTPUT_DIRECTORY?}/${WORKLOAD?}/0/items
 export TPU_TYPE=<TPU Type> # e.g., 'v5p-128'
 export TPU_CLUSTER=<cluster name>
 export PROJECT_ID=<GCP project ID>
+export ZONE=<GCP zone>
 export CLOUD_IMAGE_NAME=<your artifact registry image> # Name for the Docker image to be built
 ```
 
@@ -195,6 +196,7 @@ xpk workload create-pathways --workload ${WORKLOAD?} \
 --docker-image gcr.io/${PROJECT_ID?}/${CLOUD_IMAGE_NAME?} --cluster ${TPU_CLUSTER?} \
 --tpu-type=${TPU_TYPE?} --num-slices=1 \
 --project=${PROJECT_ID?} --priority=high \
+--zone=${ZONE?} \
 --command "HF_TOKEN=${HF_TOKEN?} TF_CPP_MIN_LOG_LEVEL=0 JAX_PLATFORMS=proxy JAX_BACKEND_TARGET=grpc://127.0.0.1:29000 ENABLE_PATHWAYS_PERSISTENCE='1' \
 python3 -m src.maxtext.trainers.post_train.rl.train_rl src/maxtext/configs/post_train/rl.yml \
  model_name=${MODEL?} \
@@ -212,6 +214,7 @@ xpk workload create-pathways --workload ${WORKLOAD?} \
 --docker-image gcr.io/${PROJECT_ID?}/${CLOUD_IMAGE_NAME?} --cluster ${TPU_CLUSTER?} \
 --tpu-type=${TPU_TYPE?} --num-slices=1 \
 --project=${PROJECT_ID?} --priority=high \
+--zone=${ZONE?} \
 --command "HF_TOKEN=${HF_TOKEN?} TF_CPP_MIN_LOG_LEVEL=0 JAX_PLATFORMS=proxy JAX_BACKEND_TARGET=grpc://127.0.0.1:29000 ENABLE_PATHWAYS_PERSISTENCE='1' \
 python3 -m src.maxtext.trainers.post_train.rl.train_rl src/maxtext/configs/post_train/rl.yml \
  model_name=${MODEL?} \
You can’t perform that action at this time.
0 commit comments