1+ #! /bin/bash
2+
3+ # Copyright 2023–2025 Google LLC
4+ #
5+ # Licensed under the Apache License, Version 2.0 (the "License");
6+ # you may not use this file except in compliance with the License.
7+ # You may obtain a copy of the License at
8+ #
9+ # https://www.apache.org/licenses/LICENSE-2.0
10+ #
11+ # Unless required by applicable law or agreed to in writing, software
12+ # distributed under the License is distributed on an "AS IS" BASIS,
13+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+ # See the License for the specific language governing permissions and
15+ # limitations under the License.
16+
17+ # Script to build and upload the MaxText colocated Python Docker image.
18+ # This script should be run from the root directory of the MaxText repository.
19+ #
20+ # Arguments can be provided as KEY=VALUE pairs, e.g.:
21+ # bash build_upload_colocated_python_image.sh PROJECT=my-gcp LOCAL_IMAGE_NAME=my-colocated
22+ #
23+ # Supported KEYs:
24+ # PROJECT: Overrides the default GCP project for gcr.io. Defaults to 'cloud-tpu-multipod-dev'.
25+ # LOCAL_IMAGE_NAME: Overrides the local tag used during docker build. Defaults to 'maxtext-colocated-python'.
26+ # IMAGE_LOCATION: The full URL for the final image in the registry.
27+ # Defaults to gcr.io/${PROJECT}/${USER}_${LOCAL_IMAGE_NAME}:latest.
28+ # Setting this variable fully overrides the default construction using PROJECT and LOCAL_IMAGE_NAME.
# For instructions on building the MaxText Docker image, please refer to https://maxtext.readthedocs.io/en/latest/build_maxtext.html
30+
31+ # Example 1: Use defaults for PROJECT and LOCAL_IMAGE_NAME
32+ # bash src/dependencies/scripts/build_upload_colocated_python_image.sh
33+
34+ # Example 2: Specify a custom project and local name
35+ # bash src/dependencies/scripts/build_upload_colocated_python_image.sh PROJECT=my-tpu-dev LOCAL_IMAGE_NAME=maxtext-cp
36+
37+ # Example 3: Provide a complete IMAGE_LOCATION, overriding defaults
38+ # bash src/dependencies/scripts/build_upload_colocated_python_image.sh IMAGE_LOCATION=us-docker.pkg.dev/my-artifact-repo/images/colocated-python:stable
39+
#######################################
# Parse KEY=VALUE command-line arguments into exported environment variables.
# The key is upper-cased before export, so "project=foo" sets PROJECT=foo.
# Everything after the first '=' is the value, so values may contain '='.
# Arguments without '=' or whose key is not a valid shell identifier are
# skipped with a warning on stderr instead of being handed to `export`.
# Globals:   exports one variable per accepted argument
# Arguments: the script's positional parameters
# Outputs:   "Parsed: KEY=VALUE" on stdout for each accepted argument
# Returns:   0
#######################################
parse_key_value_args() {
  local argument key value
  # Kept in a variable so the regex is interpreted the same way on old bash.
  local identifier_re='^[A-Za-z_][A-Za-z0-9_]*$'
  for argument in "$@"; do
    if [[ "${argument}" != *=* ]]; then
      echo "Warning: Ignoring argument '${argument}'. Arguments should be in KEY=VALUE format (e.g., PROJECT=my-proj)." >&2
      continue
    fi
    # Split on the FIRST '=' only.
    key=${argument%%=*}
    value=${argument#*=}
    key=$(printf '%s' "${key}" | tr '[:lower:]' '[:upper:]')
    if [[ ! "${key}" =~ ${identifier_re} ]]; then
      echo "Warning: Ignoring argument '${argument}'. '${key}' is not a valid variable name." >&2
      continue
    fi
    export "${key}=${value}"
    echo "Parsed: ${key}=${value}"
  done
  return 0
}

parse_key_value_args "$@"
50+
# Apply defaults for any setting that was not supplied as a KEY=VALUE argument
# or through the environment.
: "${PROJECT:=cloud-tpu-multipod-dev}"
: "${LOCAL_IMAGE_NAME:=maxtext-colocated-python}"
# IMAGE_LOCATION is only derived from PROJECT and LOCAL_IMAGE_NAME when it was
# not set explicitly. If $USER is empty, fall back to `id -un` so the image
# name does not start with a bare underscore.
if [[ -z "${IMAGE_LOCATION:-}" ]]; then
  IMAGE_OWNER="${USER:-$(id -un 2>/dev/null)}"
  IMAGE_LOCATION="gcr.io/${PROJECT}/${IMAGE_OWNER}_${LOCAL_IMAGE_NAME}:latest"
fi

# Print the effective configuration before doing any work.
echo "$(date): Building and pushing MaxText Colocated Python image..."
echo "  PROJECT: ${PROJECT}"
echo "  LOCAL_IMAGE_NAME: ${LOCAL_IMAGE_NAME}"
echo "  IMAGE_LOCATION: ${IMAGE_LOCATION}"
echo "  Dockerfile: src/dependencies/dockerfiles/colocated_python.Dockerfile"
echo "  Build Context: maxtext/"
62+
# Extract the JAX version from the generated TPU requirements file.
echo "$(date): Extracting JAX version from requirements..."
REQ_FILE="src/dependencies/requirements/generated_requirements/tpu-requirements.txt"
if [[ ! -f "${REQ_FILE}" ]]; then
  echo "Error: Requirements file not found: ${REQ_FILE}" >&2
  exit 1
fi
# Extracts a version like "0.10.0" from the first line of the form
# "jax>=0.10.0", "jax==0.10.0" or "jax~=0.10.0". Requiring the operator right
# after "jax" keeps lines for other packages (e.g. "jaxlib...") from matching.
JAX_VERSION=$(
  sed -nE 's/^jax[[:space:]]*(>=|==|~=)[[:space:]]*([0-9]+(\.[0-9]+)*).*/\2/p' "${REQ_FILE}" \
    | head -n 1
)

if [[ -z "${JAX_VERSION}" ]]; then
  echo "Error: Could not extract jax version from ${REQ_FILE}. Ensure it contains a line in the format 'jax>=X.Y.Z' or 'jax==X.Y.Z'." >&2
  exit 1
fi
echo "Detected required JAX version: ${JAX_VERSION}"
78+
# Find the latest compatible base image tag.
BASE_IMAGE_REPO="us-docker.pkg.dev/cloud-tpu-v2-images/pathways-colocated-python/sidecar"
TARGET_JAX_TAG_PART="jax_${JAX_VERSION}"
echo "$(date): Searching for base image tag in '${BASE_IMAGE_REPO}' containing '${TARGET_JAX_TAG_PART}'..."

# Fail early with a clear message; a missing tool would otherwise surface
# below as a misleading "could not find a suitable base image tag" error.
for REQUIRED_TOOL in gcloud jq; do
  if ! command -v "${REQUIRED_TOOL}" > /dev/null 2>&1; then
    echo "Error: Required tool '${REQUIRED_TOOL}' was not found in PATH." >&2
    exit 1
  fi
done

# Authenticate Docker for the base image registry (the host part of the repo URL).
BASE_REGISTRY="${BASE_IMAGE_REPO%%/*}"
echo "$(date): Configuring Docker for base image registry: ${BASE_REGISTRY}"
gcloud auth configure-docker --quiet "${BASE_REGISTRY}"

# List every tag once, in reverse lexicographic order.
# NOTE(review): this yields "newest first" only if the tag names begin with a
# sortable date - confirm against the registry's tag naming scheme.
ALL_BASE_IMAGE_TAGS=$(
  gcloud artifacts docker images list "${BASE_IMAGE_REPO}" --include-tags --format=json \
    | jq -r '.[] | .tags[]?' \
    | sort -r
)

# Pick the first tag that contains the target string literally (no regex, so
# '.' is not a wildcard) and where the match is not followed by another digit,
# so "jax_0.8.1" does not select a "jax_0.8.10" image.
BASE_IMAGE_TAG=""
while IFS= read -r CANDIDATE_TAG; do
  TAG_REMAINDER="${CANDIDATE_TAG#*"${TARGET_JAX_TAG_PART}"}"
  if [[ "${TAG_REMAINDER}" != "${CANDIDATE_TAG}" && "${TAG_REMAINDER}" != [0-9]* ]]; then
    BASE_IMAGE_TAG="${CANDIDATE_TAG}"
    break
  fi
done <<< "${ALL_BASE_IMAGE_TAGS}"

if [[ -z "${BASE_IMAGE_TAG}" ]]; then
  echo "Error: Could not find a suitable base image tag in ${BASE_IMAGE_REPO} for JAX version '${JAX_VERSION}'." >&2
  echo "Searched for tags containing '${TARGET_JAX_TAG_PART}'." >&2
  echo "Available tags:" >&2
  printf '%s\n' "${ALL_BASE_IMAGE_TAGS}" >&2
  exit 1
fi
FULL_BASE_IMAGE="${BASE_IMAGE_REPO}:${BASE_IMAGE_TAG}"
echo "Found latest compatible base image: ${FULL_BASE_IMAGE}"
106+
# Create a temporary Dockerfile whose FROM line points at the dynamically
# selected base image.
ORIGINAL_DOCKERFILE="src/dependencies/dockerfiles/colocated_python.Dockerfile"
if [[ ! -f "${ORIGINAL_DOCKERFILE}" ]]; then
  echo "Error: Dockerfile not found: ${ORIGINAL_DOCKERFILE}. Run this script from the MaxText repository root." >&2
  exit 1
fi

# NOTE(review): the temporary file is created in the current directory, which
# is also the docker build context - confirm it is excluded from the image
# (e.g. via .dockerignore) if that matters.
TMP_DOCKERFILE=$(mktemp maxtext_colocated_python_Dockerfile.XXXXXX) || {
  echo "Error: Could not create a temporary Dockerfile." >&2
  exit 1
}
# Ensure the temporary file is removed on script exit.
trap 'rm -f -- "${TMP_DOCKERFILE}"' EXIT

echo "$(date): Creating temporary Dockerfile: ${TMP_DOCKERFILE}"
# Replace the hardcoded FROM line with the dynamically determined base image.
sed "s|^FROM us-docker\.pkg\.dev/cloud-tpu-v2-images/pathways-colocated-python/sidecar:.*|FROM ${FULL_BASE_IMAGE}|" \
  "${ORIGINAL_DOCKERFILE}" > "${TMP_DOCKERFILE}"

# sed exits 0 even when nothing matched; without this check the build would
# silently use whatever base image the original Dockerfile names.
if ! grep -qxF -- "FROM ${FULL_BASE_IMAGE}" "${TMP_DOCKERFILE}"; then
  echo "Error: No 'FROM us-docker.pkg.dev/cloud-tpu-v2-images/pathways-colocated-python/sidecar:<tag>' line found in ${ORIGINAL_DOCKERFILE}; cannot inject base image ${FULL_BASE_IMAGE}." >&2
  exit 1
fi
116+
# Build, tag and push the image. Each step aborts the script on failure so a
# broken build can never be reported as "complete" or leave a stale image
# pushed under IMAGE_LOCATION.
echo "$(date): Running docker build with local tag '${LOCAL_IMAGE_NAME}' using ${TMP_DOCKERFILE}..."
# The build context '.' is the maxtext/ root directory.
# The Dockerfile should contain 'COPY . /app/maxtext/'.
if ! docker build --no-cache \
  -f "${TMP_DOCKERFILE}" \
  -t "${LOCAL_IMAGE_NAME}" \
  .; then
  echo "Error: docker build failed for '${LOCAL_IMAGE_NAME}'." >&2
  exit 1
fi

# Tag the locally built image with the final IMAGE_LOCATION.
echo "$(date): Tagging '${LOCAL_IMAGE_NAME}' as '${IMAGE_LOCATION}'..."
if ! docker tag "${LOCAL_IMAGE_NAME}" "${IMAGE_LOCATION}"; then
  echo "Error: docker tag failed for '${IMAGE_LOCATION}'." >&2
  exit 1
fi

# Push the image to the specified IMAGE_LOCATION.
echo "$(date): Pushing '${IMAGE_LOCATION}'..."
if ! docker push "${IMAGE_LOCATION}"; then
  echo "Error: docker push failed for '${IMAGE_LOCATION}'." >&2
  exit 1
fi

# Clean up the local Docker image tag used during build. The image is already
# pushed at this point, so a cleanup failure is reported but is not fatal.
echo "$(date): Cleaning up local tag '${LOCAL_IMAGE_NAME}'..."
if ! docker image rm "${LOCAL_IMAGE_NAME}"; then
  echo "Warning: Could not remove local tag '${LOCAL_IMAGE_NAME}'." >&2
fi

echo "$(date): Build and push complete for ${IMAGE_LOCATION}"