Skip to content

Commit e94fad5

Browse files
committed
Add colocated python Dockerfile and build upload script
1 parent 5d45f2e commit e94fad5

2 files changed

Lines changed: 155 additions & 0 deletions

File tree

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Base image: the Pathways colocated-Python sidecar. The accompanying build
# script rewrites the tag on the FROM line below to match the JAX version
# MaxText requires, so the repository path on that line must stay unchanged.
FROM us-docker.pkg.dev/cloud-tpu-v2-images/pathways-colocated-python/sidecar:20260423-python_3.12-jax_0.10.0

# Install git. The apt package index is deleted in the same layer so that it
# does not end up stored in the final image.
# NOTE(review): git is presumably needed for VCS-based requirements - confirm.
RUN apt-get update \
    && apt-get install -y git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the build context (the MaxText repository root) into the image.
COPY . /app/maxtext/

# Install MaxText dependencies.
# The requirements file is expected at
# maxtext/src/dependencies/requirements/generated_requirements/tpu-requirements.txt
# (relative to /app). Versions are constrained by /opt/venv/server_constraints.txt,
# which is assumed to be provided by the base image - TODO confirm.
RUN uv pip install --upgrade pip setuptools wheel
RUN uv pip install -r maxtext/src/dependencies/requirements/generated_requirements/tpu-requirements.txt -c /opt/venv/server_constraints.txt

# Make the MaxText sources importable without installing them as a package.
ENV PYTHONPATH=/app/maxtext/src:$PYTHONPATH

WORKDIR /app/maxtext
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
#!/bin/bash
2+
3+
# Copyright 2023–2025 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# https://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# Script to build and upload the MaxText colocated Python Docker image.
18+
# This script should be run from the root directory of the MaxText repository.
19+
#
20+
# Arguments can be provided as KEY=VALUE pairs, e.g.:
21+
# bash build_upload_colocated_python_image.sh PROJECT=my-gcp LOCAL_IMAGE_NAME=my-colocated
22+
#
23+
# Supported KEYs:
24+
# PROJECT: Overrides the default GCP project for gcr.io. Defaults to 'cloud-tpu-multipod-dev'.
25+
# LOCAL_IMAGE_NAME: Overrides the local tag used during docker build. Defaults to 'maxtext-colocated-python'.
26+
# IMAGE_LOCATION: The full URL for the final image in the registry.
27+
# Defaults to gcr.io/${PROJECT}/${USER}_${LOCAL_IMAGE_NAME}:latest.
28+
# Setting this variable fully overrides the default construction using PROJECT and LOCAL_IMAGE_NAME.
29+
# For instructions on building the MaxText Docker image, please refer to https://maxtext.readthedocs.io/en/latest/build_maxtext.html.
30+
31+
# Example 1: Use defaults for PROJECT and LOCAL_IMAGE_NAME
32+
# bash src/dependencies/scripts/build_upload_colocated_python_image.sh
33+
34+
# Example 2: Specify a custom project and local name
35+
# bash src/dependencies/scripts/build_upload_colocated_python_image.sh PROJECT=my-tpu-dev LOCAL_IMAGE_NAME=maxtext-cp
36+
37+
# Example 3: Provide a complete IMAGE_LOCATION, overriding defaults
38+
# bash src/dependencies/scripts/build_upload_colocated_python_image.sh IMAGE_LOCATION=us-docker.pkg.dev/my-artifact-repo/images/colocated-python:stable
39+
40+
#######################################
# Parse KEY=VALUE command-line arguments and export each one as an
# environment variable. Keys are upper-cased before being exported.
# Arguments: any number of strings, normally of the form KEY=VALUE
# Outputs:   " Parsed: KEY=VALUE" on stdout for every accepted argument;
#            a warning on stderr for every argument that is ignored
# Returns:   0 (malformed arguments are skipped, not fatal)
#######################################
parse_key_value_args() {
  local arg key value
  # Exported names must be valid shell identifiers; anything else would make
  # 'export' fail with a raw bash error.
  local -r name_re='^[A-Z_][A-Z0-9_]*$'

  for arg in "$@"; do
    if [[ "${arg}" != *"="* ]]; then
      echo "Warning: Ignoring argument '${arg}'. Arguments should be in KEY=VALUE format (e.g., PROJECT=my-proj)." >&2
      continue
    fi

    # Split on the first '=' only, so values may themselves contain '='.
    key=$(printf '%s' "${arg%%=*}" | tr '[:lower:]' '[:upper:]')
    value=${arg#*=}

    if [[ ! "${key}" =~ ${name_re} ]]; then
      echo "Warning: Ignoring argument '${arg}'. '${key}' cannot be used as a variable name." >&2
      continue
    fi

    export "${key}=${value}"
    echo " Parsed: ${key}=${value}"
  done
}

parse_key_value_args "$@"
50+
51+
# Fall back to built-in defaults for anything the caller did not supply.
# IMAGE_LOCATION is resolved last because its default is assembled from the
# two values above plus the invoking user's name.
PROJECT="${PROJECT:-cloud-tpu-multipod-dev}"
LOCAL_IMAGE_NAME="${LOCAL_IMAGE_NAME:-maxtext-colocated-python}"
IMAGE_LOCATION="${IMAGE_LOCATION:-gcr.io/${PROJECT}/${USER}_${LOCAL_IMAGE_NAME}:latest}"

# Print the effective settings before any work starts.
cat <<EOF
$(date): Building and pushing MaxText Colocated Python image...
 PROJECT: ${PROJECT}
 LOCAL_IMAGE_NAME: ${LOCAL_IMAGE_NAME}
 IMAGE_LOCATION: ${IMAGE_LOCATION}
 Dockerfile: src/dependencies/dockerfiles/colocated_python.Dockerfile
 Build Context: maxtext/
EOF
62+
63+
# Locate the TPU requirements file that the JAX version is read from.
echo "$(date): Extracting JAX version from requirements..."
REQ_FILE="src/dependencies/requirements/generated_requirements/tpu-requirements.txt"

# Stop here if the file is missing; the path is relative, so this also catches
# the script being run from somewhere other than the repository root.
[[ -f "${REQ_FILE}" ]] || {
  echo "Error: Requirements file not found: ${REQ_FILE}" >&2
  exit 1
}
70+
#######################################
# Print the JAX version pinned in a pip requirements file.
# Looks at the first line of the form 'jax==X.Y.Z' or 'jax>=X.Y.Z'. Other
# packages whose names merely start with "jax" (e.g. jaxlib) are not matched,
# and a line without a numeric version produces no output.
# Arguments: $1 - path to the requirements file
# Outputs:   the bare version (e.g. "0.10.0") on stdout, or nothing if no
#            matching line exists
#######################################
extract_jax_version() {
  sed -nE 's/^jax[[:space:]]*(==|>=)[[:space:]]*([0-9]+(\.[0-9]+)*).*/\2/p' -- "$1" \
    | head -n 1
}

JAX_VERSION=$(extract_jax_version "${REQ_FILE}")
72+
73+
# Abort if no version could be parsed: every later step (base image lookup,
# build) depends on it. The hint names a specifier form the parser accepts.
if [[ -z "${JAX_VERSION}" ]]; then
  echo "Error: Could not extract jax version from ${REQ_FILE}. Ensure it contains a line such as 'jax>=X.Y.Z'." >&2
  exit 1
fi
echo " Detected required JAX version: ${JAX_VERSION}"
78+
79+
# Work out where to look for a base image and what its tag has to contain.
BASE_IMAGE_REPO="us-docker.pkg.dev/cloud-tpu-v2-images/pathways-colocated-python/sidecar"
TARGET_JAX_TAG_PART="jax_${JAX_VERSION}"
# The registry host is everything before the first '/' of the repository path.
BASE_REGISTRY="${BASE_IMAGE_REPO%%/*}"

echo "$(date): Searching for base image tag in '${BASE_IMAGE_REPO}' containing '${TARGET_JAX_TAG_PART}'..."

# Register gcloud as Docker's credential helper for that registry.
echo "$(date): Configuring Docker for base image registry: ${BASE_REGISTRY}"
gcloud auth configure-docker --quiet "${BASE_REGISTRY}"
88+
89+
#######################################
# Pick the newest tag that contains a given substring.
# Arguments: $1 - substring to look for; matched literally, so the dots in a
#                 version number are not treated as regex wildcards
# Inputs:    candidate tags on stdin, one per line
# Outputs:   the matching tag that sorts last in byte order, or nothing when
#            no tag matches
#######################################
latest_matching_tag() {
  grep -F -- "$1" | LC_ALL=C sort -r | head -n 1
}

# List every tag in the repository and keep the newest one for this JAX
# version. Tags appear to start with a YYYYMMDD date (for example
# 20260423-python_3.12-jax_0.10.0), so reverse byte order puts the most recent
# one first.
BASE_IMAGE_TAG=$(gcloud artifacts docker images list "${BASE_IMAGE_REPO}" --include-tags --format=json \
  | jq -r '.[] | .tags[]?' \
  | latest_matching_tag "${TARGET_JAX_TAG_PART}")
95+
96+
# Without a matching tag there is nothing to build on. Print every tag that
# does exist so the user can see what the repository offers, then abort.
if [[ -z "${BASE_IMAGE_TAG}" ]]; then
  echo "Error: Could not find a suitable base image tag in ${BASE_IMAGE_REPO} for JAX version '${JAX_VERSION}'." >&2
  echo " Searched for tags containing '${TARGET_JAX_TAG_PART}'." >&2
  # This listing is unfiltered: by definition none of these tags matched.
  echo " Available tags in the repository:" >&2
  gcloud artifacts docker images list "${BASE_IMAGE_REPO}" --include-tags --format=json \
    | jq -r '.[] | .tags[]?' \
    | sort -r >&2
  exit 1
fi
FULL_BASE_IMAGE="${BASE_IMAGE_REPO}:${BASE_IMAGE_TAG}"
echo " Found latest compatible base image: ${FULL_BASE_IMAGE}"
106+
107+
# Render a throw-away copy of the Dockerfile whose FROM line points at the
# dynamically selected base image.
ORIGINAL_DOCKERFILE="src/dependencies/dockerfiles/colocated_python.Dockerfile"
if [[ ! -f "${ORIGINAL_DOCKERFILE}" ]]; then
  echo "Error: Dockerfile not found: ${ORIGINAL_DOCKERFILE}" >&2
  exit 1
fi

# Create the file under the system temp directory rather than the current
# directory: the current directory is the docker build context, so a temp file
# created there would be copied into the image by the Dockerfile's 'COPY .'.
TMP_DOCKERFILE=$(mktemp "${TMPDIR:-/tmp}/maxtext_colocated_python_Dockerfile.XXXXXX") || {
  echo "Error: Could not create a temporary Dockerfile." >&2
  exit 1
}
# Ensure the temporary file is removed on every exit path.
trap 'rm -f "${TMP_DOCKERFILE}"' EXIT

echo "$(date): Creating temporary Dockerfile: ${TMP_DOCKERFILE}"
# Replace the hardcoded FROM line with the dynamically determined base image.
if ! sed "s|^FROM us-docker.pkg.dev/cloud-tpu-v2-images/pathways-colocated-python/sidecar:.*|FROM ${FULL_BASE_IMAGE}|" "${ORIGINAL_DOCKERFILE}" > "${TMP_DOCKERFILE}"; then
  echo "Error: Could not write ${TMP_DOCKERFILE} from ${ORIGINAL_DOCKERFILE}." >&2
  exit 1
fi

# sed exits successfully even when no line matched, which would silently build
# on the hardcoded base image. Confirm the substitution really happened.
if ! grep -qxF -- "FROM ${FULL_BASE_IMAGE}" "${TMP_DOCKERFILE}"; then
  echo "Error: No sidecar FROM line was found to replace in ${ORIGINAL_DOCKERFILE}." >&2
  exit 1
fi
116+
117+
# Build, tag, push, clean up. Every step is checked: without the checks a
# failed build would fall through to tagging and pushing whatever image
# already carries the local tag, and the script would still report success.
echo "$(date): Running docker build with local tag '${LOCAL_IMAGE_NAME}' using ${TMP_DOCKERFILE}..."
# The build context '.' is the maxtext/ root directory.
# The Dockerfile should contain 'COPY . /app/maxtext/'.
if ! docker build --no-cache \
  -f "${TMP_DOCKERFILE}" \
  -t "${LOCAL_IMAGE_NAME}" \
  .; then
  echo "Error: docker build failed; nothing was tagged or pushed." >&2
  exit 1
fi

# Tag the locally built image with the final IMAGE_LOCATION.
echo "$(date): Tagging '${LOCAL_IMAGE_NAME}' as '${IMAGE_LOCATION}'..."
if ! docker tag "${LOCAL_IMAGE_NAME}" "${IMAGE_LOCATION}"; then
  echo "Error: Could not tag '${LOCAL_IMAGE_NAME}' as '${IMAGE_LOCATION}'." >&2
  exit 1
fi

# Push the image to the specified IMAGE_LOCATION.
echo "$(date): Pushing '${IMAGE_LOCATION}'..."
if ! docker push "${IMAGE_LOCATION}"; then
  echo "Error: Could not push '${IMAGE_LOCATION}'." >&2
  exit 1
fi

# Clean up the local Docker image tag used during build. The image has already
# been pushed at this point, so a failure here is reported but is not fatal.
echo "$(date): Cleaning up local tag '${LOCAL_IMAGE_NAME}'..."
docker image rm "${LOCAL_IMAGE_NAME}" \
  || echo "Warning: Could not remove local tag '${LOCAL_IMAGE_NAME}'." >&2

echo "$(date): Build and push complete for ${IMAGE_LOCATION}"

0 commit comments

Comments
 (0)