@@ -49,19 +49,36 @@ jobs:
4949 build_mode : stable
5050 image_name : maxtext_jax_stable
5151 dockerfile : ./dependencies/dockerfiles/maxtext_dependencies.Dockerfile
52+ run_secondary : false
# Each entry is one image build. `dockerfile` is always built by the primary
# build step; when `run_secondary` is true, `secondary_dockerfile` is built
# afterwards by the secondary build step and only that result is pushed.
- device: tpu
  build_mode: nightly
  image_name: maxtext_jax_nightly
  dockerfile: ./dependencies/dockerfiles/maxtext_dependencies.Dockerfile
  run_secondary: false

# Post-Training Image Builds
- device: tpu
  build_mode: stable
  image_name: maxtext_post_training
  dockerfile: ./dependencies/dockerfiles/maxtext_dependencies.Dockerfile
  secondary_dockerfile: ./dependencies/dockerfiles/maxtext_post_training_dependencies.Dockerfile
  run_secondary: true
# The "local" variant is the only entry for which the workflow clones the
# tunix, vllm and tpu-inference repositories into the build context.
- device: tpu
  build_mode: stable
  image_name: maxtext_post_training_local
  dockerfile: ./dependencies/dockerfiles/maxtext_dependencies.Dockerfile
  secondary_dockerfile: ./dependencies/dockerfiles/maxtext_post_training_local_dependencies.Dockerfile
  run_secondary: true

# GPU Image Builds
- device: gpu
  build_mode: stable
  image_name: maxtext_gpu_jax_stable
  dockerfile: ./dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
  run_secondary: false
- device: gpu
  build_mode: nightly
  image_name: maxtext_gpu_jax_nightly
  dockerfile: ./dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
  run_secondary: false
6582
6683 if : >
6784 github.event_name == 'schedule' ||
@@ -86,13 +103,20 @@ jobs:
86103 echo "Building for device: ${{ matrix.device }} in ${{ matrix.build_mode }} mode."
87104 fi
88105
# Fetch the MaxText sources into the workspace. Skipped unless the earlier
# `check` step reported should_run == 'true'.
- name: Checkout MaxText
  if: steps.check.outputs.should_run == 'true'
  uses: actions/checkout@v5
92109
# Clone the post-training dependency repositories into the workspace, next to
# the MaxText checkout. Runs only for the maxtext_post_training_local entry.
- name: Checkout post-training dependencies
  # String literals inside GitHub Actions expressions must be single-quoted;
  # a double-quoted literal makes the workflow fail validation.
  if: steps.check.outputs.should_run == 'true' && matrix.image_name == 'maxtext_post_training_local'
  run: |
    git clone https://github.com/google/tunix.git ./tunix
    git clone https://github.com/vllm-project/vllm.git ./vllm
    git clone https://github.com/google/tpu-inference.git ./tpu-inference

# Later steps run `git rev-parse` both in the workspace and in the cloned
# dependency directories, so the wildcard marks every directory as safe.
# NOTE(review): '*' disables git's ownership check globally for this job;
# confirm that is acceptable for the runner/container this job uses.
- name: Mark git repositories as safe
  if: steps.check.outputs.should_run == 'true'
  run: git config --global --add safe.directory '*'
96120
97121 - name : Configure Docker
98122 if : steps.check.outputs.should_run == 'true'
@@ -110,21 +134,31 @@ jobs:
110134 id : vars
111135 if : steps.check.outputs.should_run == 'true'
run: |
  # Build date, later used as an image tag.
  echo "image_date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT

  # Short commit hash of the MaxText checkout (the current directory).
  echo "maxtext_hash=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT

  # One `<name>_hash` output per post-training dependency. The value is left
  # empty when the repository was not cloned for this matrix entry.
  for dep_dir in tunix vllm tpu-inference; do
    if [ ! -d "./$dep_dir" ]; then
      echo "${dep_dir}_hash=" >> $GITHUB_OUTPUT
      continue
    fi
    echo "${dep_dir}_hash=$(git -C "./$dep_dir" rev-parse --short HEAD)" >> $GITHUB_OUTPUT
  done

152+
# Docker BuildX command config for pre-training images
- name: Build and Push Primary Docker Image
  uses: docker/build-push-action@v6
  if: steps.check.outputs.should_run == 'true'
  with:
    # Push only when no secondary build follows. The value must be wrapped in
    # `${{ }}`: a bare `{{ ... }}` is parsed by YAML as a flow mapping and is
    # never evaluated as an expression.
    # NOTE(review): when run_secondary is true this image is built but neither
    # pushed nor loaded; confirm how the secondary build obtains its base image.
    push: ${{ !matrix.run_secondary }}
    context: .
    file: ${{ matrix.dockerfile }}
    tags: gcr.io/tpu-prod-env-multipod/${{ matrix.image_name }}:latest
    cache-from: type=gha
    cache-to: type=gha,mode=max
    provenance: false
@@ -133,3 +167,40 @@ jobs:
133167 MODE=${{ matrix.build_mode }}
134168 JAX_VERSION=NONE
135169 LIBTPU_GCS_PATH=NONE
170+
# Docker BuildX command config for post-training images.
# Runs only for matrix entries with run_secondary set, and publishes the result
# under the same `<image_name>:latest` tag that the primary step uses.
- name: Build and Push Secondary Docker Image
  if: steps.check.outputs.should_run == 'true' && matrix.run_secondary
  uses: docker/build-push-action@v6
  with:
    context: .
    file: ${{ matrix.secondary_dockerfile }}
    # NOTE(review): BASEIMAGE names `maxtext_base_image`, which no step visible
    # here tags or loads; verify the secondary Dockerfile can resolve it.
    build-args: |
      MODE=post-training
      BASEIMAGE=maxtext_base_image
    tags: gcr.io/tpu-prod-env-multipod/${{ matrix.image_name }}:latest
    push: true
    cache-from: type=gha
    cache-to: type=gha,mode=max
    provenance: false
186+
# Attach the date tag, the MaxText commit tag and (when available) one tag per
# post-training dependency to the image that was just pushed as `:latest`.
- name: Add tags to Docker Image
  if: steps.check.outputs.should_run == 'true'
  # Step outputs are passed through the environment. Expressions are expanded
  # before the shell runs, so a shell loop variable cannot be used to index
  # `steps.vars.outputs`, and the outputs object cannot be piped into jq.
  env:
    SOURCE_IMAGE: gcr.io/tpu-prod-env-multipod/${{ matrix.image_name }}
    IMAGE_DATE: ${{ steps.vars.outputs.image_date }}
    MAXTEXT_HASH: ${{ steps.vars.outputs.maxtext_hash }}
    TUNIX_HASH: ${{ steps.vars.outputs.tunix_hash }}
    VLLM_HASH: ${{ steps.vars.outputs.vllm_hash }}
    # The vars step names this output after the directory, hyphen included.
    TPU_INFERENCE_HASH: ${{ steps.vars.outputs['tpu-inference_hash'] }}
  run: |
    # add_tag <tag>: point <tag> at the image currently tagged `latest`.
    add_tag() {
      gcloud container images add-tag "$SOURCE_IMAGE:latest" "$SOURCE_IMAGE:$1" --quiet
    }

    # Add date tag
    add_tag "$IMAGE_DATE"

    # Add MaxText tag
    add_tag "maxtext_$MAXTEXT_HASH"

    # Add post-training dependencies tags. A hash is empty when the dependency
    # was not cloned for this matrix entry; no tag is added in that case.
    if [ -n "$TUNIX_HASH" ]; then
      add_tag "tunix_$TUNIX_HASH"
    fi
    if [ -n "$VLLM_HASH" ]; then
      add_tag "vllm_$VLLM_HASH"
    fi
    if [ -n "$TPU_INFERENCE_HASH" ]; then
      add_tag "tpu-inference_$TPU_INFERENCE_HASH"
    fi
0 commit comments