Skip to content

Commit f8d5241

Browse files
committed
Trigger unit tests for docker images upload workflow
- images will only be tagged "latest" when the unit tests pass
1 parent 119b2c3 commit f8d5241

2 files changed

Lines changed: 149 additions & 5 deletions

File tree

.github/workflows/UploadDockerImages.yml

Lines changed: 56 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
# This workflow builds and pushes MaxText images for both TPU and GPU devices.
1616
# It runs automatically daily at 12am UTC, on Pull Requests, or manually via Workflow Dispatch.
1717

18-
name: Build Images
18+
name: Build and Test Images
1919

2020
on:
2121
schedule:
@@ -32,6 +32,11 @@ on:
3232
- all
3333
- tpu
3434
- gpu
35+
for_dev_test:
36+
description: 'For development test purpose. All images will be added a -test suffix'
37+
required: false
38+
type: boolean
39+
default: false
3540

3641
permissions:
3742
contents: read
@@ -42,6 +47,7 @@ jobs:
4247
outputs:
4348
maxtext_sha: ${{ steps.vars.outputs.maxtext_sha }}
4449
image_date: ${{ steps.vars.outputs.image_date }}
50+
image_suffix: ${{ steps.vars.outputs.image_suffix }}
4551
steps:
4652
- name: Checkout MaxText
4753
uses: actions/checkout@v5
@@ -55,6 +61,13 @@ jobs:
5561
# Image date
5662
echo "image_date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT
5763
64+
# If for_dev_test is true, set suffix to -test, otherwise empty
65+
if [[ "${{ github.event.inputs.for_dev_test }}" == "true" ]]; then
66+
echo "image_suffix=-test" >> $GITHUB_OUTPUT
67+
else
68+
echo "image_suffix=" >> $GITHUB_OUTPUT
69+
fi
70+
5871
tpu-pre-training:
5972
name: ${{ matrix.image_name }}
6073
needs: setup
@@ -72,7 +85,7 @@ jobs:
7285
dockerfile: ./dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
7386
uses: ./.github/workflows/build_and_push_docker_image.yml
7487
with:
75-
image_name: ${{ matrix.image_name }}
88+
image_name: ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }}
7689
device: ${{ matrix.device }}
7790
build_mode: ${{ matrix.build_mode }}
7891
dockerfile: ${{ matrix.dockerfile }}
@@ -96,14 +109,13 @@ jobs:
96109
dockerfile: ./dependencies/dockerfiles/maxtext_post_training_local_dependencies.Dockerfile
97110
uses: ./.github/workflows/build_and_push_docker_image.yml
98111
with:
99-
image_name: ${{ matrix.image_name }}
112+
image_name: ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }}
100113
device: ${{ matrix.device }}
101114
build_mode: ${{ matrix.build_mode }}
102115
dockerfile: ${{ matrix.dockerfile }}
103116
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
104117
image_date: ${{ needs.setup.outputs.image_date }}
105118
base_image: gcr.io/tpu-prod-env-multipod/maxtext_jax_stable:${{ needs.setup.outputs.image_date }}
106-
is_post_training: true
107119

108120
gpu-pre-training:
109121
name: ${{ matrix.image_name }}
@@ -122,9 +134,48 @@ jobs:
122134
dockerfile: ./dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
123135
uses: ./.github/workflows/build_and_push_docker_image.yml
124136
with:
125-
image_name: ${{ matrix.image_name }}
137+
image_name: ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }}
126138
device: ${{ matrix.device }}
127139
build_mode: ${{ matrix.build_mode }}
128140
dockerfile: ${{ matrix.dockerfile }}
129141
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
130142
image_date: ${{ needs.setup.outputs.image_date }}
143+
144+
# TEST JOBS
145+
pre-training-tpu-tests:
146+
needs: [setup, tpu-pre-training]
147+
strategy:
148+
fail-fast: false
149+
matrix:
150+
image: [maxtext_jax_stable, maxtext_jax_nightly]
151+
uses: ./.github/workflows/test_and_tag_docker_image.yml
152+
with:
153+
image_name: ${{ matrix.image }}${{ needs.setup.outputs.image_suffix }}
154+
image_date: ${{ needs.setup.outputs.image_date }}
155+
test_mode: tpu-pre-training
156+
157+
post-training-tpu-tests:
158+
needs: [setup, tpu-post-training]
159+
strategy:
160+
fail-fast: false
161+
matrix:
162+
image: [maxtext_post_training_stable, maxtext_post_training_nightly]
163+
uses: ./.github/workflows/test_and_tag_docker_image.yml
164+
with:
165+
image_name: ${{ matrix.image }}${{ needs.setup.outputs.image_suffix }}
166+
image_date: ${{ needs.setup.outputs.image_date }}
167+
test_mode: tpu-post-training
168+
169+
170+
pre-training-gpu-tests:
171+
needs: [setup, gpu-pre-training]
172+
strategy:
173+
fail-fast: false
174+
matrix:
175+
image: [maxtext_gpu_jax_stable, maxtext_gpu_jax_nightly]
176+
uses: ./.github/workflows/test_and_tag_docker_image.yml
177+
with:
178+
image_name: ${{ matrix.image }}${{ needs.setup.outputs.image_suffix }}
179+
image_date: ${{ needs.setup.outputs.image_date }}
180+
test_mode: gpu-pre-training
181+
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Copyright 2025 Google LLC
2+
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# This workflow tests a MaxText Docker image and, once the tests pass, tags it in GCR.
16+
name: Test and Tag MaxText Docker Images
17+
18+
on:
19+
workflow_call:
20+
inputs:
21+
image_name:
22+
required: true
23+
type: string
24+
test_mode:
25+
description: "Test mode (tpu-pre-training, tpu-post-training, gpu-pre-training)"
26+
required: true
27+
type: string
28+
image_date:
29+
required: true
30+
type: string
31+
32+
permissions:
33+
contents: read
34+
35+
jobs:
36+
test:
37+
strategy:
38+
fail-fast: false
39+
matrix:
40+
flavor: >-
41+
${{ fromJSON('{
42+
"gpu-pre-training": ["gpu-unit", "gpu-integration"],
43+
"tpu-post-training": ["post-training-tpu-unit", "post-training-tpu-integration"],
44+
"tpu-pre-training": ["tpu-unit", "tpu-integration", "cpu-unit"]
45+
}')[inputs.test_mode] }}
46+
uses: ./.github/workflows/run_tests_coordinator.yml
47+
with:
48+
flavor: ${{ matrix.flavor }}
49+
base_image: ${{ inputs.image_name }}:${{ inputs.image_date }}-build-${{ github.run_id }}
50+
is_scheduled_run: true
51+
maxtext_installed: true
52+
53+
notebook-test:
54+
if: inputs.test_mode == 'tpu-post-training'
55+
uses: ./.github/workflows/run_jupyter_notebooks.yml
56+
with:
57+
device_type: tpu
58+
device_name: v6e-4
59+
base_image: ${{ inputs.image_name }}:${{ inputs.image_date }}-build-${{ github.run_id }}
60+
cloud_runner: linux-x86-ct6e-180-4tpu
61+
maxtext_installed: true
62+
secrets:
63+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
64+
65+
tagging:
66+
needs: [test, notebook-test]
67+
if: |
68+
always() &&
69+
needs.test.result == 'success' &&
70+
(needs.notebook-test.result == 'success' || needs.notebook-test.result == 'skipped')
71+
runs-on: ubuntu-latest
72+
container: google/cloud-sdk:524.0.0
73+
steps:
74+
- name: Configure Docker
75+
run: gcloud auth configure-docker us-docker.pkg.dev,gcr.io -q
76+
77+
- name: Create Production Tags
78+
shell: bash
79+
run: |
80+
SOURCE_IMAGE="gcr.io/tpu-prod-env-multipod/${{ inputs.image_name }}"
81+
TEMP_IMG="$SOURCE_IMAGE:${{ inputs.image_date }}-build-${{ github.run_id }}"
82+
83+
# Validate existence first
84+
gcloud container images describe "$TEMP_IMG" > /dev/null
85+
86+
# 1. Date Tag
87+
gcloud container images add-tag "$TEMP_IMG" "$SOURCE_IMAGE:${{ inputs.image_date }}" --quiet
88+
89+
# 2. Latest Tag
90+
gcloud container images add-tag "$TEMP_IMG" "$SOURCE_IMAGE:latest" --quiet
91+
92+
# 3. Clean up Temporary Tag
93+
gcloud container images untag "$TEMP_IMG" --quiet

0 commit comments

Comments
 (0)