1515# This workflow builds, tests, and pushes MaxText images for both TPU and GPU devices.
1616# It runs automatically daily at 12am UTC, on Pull Requests, or manually via Workflow Dispatch.
1717
18- name : Build Images
18+ name : Build and Test Images
1919
2020on :
2121 schedule :
3232 - all
3333 - tpu
3434 - gpu
# Manual-dispatch toggle. When true, the setup job emits "-test" as
# image_suffix, which is appended to every image_name passed to the
# build and test workflows.
for_dev_test:
  description: 'For development testing only. A -test suffix is appended to every image name.'
  required: false
  type: boolean
  default: false
3540
3641permissions :
3742 contents : read
4247 outputs :
4348 maxtext_sha : ${{ steps.vars.outputs.maxtext_sha }}
4449 image_date : ${{ steps.vars.outputs.image_date }}
50+ image_suffix : ${{ steps.vars.outputs.image_suffix }}
4551 steps :
4652 - name : Checkout MaxText
4753 uses : actions/checkout@v5
5561 # Image date
5662 echo "image_date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT
5763
# Image suffix: "-test" when for_dev_test is true, otherwise empty.
# The choice is made inside the expression so that only one of these two
# constant strings is ever substituted into the script; the raw input value
# never reaches the shell. On events without inputs the comparison is false
# and the suffix is empty.
echo "image_suffix=${{ github.event.inputs.for_dev_test == 'true' && '-test' || '' }}" >> "$GITHUB_OUTPUT"
70+
5871 tpu-pre-training :
5972 name : ${{ matrix.image_name }}
6073 needs : setup
7285 dockerfile : ./dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
7386 uses : ./.github/workflows/build_and_push_docker_image.yml
7487 with :
75- image_name : ${{ matrix.image_name }}
88+ image_name : ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }}
7689 device : ${{ matrix.device }}
7790 build_mode : ${{ matrix.build_mode }}
7891 dockerfile : ${{ matrix.dockerfile }}
8497 needs : setup
8598 uses : ./.github/workflows/build_and_push_docker_image.yml
8699 with :
87- image_name : maxtext_post_training_stable
100+ image_name : maxtext_post_training_stable${{ needs.setup.outputs.image_suffix }}
88101 device : tpu
89102 build_mode : stable
90103 workflow : post-training
@@ -97,14 +110,14 @@ jobs:
97110 needs : [setup, tpu-post-training-stable]
98111 uses : ./.github/workflows/build_and_push_docker_image.yml
99112 with :
100- image_name : maxtext_post_training_nightly
113+ image_name : maxtext_post_training_nightly${{ needs.setup.outputs.image_suffix }}
101114 device : tpu
102115 build_mode : nightly
103116 workflow : post-training
104117 dockerfile : ./dependencies/dockerfiles/maxtext_post_training_local_dependencies.Dockerfile
105118 maxtext_sha : ${{ needs.setup.outputs.maxtext_sha }}
106119 image_date : ${{ needs.setup.outputs.image_date }}
107- base_image : gcr.io/tpu-prod-env-multipod/maxtext_post_training_stable:${{ needs.setup.outputs.image_date }}
120+ base_image : gcr.io/tpu-prod-env-multipod/maxtext_post_training_stable${{ needs.setup.outputs.image_suffix }} :${{ needs.setup.outputs.image_date }}-build-${{ github.run_id }}
108121
109122 gpu-pre-training :
110123 name : ${{ matrix.image_name }}
@@ -123,9 +136,54 @@ jobs:
123136 dockerfile : ./dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
124137 uses : ./.github/workflows/build_and_push_docker_image.yml
125138 with :
126- image_name : ${{ matrix.image_name }}
139+ image_name : ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }}
127140 device : ${{ matrix.device }}
128141 build_mode : ${{ matrix.build_mode }}
129142 dockerfile : ${{ matrix.dockerfile }}
130143 maxtext_sha : ${{ needs.setup.outputs.maxtext_sha }}
131144 image_date : ${{ needs.setup.outputs.image_date }}
145+
146+ # TEST JOBS
# Calls the test-and-tag workflow once per TPU pre-training image, after the
# setup and tpu-pre-training jobs have finished.
pre-training-tpu-tests:
  needs:
    - setup
    - tpu-pre-training
  strategy:
    # Keep testing the other image even if one matrix entry fails.
    fail-fast: false
    matrix:
      image:
        - maxtext_jax_stable
        - maxtext_jax_nightly
  uses: ./.github/workflows/test_and_tag_docker_image.yml
  with:
    # image_suffix is "-test" for dev-test runs and empty otherwise.
    image_name: ${{ matrix.image }}${{ needs.setup.outputs.image_suffix }}
    image_date: ${{ needs.setup.outputs.image_date }}
    test_mode: tpu-pre-training
158+
# Calls the test-and-tag workflow for the stable TPU post-training image,
# after the setup and tpu-post-training-stable jobs have finished.
# No `strategy` block: this job has no matrix, so `fail-fast` had no effect.
post-training-tpu-stable-tests:
  needs:
    - setup
    - tpu-post-training-stable
  uses: ./.github/workflows/test_and_tag_docker_image.yml
  with:
    # image_suffix is "-test" for dev-test runs and empty otherwise.
    image_name: maxtext_post_training_stable${{ needs.setup.outputs.image_suffix }}
    image_date: ${{ needs.setup.outputs.image_date }}
    test_mode: tpu-post-training
168+
# Calls the test-and-tag workflow for the nightly TPU post-training image,
# after the setup and tpu-post-training-nightly jobs have finished.
# No `strategy` block: this job has no matrix, so `fail-fast` had no effect.
post-training-tpu-nightly-tests:
  needs:
    - setup
    - tpu-post-training-nightly
  uses: ./.github/workflows/test_and_tag_docker_image.yml
  with:
    # image_suffix is "-test" for dev-test runs and empty otherwise.
    image_name: maxtext_post_training_nightly${{ needs.setup.outputs.image_suffix }}
    image_date: ${{ needs.setup.outputs.image_date }}
    test_mode: tpu-post-training
178+
# Calls the test-and-tag workflow once per GPU pre-training image, after the
# setup and gpu-pre-training jobs have finished.
pre-training-gpu-tests:
  needs:
    - setup
    - gpu-pre-training
  strategy:
    # Keep testing the other image even if one matrix entry fails.
    fail-fast: false
    matrix:
      image:
        - maxtext_gpu_jax_stable
        - maxtext_gpu_jax_nightly
  uses: ./.github/workflows/test_and_tag_docker_image.yml
  with:
    # image_suffix is "-test" for dev-test runs and empty otherwise.
    image_name: ${{ matrix.image }}${{ needs.setup.outputs.image_suffix }}
    image_date: ${{ needs.setup.outputs.image_date }}
    test_mode: gpu-pre-training
0 commit comments