Skip to content

Commit 2def6c9

Browse files
committed
Trigger unit tests for the Docker image upload workflow
- Images are tagged with the date and as "latest" only when tests pass
1 parent 5563359 commit 2def6c9

4 files changed

Lines changed: 98 additions & 18 deletions

File tree

.github/workflows/UploadDockerImages.yml

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# This workflow builds and pushes MaxText images for both TPU and GPU devices.
1616
# It runs automatically daily at 12am UTC, on Pull Requests, or manually via Workflow Dispatch.
1717

18-
name: Build Images
18+
name: Build and Test Images
1919

2020
on:
2121
schedule:
@@ -32,6 +32,11 @@ on:
3232
- all
3333
- tpu
3434
- gpu
35+
for_dev_test:
36+
description: 'For development test purpose. All images will be added a -test suffix'
37+
required: false
38+
type: boolean
39+
default: false
3540

3641
permissions:
3742
contents: read
@@ -42,6 +47,7 @@ jobs:
4247
outputs:
4348
maxtext_sha: ${{ steps.vars.outputs.maxtext_sha }}
4449
image_date: ${{ steps.vars.outputs.image_date }}
50+
image_suffix: ${{ steps.vars.outputs.image_suffix }}
4551
steps:
4652
- name: Checkout MaxText
4753
uses: actions/checkout@v5
@@ -55,6 +61,13 @@ jobs:
5561
# Image date
5662
echo "image_date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT
5763
64+
# If for_dev_test is true, set suffix to -test, otherwise empty
65+
if [[ "${{ github.event.inputs.for_dev_test }}" == "true" ]]; then
66+
echo "image_suffix=-test" >> $GITHUB_OUTPUT
67+
else
68+
echo "image_suffix=" >> $GITHUB_OUTPUT
69+
fi
70+
5871
tpu-pre-training:
5972
name: ${{ matrix.image_name }}
6073
needs: setup
@@ -72,25 +85,27 @@ jobs:
7285
dockerfile: ./src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
7386
uses: ./.github/workflows/build_and_push_docker_image.yml
7487
with:
75-
image_name: ${{ matrix.image_name }}
88+
image_name: ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }}
7689
device: ${{ matrix.device }}
7790
build_mode: ${{ matrix.build_mode }}
7891
dockerfile: ${{ matrix.dockerfile }}
7992
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
8093
image_date: ${{ needs.setup.outputs.image_date }}
94+
test_mode: tpu-pre-training
8195

8296
tpu-post-training-nightly:
8397
name: tpu-post-training-nightly
8498
needs: [setup]
8599
uses: ./.github/workflows/build_and_push_docker_image.yml
86100
with:
87-
image_name: maxtext_post_training_nightly
101+
image_name: maxtext_post_training_nightly${{ needs.setup.outputs.image_suffix }}
88102
device: tpu
89103
build_mode: nightly
90104
workflow: post-training
91105
dockerfile: ./src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
92106
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
93107
image_date: ${{ needs.setup.outputs.image_date }}
108+
test_mode: tpu-post-training
94109

95110
gpu-pre-training:
96111
name: ${{ matrix.image_name }}
@@ -109,9 +124,10 @@ jobs:
109124
dockerfile: ./src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
110125
uses: ./.github/workflows/build_and_push_docker_image.yml
111126
with:
112-
image_name: ${{ matrix.image_name }}
127+
image_name: ${{ matrix.image_name }}${{ needs.setup.outputs.image_suffix }}
113128
device: ${{ matrix.device }}
114129
build_mode: ${{ matrix.build_mode }}
115130
dockerfile: ${{ matrix.dockerfile }}
116131
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
117132
image_date: ${{ needs.setup.outputs.image_date }}
133+
test_mode: gpu-pre-training

.github/workflows/build_and_push_docker_image.yml

Lines changed: 71 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ on:
4545
required: false
4646
type: string
4747
default: ''
48+
test_mode:
49+
description: "Test mode (tpu-pre-training, tpu-post-training, gpu-pre-training)"
50+
required: true
51+
type: string
4852

4953
permissions:
5054
contents: read
@@ -61,6 +65,8 @@ jobs:
6165
github.event.inputs.target_device == 'tpu' ||
6266
github.event.inputs.target_device == 'gpu'
6367
)
68+
outputs:
69+
should_run: ${{ steps.check.outputs.should_run }} # Map the step output to the job level
6470
steps:
6571
- name: Check if build should run
6672
id: check
@@ -87,7 +93,9 @@ jobs:
8793
ref: ${{ inputs.maxtext_sha }}
8894

8995
- name: Checkout post-training dependencies
90-
if: steps.check.outputs.should_run == 'true' && inputs.image_name == 'maxtext_post_training_nightly'
96+
if: |
97+
steps.check.outputs.should_run == 'true' &&
98+
contains(inputs.image_name, 'post_training_nightly')
9199
run: |
92100
git clone https://github.com/google/tunix.git ./tunix
93101
git clone https://github.com/vllm-project/vllm.git ./vllm
@@ -127,38 +135,89 @@ jobs:
127135
LIBTPU_VERSION=NONE
128136
INCLUDE_TEST_ASSETS=true
129137
138+
test:
139+
needs: build_and_push
140+
if: |
141+
needs.build_and_push.result == 'success' &&
142+
needs.build_and_push.outputs.should_run == 'true'
143+
strategy:
144+
fail-fast: false
145+
matrix:
146+
flavor: >-
147+
${{ fromJSON('{
148+
"gpu-pre-training": ["gpu-unit", "gpu-integration"],
149+
"tpu-post-training": ["post-training-tpu-unit", "post-training-tpu-integration", "post-training-cpu-unit"],
150+
"tpu-pre-training": ["tpu-unit", "tpu-integration", "cpu-unit"]
151+
}')[inputs.test_mode] }}
152+
uses: ./.github/workflows/run_tests_coordinator.yml
153+
with:
154+
flavor: ${{ matrix.flavor }}
155+
base_image: ${{ inputs.image_name }}:${{ github.run_id }}
156+
is_scheduled_run: true
157+
maxtext_installed: true
158+
159+
notebook-test:
160+
needs: build_and_push
161+
if: |
162+
inputs.test_mode == 'tpu-post-training' &&
163+
needs.build_and_push.result == 'success' &&
164+
needs.build_and_push.outputs.should_run == 'true'
165+
uses: ./.github/workflows/run_jupyter_notebooks.yml
166+
with:
167+
device_type: tpu
168+
device_name: v6e-4
169+
base_image: ${{ inputs.image_name }}:${{ github.run_id }}
170+
cloud_runner: linux-x86-ct6e-180-4tpu
171+
maxtext_installed: true
172+
secrets:
173+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
174+
175+
tagging:
176+
needs: [test, notebook-test]
177+
if: |
178+
always() &&
179+
needs.test.result == 'success' &&
180+
(needs.notebook-test.result == 'success' || needs.notebook-test.result == 'skipped')
181+
runs-on: linux-x86-n2-16-buildkit
182+
container: google/cloud-sdk:524.0.0
183+
steps:
184+
- name: Configure Docker
185+
run: gcloud auth configure-docker us-docker.pkg.dev,gcr.io -q
186+
130187
- name: Add tags to Docker image
131-
if: steps.check.outputs.should_run == 'true'
132188
shell: bash
133189
run: |
134190
SOURCE_IMAGE="gcr.io/tpu-prod-env-multipod/${INPUTS_IMAGE_NAME}"
191+
TEMP_IMG="${SOURCE_IMAGE}:${{ github.run_id }}"
135192
136193
if [[ $INPUTS_VERSION_NAME ]]; then
137194
echo "Tagging docker images corresponding to PyPI release..."
138-
gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${INPUTS_VERSION_NAME}" --quiet
195+
gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:${INPUTS_VERSION_NAME}" --quiet
139196
else
140197
echo "Tagging docker images corresponding to nightly release..."
141198
142199
# Add date tag
143-
gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${INPUTS_IMAGE_DATE}" --quiet
200+
gcloud container images add-tag "${TEMP_IMG}" "$SOURCE_IMAGE:${INPUTS_IMAGE_DATE}" --quiet
144201
145202
# Convert date to YYYYMMDD format
146203
clean_date=$(echo "${INPUTS_IMAGE_DATE}" | sed 's/[-:]//g' | cut -c1-8)
147204
148205
# Add MaxText tag
149206
maxtext_hash=$(git rev-parse --short HEAD)
150-
gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:maxtext_${maxtext_hash}_${clean_date}" --quiet
151-
152-
# Add post-training dependencies tags
153-
if [ "${{ inputs.workflow }}" == "post-training" ]; then
154-
for dir in tunix vllm tpu-inference; do
155-
if [ -d "./$dir" ]; then
156-
dir_hash=$(git -C "$dir" rev-parse --short HEAD)
157-
gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${dir}_${dir_hash}_${clean_date}" --quiet
207+
gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:maxtext_${maxtext_hash}_${clean_date}" --quiet
208+
209+
# Add post-training dependencies tags
210+
if [ "${{ inputs.workflow }}" == "post-training" ]; then
211+
for dir in tunix vllm tpu-inference; do
212+
if [ -d "./$dir" ]; then
213+
dir_hash=$(git -C "$dir" rev-parse --short HEAD)
214+
gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:${dir}_${dir_hash}_${clean_date}" --quiet
158215
fi
159216
done
160217
fi
161218
fi
219+
# Latest Tag
220+
gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:latest" --quiet
162221
env:
163222
INPUTS_IMAGE_NAME: ${{ inputs.image_name }}
164223
INPUTS_IMAGE_DATE: ${{ inputs.image_date }}

.github/workflows/pypi_release.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,4 @@ jobs:
123123
dockerfile: ${{ matrix.dockerfile }}
124124
maxtext_sha: ${{ github.sha }}
125125
version_name: ${{ needs.get_latest_maxtext_pypi_version.outputs.latest_pypi_version }}
126+
test_mode: ${{ matrix.device}}-${{ matrix.workflow }}

.github/workflows/run_tests_against_package.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,16 +128,21 @@ jobs:
128128
if [ "${INPUTS_MAXTEXT_INSTALLED}" == "true" ]; then
129129
# Move to the directory where code is baked into the image. See the Dockerfile.
130130
cd /deps
131+
REPO_ROOT="/deps"
132+
131133
PYTHON_EXE="python3"
132134
# Disable coverage flags when testing against a pre-installed package
133135
PYTEST_COV_ARGS=""
134136
else
137+
REPO_ROOT="${{ github.workspace }}"
138+
135139
# Use the local virtual environment created in Step 3
136140
PYTHON_EXE=".venv/bin/python3"
137141
# Ensure pytest-cov is available and enable coverage flags
138142
$PYTHON_EXE -m pip install --quiet pytest-cov
139143
PYTEST_COV_ARGS="--cov=src/MaxText --cov=maxtext --cov-report=xml --cov-report=term"
140144
fi
145+
export PYTHONPATH="${REPO_ROOT}/src${PYTHONPATH:+:${PYTHONPATH}}"
141146
142147
if [ "${INPUTS_IS_SCHEDULED_RUN}" == "true" ]; then
143148
FINAL_PYTEST_MARKER="${INPUTS_PYTEST_MARKER}"
@@ -159,7 +164,7 @@ jobs:
159164
else
160165
SPLIT_ARGS=""
161166
fi
162-
$PYTHON_EXE -m pytest ${INPUTS_PYTEST_ADDOPTS} \
167+
$PYTHON_EXE -m pytest ${REPO_ROOT}/tests ${REPO_ROOT}/src ${INPUTS_PYTEST_ADDOPTS} \
163168
-v \
164169
-m "${FINAL_PYTEST_MARKER}" \
165170
--durations=0 \
@@ -168,7 +173,6 @@ jobs:
168173
${INPUTS_PYTEST_EXTRA_ARGS}
169174
170175
env:
171-
PYTHONPATH: "${{ github.workspace }}/src"
172176
INPUTS_IS_SCHEDULED_RUN: ${{ inputs.is_scheduled_run }}
173177
INPUTS_PYTEST_MARKER: ${{ inputs.pytest_marker }}
174178
INPUTS_DEVICE_TYPE: ${{ inputs.device_type }}

0 commit comments

Comments
 (0)