Skip to content

Commit b74e21b

Browse files
committed
Trigger unit tests for docker images upload workflow
- images are tagged with the date and with "latest" only when the tests pass
1 parent e735af1 commit b74e21b

7 files changed

Lines changed: 101 additions & 40 deletions

.github/workflows/UploadDockerImages.yml

Lines changed: 19 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ on:
3232
- all
3333
- tpu
3434
- gpu
35+
image_suffix:
36+
description: 'An image suffix can be provided to add to the image name'
37+
required: false
38+
type: string
39+
default: ""
3540

3641
permissions:
3742
contents: read
@@ -55,7 +60,7 @@ jobs:
5560
# Image date
5661
echo "image_date=$(date +%Y-%m-%d)" >> $GITHUB_OUTPUT
5762
58-
tpu-pre-training:
63+
build-and-test:
5964
name: ${{ matrix.image_name }}
6065
needs: setup
6166
strategy:
@@ -64,54 +69,37 @@ jobs:
6469
include:
6570
- device: tpu
6671
build_mode: stable
72+
workflow: pre-training
6773
image_name: maxtext_jax_stable
6874
dockerfile: ./src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
6975
- device: tpu
7076
build_mode: nightly
77+
workflow: pre-training
7178
image_name: maxtext_jax_nightly
7279
dockerfile: ./src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
73-
uses: ./.github/workflows/build_and_push_docker_image.yml
74-
with:
75-
image_name: ${{ matrix.image_name }}
76-
device: ${{ matrix.device }}
77-
build_mode: ${{ matrix.build_mode }}
78-
dockerfile: ${{ matrix.dockerfile }}
79-
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
80-
image_date: ${{ needs.setup.outputs.image_date }}
81-
82-
tpu-post-training-nightly:
83-
name: tpu-post-training-nightly
84-
needs: [setup]
85-
uses: ./.github/workflows/build_and_push_docker_image.yml
86-
with:
87-
image_name: maxtext_post_training_nightly
88-
device: tpu
89-
build_mode: nightly
90-
workflow: post-training
91-
dockerfile: ./src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
92-
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
93-
image_date: ${{ needs.setup.outputs.image_date }}
94-
95-
gpu-pre-training:
96-
name: ${{ matrix.image_name }}
97-
needs: setup
98-
strategy:
99-
fail-fast: false
100-
matrix:
101-
include:
80+
- device: tpu
81+
build_mode: nightly
82+
workflow: post-training
83+
image_name: maxtext_post_training_nightly
84+
dockerfile: ./src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
10285
- device: gpu
10386
build_mode: stable
87+
workflow: pre-training
10488
image_name: maxtext_gpu_jax_stable
10589
dockerfile: ./src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
10690
- device: gpu
10791
build_mode: nightly
92+
workflow: pre-training
10893
image_name: maxtext_gpu_jax_nightly
10994
dockerfile: ./src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
11095
uses: ./.github/workflows/build_and_push_docker_image.yml
11196
with:
112-
image_name: ${{ matrix.image_name }}
97+
image_name: ${{ matrix.image_name }}${{ inputs.image_suffix }}
11398
device: ${{ matrix.device }}
11499
build_mode: ${{ matrix.build_mode }}
100+
workflow: ${{ matrix.workflow }}
115101
dockerfile: ${{ matrix.dockerfile }}
116102
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
117103
image_date: ${{ needs.setup.outputs.image_date }}
104+
secrets:
105+
HF_TOKEN: ${{ secrets.HF_TOKEN }}

.github/workflows/build_and_push_docker_image.yml

Lines changed: 70 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ on:
4545
required: false
4646
type: string
4747
default: ''
48+
secrets:
49+
HF_TOKEN:
50+
required: true
4851

4952
permissions:
5053
contents: read
@@ -62,6 +65,8 @@ jobs:
6265
github.event.inputs.target_device == 'tpu' ||
6366
github.event.inputs.target_device == 'gpu'
6467
)
68+
outputs:
69+
should_run: ${{ steps.check.outputs.should_run }} # Map the step output to the job level
6570
steps:
6671
- name: Check if build should run
6772
id: check
@@ -80,6 +85,14 @@ jobs:
8085
INPUTS_IMAGE_NAME: ${{ inputs.image_name }}
8186
INPUTS_BUILD_MODE: ${{ inputs.build_mode }}
8287

88+
- name: Matrix Debugger
89+
run: |
90+
echo "device: ${{ inputs.device }}"
91+
echo "workflow: ${{ inputs.workflow }}"
92+
echo "build_mode: ${{ inputs.build_mode }}"
93+
echo "image_name: ${{ inputs.image_name }}"
94+
echo "dockerfile: ${{ inputs.dockerfile }}"
95+
8396
- name: Checkout MaxText
8497
uses: actions/checkout@v5
8598
if: steps.check.outputs.should_run == 'true'
@@ -121,29 +134,81 @@ jobs:
121134
LIBTPU_VERSION=NONE
122135
INCLUDE_TEST_ASSETS=true
123136
137+
test:
138+
needs: build_and_push
139+
if: |
140+
needs.build_and_push.result == 'success' &&
141+
needs.build_and_push.outputs.should_run == 'true'
142+
strategy:
143+
fail-fast: false
144+
matrix:
145+
flavor: >-
146+
${{ fromJSON('{
147+
"gpu-pre-training": ["gpu-unit", "gpu-integration"],
148+
"tpu-post-training": ["tpu-post-training-unit", "tpu-post-training-integration", "cpu-post-training-unit"],
149+
"tpu-pre-training": ["tpu-unit", "tpu-integration", "cpu-unit"]
150+
}')[format('{0}-{1}', inputs.device, inputs.workflow)] }}
151+
uses: ./.github/workflows/run_tests_coordinator.yml
152+
with:
153+
flavor: ${{ matrix.flavor }}
154+
base_image: ${{ inputs.image_name }}:${{ github.run_id }}
155+
is_scheduled_run: true
156+
maxtext_installed: true
157+
158+
notebook-test:
159+
needs: build_and_push
160+
if: |
161+
inputs.device == 'tpu' &&
162+
inputs.workflow == 'post-training' &&
163+
needs.build_and_push.result == 'success' &&
164+
needs.build_and_push.outputs.should_run == 'true'
165+
uses: ./.github/workflows/run_jupyter_notebooks.yml
166+
with:
167+
device_type: tpu
168+
device_name: v6e-4
169+
base_image: ${{ inputs.image_name }}:${{ github.run_id }}
170+
cloud_runner: linux-x86-ct6e-180-4tpu
171+
maxtext_installed: true
172+
secrets:
173+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
174+
175+
tagging:
176+
needs: [test, notebook-test]
177+
if: |
178+
always() &&
179+
needs.test.result == 'success' &&
180+
(needs.notebook-test.result == 'success' || needs.notebook-test.result == 'skipped')
181+
runs-on: linux-x86-n2-16-buildkit
182+
container: google/cloud-sdk:524.0.0
183+
steps:
184+
- name: Configure Docker
185+
run: gcloud auth configure-docker us-docker.pkg.dev,gcr.io -q
186+
124187
- name: Add tags to Docker image
125-
if: steps.check.outputs.should_run == 'true'
126188
shell: bash
127189
run: |
128190
SOURCE_IMAGE="gcr.io/tpu-prod-env-multipod/${INPUTS_IMAGE_NAME}"
191+
TEMP_IMG="${SOURCE_IMAGE}:${{ github.run_id }}"
129192
130193
if [[ $INPUTS_VERSION_NAME ]]; then
131194
echo "Tagging docker images corresponding to PyPI release..."
132-
gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${INPUTS_VERSION_NAME}" --quiet
195+
gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:${INPUTS_VERSION_NAME}" --quiet
133196
else
134197
echo "Tagging docker images corresponding to nightly release..."
135198
136199
# Add date tag
137-
gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${INPUTS_IMAGE_DATE}" --quiet
200+
gcloud container images add-tag "${TEMP_IMG}" "$SOURCE_IMAGE:${INPUTS_IMAGE_DATE}" --quiet
138201
139202
# Convert date to YYYYMMDD format
140203
clean_date=$(echo "${INPUTS_IMAGE_DATE}" | sed 's/[-:]//g' | cut -c1-8)
141204
142205
# Add MaxText tag
143-
maxtext_hash=$(git rev-parse --short HEAD)
144-
gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:maxtext_${maxtext_hash}_${clean_date}" --quiet
206+
gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:maxtext_${MAXTEXT_SHA}_${clean_date}" --quiet
145207
fi
208+
# Latest Tag
209+
gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:latest" --quiet
146210
env:
147211
INPUTS_IMAGE_NAME: ${{ inputs.image_name }}
148212
INPUTS_IMAGE_DATE: ${{ inputs.image_date }}
149213
INPUTS_VERSION_NAME: ${{ inputs.version_name }}
214+
MAXTEXT_SHA: ${{ inputs.maxtext_sha }}

.github/workflows/pypi_release.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,5 @@ jobs:
123123
dockerfile: ${{ matrix.dockerfile }}
124124
maxtext_sha: ${{ github.sha }}
125125
version_name: ${{ needs.get_latest_maxtext_pypi_version.outputs.latest_pypi_version }}
126+
secrets:
127+
HF_TOKEN: ${{ secrets.HF_TOKEN }}

.github/workflows/run_jupyter_notebooks.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ jobs:
7878
- name: Run Post-Training Notebooks
7979
shell: bash
8080
env:
81-
PYTHONPATH: "${{ github.workspace }}/src"
8281
HF_TOKEN: ${{ secrets.HF_TOKEN }}
8382
MAXTEXT_INSTALLED: ${{ inputs.maxtext_installed }}
8483
run: |
@@ -93,6 +92,7 @@ jobs:
9392
PAPERMILL_EXE=".venv/bin/papermill"
9493
source .venv/bin/activate
9594
fi
95+
# Prepend the checkout's src/ directory to PYTHONPATH, keeping existing entries.
# Use $(pwd): "${pwd}" is an unset lowercase variable and would yield "/src".
export PYTHONPATH="$(pwd)/src${PYTHONPATH:+:${PYTHONPATH}}"
9696
9797
export MAXTEXT_REPO_ROOT=$(pwd)
9898
export MAXTEXT_PKG_DIR=$(pwd)/src/maxtext

.github/workflows/run_tests_against_package.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ jobs:
137137
uv pip install pytest-cov
138138
PYTEST_COV_ARGS="--cov=MaxText --cov=maxtext --cov-report=xml --cov-report=term"
139139
fi
140+
# Prepend the checkout's src/ directory to PYTHONPATH, keeping existing entries.
# Use $(pwd): "${pwd}" is an unset lowercase variable and would yield "/src".
export PYTHONPATH="$(pwd)/src${PYTHONPATH:+:${PYTHONPATH}}"
140141
141142
if [ "${INPUTS_IS_SCHEDULED_RUN}" == "true" ]; then
142143
FINAL_PYTEST_MARKER="${INPUTS_PYTEST_MARKER}"
@@ -168,7 +169,6 @@ jobs:
168169
${INPUTS_PYTEST_EXTRA_ARGS}
169170
170171
env:
171-
PYTHONPATH: "${{ github.workspace }}/src"
172172
INPUTS_IS_SCHEDULED_RUN: ${{ inputs.is_scheduled_run }}
173173
INPUTS_PYTEST_MARKER: ${{ inputs.pytest_marker }}
174174
INPUTS_DEVICE_TYPE: ${{ inputs.device_type }}

src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,12 @@ RUN --mount=type=cache,target=/root/.cache/uv \
6464
# Now copy the remaining code (source files that may change frequently)
6565
COPY ${PACKAGE_DIR}/maxtext/ src/maxtext/
6666
COPY ${PACKAGE_DIR}/MaxText/ src/MaxText/
67+
# Now copy the resources needed for pytest:
6768
COPY tests*/ tests/
69+
COPY pytest.ini pytest.ini
6870
COPY benchmarks*/ benchmarks/
6971

72+
7073
# Download test assets from GCS if building image with test assets
7174
ARG INCLUDE_TEST_ASSETS=false
7275
RUN if [ "$INCLUDE_TEST_ASSETS" = "true" ]; then \
@@ -76,4 +79,4 @@ RUN if [ "$INCLUDE_TEST_ASSETS" = "true" ]; then \
7679
fi; \
7780
fi
7881

79-
ENV PYTHONPATH="/deps/src:${PYTHONPATH}"
82+
ENV PYTHONPATH="/deps/src${PYTHONPATH:+:${PYTHONPATH}}"

src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
6464
# Now copy the remaining code (source files that may change frequently)
6565
COPY ${PACKAGE_DIR}/maxtext/ src/maxtext/
6666
COPY ${PACKAGE_DIR}/MaxText/ src/MaxText/
67+
# Now copy the resources needed for pytest:
68+
COPY tools*/ tools/
6769
COPY tests*/ tests/
70+
COPY pytest.ini pytest.ini
6871
COPY benchmarks*/ benchmarks/
6972

7073
# Download test assets from GCS if building image with test assets
@@ -76,4 +79,4 @@ RUN if [ "$INCLUDE_TEST_ASSETS" = "true" ]; then \
7679
fi; \
7780
fi
7881

79-
ENV PYTHONPATH="/deps/src:${PYTHONPATH}"
82+
ENV PYTHONPATH="/deps/src${PYTHONPATH:+:${PYTHONPATH}}"

0 commit comments

Comments
 (0)