|
45 | 45 | required: false |
46 | 46 | type: string |
47 | 47 | default: '' |
| 48 | + secrets: |
| 49 | + HF_TOKEN: |
| 50 | + required: true |
48 | 51 |
|
49 | 52 | permissions: |
50 | 53 | contents: read |
|
62 | 65 | github.event.inputs.target_device == 'tpu' || |
63 | 66 | github.event.inputs.target_device == 'gpu' |
64 | 67 | ) |
| 68 | + outputs: |
| 69 | + should_run: ${{ steps.check.outputs.should_run }} # Map the step output to the job level |
65 | 70 | steps: |
66 | 71 | - name: Check if build should run |
67 | 72 | id: check |
|
80 | 85 | INPUTS_IMAGE_NAME: ${{ inputs.image_name }} |
81 | 86 | INPUTS_BUILD_MODE: ${{ inputs.build_mode }} |
82 | 87 |
|
- name: Matrix Debugger
  # Prints the inputs that select the build/test matrix.
  # The values reach the script through env instead of being expanded into it,
  # so an input containing quotes, backticks or $(...) is echoed literally
  # rather than being parsed and executed by the shell.
  run: |
    echo "device: ${INPUTS_DEVICE}"
    echo "workflow: ${INPUTS_WORKFLOW}"
    echo "build_mode: ${INPUTS_BUILD_MODE}"
    echo "image_name: ${INPUTS_IMAGE_NAME}"
    echo "dockerfile: ${INPUTS_DOCKERFILE}"
  env:
    INPUTS_DEVICE: ${{ inputs.device }}
    INPUTS_WORKFLOW: ${{ inputs.workflow }}
    INPUTS_BUILD_MODE: ${{ inputs.build_mode }}
    INPUTS_IMAGE_NAME: ${{ inputs.image_name }}
    INPUTS_DOCKERFILE: ${{ inputs.dockerfile }}
| 95 | +
|
83 | 96 | - name: Checkout MaxText |
84 | 97 | uses: actions/checkout@v5 |
85 | 98 | if: steps.check.outputs.should_run == 'true' |
@@ -126,27 +139,85 @@ jobs: |
126 | 139 | shell: bash |
127 | 140 | run: | |
128 | 141 | SOURCE_IMAGE="gcr.io/tpu-prod-env-multipod/${INPUTS_IMAGE_NAME}" |
| 142 | + TEMP_IMG="${SOURCE_IMAGE}:${{ github.run_id }}" |
129 | 143 |
|
130 | 144 | if [[ $INPUTS_VERSION_NAME ]]; then |
131 | 145 | echo "Tagging docker images corresponding to PyPI release..." |
132 | | - gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${INPUTS_VERSION_NAME}" --quiet |
| 146 | + gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:${INPUTS_VERSION_NAME}" --quiet |
133 | 147 | else |
134 | 148 | echo "Tagging docker images corresponding to nightly release..." |
135 | 149 |
|
136 | 150 | # Add date tag |
137 | | - gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${INPUTS_IMAGE_DATE}" --quiet |
| 151 | + gcloud container images add-tag "${TEMP_IMG}" "$SOURCE_IMAGE:${INPUTS_IMAGE_DATE}" --quiet |
138 | 152 |
|
139 | 153 | # Convert date to YYYYMMDD format |
140 | 154 | clean_date=$(echo "${INPUTS_IMAGE_DATE}" | sed 's/[-:]//g' | cut -c1-8) |
141 | 155 |
|
142 | 156 | # Add MaxText tag |
143 | | - maxtext_hash=$(git rev-parse --short HEAD) |
144 | | - gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:maxtext_${maxtext_hash}_${clean_date}" --quiet |
145 | | -
|
146 | | - # Add latest tag (TODO: add this tag only after tests pass) |
147 | | - gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:latest" --quiet |
| 157 | + gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:maxtext_${MAXTEXT_SHA}_${clean_date}" --quiet |
148 | 158 | fi |
149 | 159 | env: |
150 | 160 | INPUTS_IMAGE_NAME: ${{ inputs.image_name }} |
151 | 161 | INPUTS_IMAGE_DATE: ${{ inputs.image_date }} |
152 | 162 | INPUTS_VERSION_NAME: ${{ inputs.version_name }} |
| 163 | + MAXTEXT_SHA: ${{ inputs.maxtext_sha }} |
| 164 | + |
test:
  # Calls the test coordinator workflow once per flavor, against the image
  # tagged with this run's id. Runs only when build_and_push succeeded and
  # reported should_run == 'true'.
  needs: build_and_push
  if: >-
    needs.build_and_push.result == 'success' &&
    needs.build_and_push.outputs.should_run == 'true'
  uses: ./.github/workflows/run_tests_coordinator.yml
  strategy:
    # Let the remaining flavors finish even when one of them fails.
    fail-fast: false
    matrix:
      # The JSON map is keyed by "<device>-<workflow>"; the entry selected by
      # the current inputs is the list of flavors to run.
      flavor: >-
        ${{ fromJSON('{
        "gpu-pre-training": ["gpu-unit", "gpu-integration"],
        "tpu-post-training": ["tpu-post-training-unit", "tpu-post-training-integration", "cpu-post-training-unit"],
        "tpu-pre-training": ["tpu-unit", "tpu-integration", "cpu-unit"]
        }')[format('{0}-{1}', inputs.device, inputs.workflow)] }}
  with:
    base_image: ${{ inputs.image_name }}:${{ github.run_id }}
    flavor: ${{ matrix.flavor }}
    maxtext_installed: true
    is_scheduled_run: true
| 185 | + |
notebook-test:
  # Calls the run_jupyter_notebooks.yml reusable workflow against the image
  # tagged with this run's id. Limited to device 'tpu' with workflow
  # 'post-training', and only when build_and_push succeeded and reported
  # should_run == 'true'.
  needs: build_and_push
  if: >-
    inputs.device == 'tpu' &&
    inputs.workflow == 'post-training' &&
    needs.build_and_push.result == 'success' &&
    needs.build_and_push.outputs.should_run == 'true'
  uses: ./.github/workflows/run_jupyter_notebooks.yml
  secrets:
    # Forward the caller-provided token to the called workflow.
    HF_TOKEN: ${{ secrets.HF_TOKEN }}
  with:
    base_image: ${{ inputs.image_name }}:${{ github.run_id }}
    maxtext_installed: true
    device_type: tpu
    device_name: v6e-4
    cloud_runner: linux-x86-ct6e-180-4tpu
| 202 | + |
tagging:
  # Adds the `latest` tag to the image tagged with this run's id.
  # always() stops the job from being skipped automatically when
  # notebook-test is skipped; the explicit checks then require test to have
  # succeeded and notebook-test to have either succeeded or been skipped.
  needs: [test, notebook-test]
  if: >-
    always() &&
    needs.test.result == 'success' &&
    (needs.notebook-test.result == 'success' || needs.notebook-test.result == 'skipped')
  runs-on: linux-x86-n2-16-buildkit
  container: google/cloud-sdk:524.0.0
  steps:
    - name: Configure Docker
      run: gcloud auth configure-docker us-docker.pkg.dev,gcr.io -q

    - name: Add tags to Docker image
      shell: bash
      env:
        INPUTS_IMAGE_NAME: ${{ inputs.image_name }}
        RUN_ID: ${{ github.run_id }}
      run: |
        image_repo="gcr.io/tpu-prod-env-multipod/${INPUTS_IMAGE_NAME}"
        # Latest Tag
        gcloud container images add-tag "${image_repo}:${RUN_ID}" "${image_repo}:latest" --quiet
0 commit comments