|
45 | 45 | required: false |
46 | 46 | type: string |
47 | 47 | default: '' |
| 48 | + test_mode: |
| 49 | + description: "Test mode (tpu-pre-training, tpu-post-training, gpu-pre-training)" |
| 50 | + required: true |
| 51 | + type: string |
48 | 52 |
|
49 | 53 | permissions: |
50 | 54 | contents: read |
|
61 | 65 | github.event.inputs.target_device == 'tpu' || |
62 | 66 | github.event.inputs.target_device == 'gpu' |
63 | 67 | ) |
| 68 | + outputs: |
| 69 | + should_run: ${{ steps.check.outputs.should_run }} # Map the step output to the job level |
64 | 70 | steps: |
65 | 71 | - name: Check if build should run |
66 | 72 | id: check |
|
87 | 93 | ref: ${{ inputs.maxtext_sha }} |
88 | 94 |
|
89 | 95 | - name: Checkout post-training dependencies |
90 | | - if: steps.check.outputs.should_run == 'true' && inputs.image_name == 'maxtext_post_training_nightly' |
| 96 | + if: | |
| 97 | + steps.check.outputs.should_run == 'true' && |
| 98 | + contains(inputs.image_name, 'post_training_nightly') |
91 | 99 | run: | |
92 | 100 | git clone https://github.com/google/tunix.git ./tunix |
93 | 101 | git clone https://github.com/vllm-project/vllm.git ./vllm |
@@ -127,38 +135,89 @@ jobs: |
127 | 135 | LIBTPU_VERSION=NONE |
128 | 136 | INCLUDE_TEST_ASSETS=true |
129 | 137 |
|
| 138 | + test:  # Fans out the test flavors for inputs.test_mode via the reusable coordinator workflow |
| 139 | + needs: build_and_push |
| 140 | + if: |  # run only when the build job succeeded and reported should_run == 'true' |
| 141 | + needs.build_and_push.result == 'success' && |
| 142 | + needs.build_and_push.outputs.should_run == 'true' |
| 143 | + strategy: |
| 144 | + fail-fast: false  # a failing flavor does not cancel the remaining matrix entries |
| 145 | + matrix: |
| 146 | + flavor: >-  # folded scalar holding one expression: JSON map of test_mode -> flavor list, indexed by inputs.test_mode |
| 147 | + ${{ fromJSON('{ |
| 148 | + "gpu-pre-training": ["gpu-unit", "gpu-integration"], |
| 149 | + "tpu-post-training": ["post-training-tpu-unit", "post-training-tpu-integration", "post-training-cpu-unit"], |
| 150 | + "tpu-pre-training": ["tpu-unit", "tpu-integration", "cpu-unit"] |
| 151 | + }')[inputs.test_mode] }} |
| 152 | + uses: ./.github/workflows/run_tests_coordinator.yml  # called once per matrix flavor |
| 153 | + with: |
| 154 | + flavor: ${{ matrix.flavor }} |
| 155 | + base_image: ${{ inputs.image_name }}:${{ github.run_id }}  # image reference tagged with this workflow run's id |
| 156 | + is_scheduled_run: true |
| 157 | + maxtext_installed: true |
| 158 | + |
| 159 | + notebook-test:  # Calls the Jupyter-notebook workflow; only for test_mode 'tpu-post-training' |
| 160 | + needs: build_and_push |
| 161 | + if: |  # all three conditions must hold, otherwise the job is skipped |
| 162 | + inputs.test_mode == 'tpu-post-training' && |
| 163 | + needs.build_and_push.result == 'success' && |
| 164 | + needs.build_and_push.outputs.should_run == 'true' |
| 165 | + uses: ./.github/workflows/run_jupyter_notebooks.yml |
| 166 | + with: |
| 167 | + device_type: tpu |
| 168 | + device_name: v6e-4 |
| 169 | + base_image: ${{ inputs.image_name }}:${{ github.run_id }}  # same run-id-tagged image reference the test job uses |
| 170 | + cloud_runner: linux-x86-ct6e-180-4tpu |
| 171 | + maxtext_installed: true |
| 172 | + secrets: |
| 173 | + HF_TOKEN: ${{ secrets.HF_TOKEN }}  # forwards this workflow's HF_TOKEN secret to the called workflow |
| 174 | + |
| 175 | + tagging:  # Adds the release/nightly tags to the run-id image once testing has passed |
| 176 | + needs: [test, notebook-test] |
| 177 | + if: |  # always() lets this job be evaluated even though notebook-test may have been skipped |
| 178 | + always() && |
| 179 | + needs.test.result == 'success' && |
| 180 | + (needs.notebook-test.result == 'success' || needs.notebook-test.result == 'skipped') |
| 181 | + runs-on: linux-x86-n2-16-buildkit |
| 182 | + container: google/cloud-sdk:524.0.0 |
| 183 | + steps: |
| | + # The tagging script in this job runs `git rev-parse --short HEAD` to build the |
| | + # maxtext_<hash>_<date> tag. This job has no other checkout, so without this step |
| | + # there is no repository to read the hash from. The ref matches the build job's checkout. |
| | + # NOTE(review): ./tunix and ./vllm are cloned in build_and_push only; the tagging script |
| | + # guards on `-d "./$dir"`, so their per-dependency tags are skipped here unless the |
| | + # clones are repeated in this job - confirm the intended behaviour. |
| | + - name: Checkout MaxText |
| | + uses: actions/checkout@v4  # NOTE(review): align with the version/SHA pin used by the build job's checkout |
| | + with: |
| | + ref: ${{ inputs.maxtext_sha }} |
| | + |
| 184 | + - name: Configure Docker |
| 185 | + run: gcloud auth configure-docker us-docker.pkg.dev,gcr.io -q |
| 186 | + |
130 | 187 | - name: Add tags to Docker image |
131 | | - if: steps.check.outputs.should_run == 'true' |
132 | 188 | shell: bash |
133 | 189 | run: | |
134 | 190 | SOURCE_IMAGE="gcr.io/tpu-prod-env-multipod/${INPUTS_IMAGE_NAME}" |
| 191 | + TEMP_IMG="${SOURCE_IMAGE}:${{ github.run_id }}" |
135 | 192 |
|
136 | 193 | if [[ $INPUTS_VERSION_NAME ]]; then |
137 | 194 | echo "Tagging docker images corresponding to PyPI release..." |
138 | | - gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${INPUTS_VERSION_NAME}" --quiet |
| 195 | + gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:${INPUTS_VERSION_NAME}" --quiet |
139 | 196 | else |
140 | 197 | echo "Tagging docker images corresponding to nightly release..." |
141 | 198 |
|
142 | 199 | # Add date tag |
143 | | - gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${INPUTS_IMAGE_DATE}" --quiet |
| 200 | + gcloud container images add-tag "${TEMP_IMG}" "$SOURCE_IMAGE:${INPUTS_IMAGE_DATE}" --quiet |
144 | 201 |
|
145 | 202 | # Convert date to YYYYMMDD format |
146 | 203 | clean_date=$(echo "${INPUTS_IMAGE_DATE}" | sed 's/[-:]//g' | cut -c1-8) |
147 | 204 |
|
148 | 205 | # Add MaxText tag |
149 | 206 | maxtext_hash=$(git rev-parse --short HEAD) |
150 | | - gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:maxtext_${maxtext_hash}_${clean_date}" --quiet |
151 | | -
|
152 | | - # Add post-training dependencies tags |
153 | | - if [ "${{ inputs.workflow }}" == "post-training" ]; then |
154 | | - for dir in tunix vllm tpu-inference; do |
155 | | - if [ -d "./$dir" ]; then |
156 | | - dir_hash=$(git -C "$dir" rev-parse --short HEAD) |
157 | | - gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${dir}_${dir_hash}_${clean_date}" --quiet |
| 207 | + gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:maxtext_${maxtext_hash}_${clean_date}" --quiet |
| 208 | +
|
| 209 | + # Add post-training dependencies tags |
| 210 | + if [ "${{ inputs.workflow }}" == "post-training" ]; then |
| 211 | + for dir in tunix vllm tpu-inference; do |
| 212 | + if [ -d "./$dir" ]; then |
| 213 | + dir_hash=$(git -C "$dir" rev-parse --short HEAD) |
| 214 | + gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:${dir}_${dir_hash}_${clean_date}" --quiet |
158 | 215 | fi |
159 | 216 | done |
160 | 217 | fi |
161 | 218 | fi |
| 219 | + # Latest tag: applied on both the release and nightly paths, after the tags above |
| 220 | + gcloud container images add-tag "${TEMP_IMG}" "${SOURCE_IMAGE}:latest" --quiet |
162 | 221 | env: |
163 | 222 | INPUTS_IMAGE_NAME: ${{ inputs.image_name }} |
164 | 223 | INPUTS_IMAGE_DATE: ${{ inputs.image_date }} |
|
0 commit comments