Skip to content

Commit 4ea8263

Browse files
WIP
1 parent ffe9b2d commit 4ea8263

1 file changed

Lines changed: 68 additions & 49 deletions

File tree

.github/workflows/run_maxtext_jetstream_tests.yaml

Lines changed: 68 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ jobs:
4545
LOCAL_IMAGE_TAG: jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
4646
steps:
4747
- uses: actions/checkout@v4
48+
- name: Authenticate gcloud
49+
run: gcloud auth configure-docker gcr.io --quiet
4850
- name: Build
4951
run: |
5052
pushd experimental/jetstream-maxtext-stable-stack
@@ -54,84 +56,101 @@ jobs:
5456
- name: Test
5557
run: |
5658
pushd experimental/jetstream-maxtext-stable-stack
57-
./test.sh \
58-
LOCAL_IMAGE_TAG=${LOCAL_IMAGE_TAG}
59+
# ./test.sh \
60+
# LOCAL_IMAGE_TAG=${LOCAL_IMAGE_TAG}
5961
popd
6062
- name: Upload image
6163
run: |
62-
UPLOAD_IMAGE_TAG=gcr.io/cloud-tpu-inference-test/${LOCAL_IMAGE_TAG}
64+
UPLOAD_IMAGE_TAG=gcr.io/cloud-ml-auto-solutions/${LOCAL_IMAGE_TAG}
6365
docker tag ${LOCAL_IMAGE_TAG} ${UPLOAD_IMAGE_TAG}
6466
docker push ${UPLOAD_IMAGE_TAG}
65-
NIGHTLY_TAG=${UPLOAD_IMAGE_TAG%:*}:nightly
66-
NIGHTLY_TAG_DATE=${NIGHTLY_TAG}-$(date +"%Y%m%d")
67-
docker tag ${LOCAL_IMAGE_TAG} ${NIGHTLY_TAG}
68-
docker tag ${LOCAL_IMAGE_TAG} ${NIGHTLY_TAG_DATE}
69-
docker push ${NIGHTLY_TAG}
70-
docker push ${NIGHTLY_TAG_DATE}
7167
7268
benchmark_report:
7369
name: Benchmark Report
7470
needs: build_stable_stack
7571
runs-on: ["self-hosted", "tpu", "v6e-8"]
7672
container:
7773
# sync with the image uploaded from build_stable_stack stage
78-
image: gcr.io/cloud-tpu-inference-test/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
79-
options: "--net=host --privileged"
74+
image: gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
75+
options: "--privileged"
8076
env:
8177
OUTPUT_DIR: /workspace/test_dir/
8278
steps:
8379
- name: Create output directory # Ensure directory exists in container
8480
run: mkdir -p ${OUTPUT_DIR}
8581
- name: Test MOEBenchmarks
8682
# Report should be generated in OUTPUT_DIR, depending on ENV
87-
run: bash JetStream/.github/workflows/test_moe_benchmarks.sh
88-
- name: Upload build artifact
89-
uses: actions/upload-artifact@v4
90-
with:
91-
name: benchmark_report
92-
path: ${{ env.OUTPUT_DIR }}
83+
# run: bash JetStream/.github/workflows/test_moe_benchmarks.sh
84+
run: find .
85+
# - name: Upload build artifact
86+
# uses: actions/upload-artifact@v4
87+
# with:
88+
# name: benchmark_report
89+
# path: ${{ env.OUTPUT_DIR }}
9390

94-
clean_up:
95-
if: ${{ always() }} # always execute, regardless of previous jobs or steps.
91+
clean_up_on_fail:
92+
if: ${{ failure() }}
9693
needs: [build_stable_stack, benchmark_report]
9794
name: "Clean up"
9895
runs-on: ["self-hosted"]
9996
permissions:
10097
contents: read
10198
issues: write # for failed-build-issue
10299
steps:
100+
- name: Authenticate gcloud
101+
run: gcloud auth configure-docker gcr.io --quiet
103102
- name: Delete TPU image
104103
# sync with the image uploaded from build_stable_stack stage
105-
run: gcloud container images delete gcr.io/cloud-tpu-inference-test/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} --force-delete-tags --quiet
104+
run: gcloud container images delete gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} --force-delete-tags --quiet
106105

107-
notify:
108-
name: Notify test build # creates an issue or modifies last open existing issue for failed build
106+
upload_night_image:
109107
needs: [build_stable_stack, benchmark_report]
110-
runs-on: ["self-hosted", "tpu", "v6e-8"]
108+
name: "Upload night image"
109+
runs-on: ["self-hosted"]
110+
permissions:
111+
contents: read
112+
issues: write # for failed-build-issue
111113
steps:
112-
- name: Download benchmark artifact
113-
uses: actions/download-artifact@v4
114-
with:
115-
name: benchmark_report
116-
path: ./benchmark_report
117-
- name: Check whether one of the jobs failed
118-
if: ${{ failure() }}
119-
uses: jayqi/failed-build-issue-action@1a893bbf43ef1c2a8705e2b115cd4f0fe3c5649b # v1.2.0
120-
with:
121-
github-token: ${{ secrets.GITHUB_TOKEN }}
122-
- name: Log message if dependent job succeeded
123-
if: ${{ ! (failure() && github.event.pull_request == null) }}
124-
run: echo "Conditions for creating/updating issue not met. Skipping."
125-
- name: Send email
126-
uses: dawidd6/action-send-mail@v3.6.0
127-
with:
128-
server_address: smtp.gmail.com
129-
server_port: 465
130-
username: ${{secrets.MAIL_USERNAME}}
131-
password: ${{secrets.MAIL_PASSWORD}}
132-
subject: Message from Inference Stable Stack Runs.
133-
to: singhvijaya@google.com, yuyanpeng@google.com, vipannalla@google.com
134-
from: JetStream Runs
135-
secure: true
136-
attachments: ./benchmark_report/moe_8x7b.txt,./benchmark_report/moe_8x22b.txt,./benchmark_report/moe_8x22b_long_context_8k_prefill.txt,./benchmark_report/moe_8x7b_jetstream.txt
137-
body: workflow for ${{github.repository}} completed successfully!
114+
- name: Authenticate gcloud
115+
run: gcloud auth configure-docker gcr.io --quiet
116+
- name: Upload night image
117+
# sync with the image uploaded from build_stable_stack stage
118+
run: |
119+
UPLOAD_IMAGE_TAG=gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
120+
NIGHTLY_TAG=${UPLOAD_IMAGE_TAG%:*}:nightly
121+
NIGHTLY_TAG_DATE=${NIGHTLY_TAG}-$(date +"%Y%m%d")
122+
gcloud container images add-tag ${UPLOAD_IMAGE_TAG} ${NIGHTLY_TAG}
123+
gcloud container images add-tag ${UPLOAD_IMAGE_TAG} ${NIGHTLY_TAG_DATE}
124+
gcloud container images untag ${UPLOAD_IMAGE_TAG}
125+
126+
# notify:
127+
# name: Notify test build # creates an issue or modifies last open existing issue for failed build
128+
# needs: [build_stable_stack, benchmark_report]
129+
# runs-on: ["self-hosted", "tpu", "v6e-8"]
130+
# steps:
131+
# - name: Download benchmark artifact
132+
# uses: actions/download-artifact@v4
133+
# with:
134+
# name: benchmark_report
135+
# path: ./benchmark_report
136+
# - name: Check whether one of the jobs failed
137+
# if: ${{ failure() }}
138+
# uses: jayqi/failed-build-issue-action@1a893bbf43ef1c2a8705e2b115cd4f0fe3c5649b # v1.2.0
139+
# with:
140+
# github-token: ${{ secrets.GITHUB_TOKEN }}
141+
# - name: Log message if dependent job succeeded
142+
# if: ${{ ! (failure() && github.event.pull_request == null) }}
143+
# run: echo "Conditions for creating/updating issue not met. Skipping."
144+
# - name: Send email
145+
# uses: dawidd6/action-send-mail@v3.6.0
146+
# with:
147+
# server_address: smtp.gmail.com
148+
# server_port: 465
149+
# username: ${{secrets.MAIL_USERNAME}}
150+
# password: ${{secrets.MAIL_PASSWORD}}
151+
# subject: Message from Inference Stable Stack Runs.
152+
# to: singhvijaya@google.com, yuyanpeng@google.com, vipannalla@google.com
153+
# from: JetStream Runs
154+
# secure: true
155+
# attachments: ./benchmark_report/moe_8x7b.txt,./benchmark_report/moe_8x22b.txt,./benchmark_report/moe_8x22b_long_context_8k_prefill.txt,./benchmark_report/moe_8x7b_jetstream.txt
156+
# body: workflow for ${{github.repository}} completed successfully!

0 commit comments

Comments
 (0)