4545 LOCAL_IMAGE_TAG : jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
4646 steps :
4747 - uses : actions/checkout@v4
48+ - name : Authenticate gcloud
49+ run : gcloud auth configure-docker gcr.io --quiet
4850 - name : Build
4951 run : |
5052 pushd experimental/jetstream-maxtext-stable-stack
@@ -54,84 +56,101 @@ jobs:
5456 - name : Test
5557 run : |
5658 pushd experimental/jetstream-maxtext-stable-stack
57- ./test.sh \
58- LOCAL_IMAGE_TAG=${LOCAL_IMAGE_TAG}
59+ # ./test.sh \
60+ # LOCAL_IMAGE_TAG=${LOCAL_IMAGE_TAG}
5961 popd
6062 - name : Upload image
6163 run : |
62- UPLOAD_IMAGE_TAG=gcr.io/cloud-tpu-inference-test/${LOCAL_IMAGE_TAG}
64+ UPLOAD_IMAGE_TAG=gcr.io/cloud-ml-auto-solutions/${LOCAL_IMAGE_TAG}
6365 docker tag ${LOCAL_IMAGE_TAG} ${UPLOAD_IMAGE_TAG}
6466 docker push ${UPLOAD_IMAGE_TAG}
65- NIGHTLY_TAG=${UPLOAD_IMAGE_TAG%:*}:nightly
66- NIGHTLY_TAG_DATE=${NIGHTLY_TAG}-$(date +"%Y%m%d")
67- docker tag ${LOCAL_IMAGE_TAG} ${NIGHTLY_TAG}
68- docker tag ${LOCAL_IMAGE_TAG} ${NIGHTLY_TAG_DATE}
69- docker push ${NIGHTLY_TAG}
70- docker push ${NIGHTLY_TAG_DATE}
7167
7268 benchmark_report :
7369 name : Benchmark Report
7470 needs : build_stable_stack
7571 runs-on : ["self-hosted", "tpu", "v6e-8"]
7672 container :
7773 # sync with the image uploaded from build_stable_stack stage
78- image : gcr.io/cloud-tpu-inference-test/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
79- options : "--net=host --privileged"
74+ image : gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
75+ options : "--privileged"
8076 env :
8177 OUTPUT_DIR : /workspace/test_dir/
8278 steps :
8379 - name : Create output directory # Ensure directory exists in container
8480 run : mkdir -p ${OUTPUT_DIR}
8581 - name : Test MOEBenchmarks
8682 # The report should be generated in OUTPUT_DIR, which is set via env
87- run : bash JetStream/.github/workflows/test_moe_benchmarks.sh
88- - name : Upload build artifact
89- uses : actions/upload-artifact@v4
90- with :
91- name : benchmark_report
92- path : ${{ env.OUTPUT_DIR }}
83+ # run: bash JetStream/.github/workflows/test_moe_benchmarks.sh
84+ run : find .
85+ # - name: Upload build artifact
86+ # uses: actions/upload-artifact@v4
87+ # with:
88+ # name: benchmark_report
89+ # path: ${{ env.OUTPUT_DIR }}
9390
94- clean_up :
95- if : ${{ always() }} # always execute, regardless of previous jobs or steps.
91+ clean_up_on_fail :
92+ if : ${{ failure() }}
9693 needs : [build_stable_stack, benchmark_report]
9795 name : "Clean up"
9895 runs-on : ["self-hosted"]
9996 permissions :
10097 contents : read
10198 issues : write # for failed-build-issue
10299 steps :
100+ - name : Authenticate gcloud
101+ run : gcloud auth configure-docker gcr.io --quiet
103102 - name : Delete TPU image
104103 # sync with the image uploaded from build_stable_stack stage
105- run : gcloud container images delete gcr.io/cloud-tpu-inference-test/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} --force-delete-tags --quiet
104+ run : gcloud container images delete gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} --force-delete-tags --quiet
106105
107- notify :
108- name : Notify test build # creates an issue or modifies last open existing issue for failed build
106+ upload_night_image :
109107 needs : [build_stable_stack, benchmark_report]
110- runs-on : ["self-hosted", "tpu", "v6e-8"]
108+ name : "Upload night image"
109+ runs-on : ["self-hosted"]
110+ permissions :
111+ contents : read
112+ issues : write # for failed-build-issue
111113 steps :
112- - name : Download benchmark artifact
113- uses : actions/download-artifact@v4
114- with :
115- name : benchmark_report
116- path : ./benchmark_report
117- - name : Check whether one of the jobs failed
118- if : ${{ failure() }}
119- uses : jayqi/failed-build-issue-action@1a893bbf43ef1c2a8705e2b115cd4f0fe3c5649b # v1.2.0
120- with :
121- github-token : ${{ secrets.GITHUB_TOKEN }}
122- - name : Log message if dependent job succeeded
123- if : ${{ ! (failure() && github.event.pull_request == null) }}
124- run : echo "Conditions for creating/updating issue not met. Skipping."
125- - name : Send email
126- uses : dawidd6/action-send-mail@v3.6.0
127- with :
128- server_address : smtp.gmail.com
129- server_port : 465
130- username : ${{secrets.MAIL_USERNAME}}
131- password : ${{secrets.MAIL_PASSWORD}}
132- subject : Message from Inference Stable Stack Runs.
133- to : singhvijaya@google.com, yuyanpeng@google.com, vipannalla@google.com
134- from : JetStream Runs
135- secure : true
136- attachments : ./benchmark_report/moe_8x7b.txt,./benchmark_report/moe_8x22b.txt,./benchmark_report/moe_8x22b_long_context_8k_prefill.txt,./benchmark_report/moe_8x7b_jetstream.txt
137- body : workflow for ${{github.repository}} completed successfully!
114+ - name : Authenticate gcloud
115+ run : gcloud auth configure-docker gcr.io --quiet
116+ - name : Upload night image
117+ # sync with the image uploaded from build_stable_stack stage
118+ run : |
119+ UPLOAD_IMAGE_TAG=gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
120+ NIGHTLY_TAG=${UPLOAD_IMAGE_TAG%:*}:nightly
121+ NIGHTLY_TAG_DATE=${NIGHTLY_TAG}-$(date +"%Y%m%d")
122+ gcloud container images add-tag ${UPLOAD_IMAGE_TAG} ${NIGHTLY_TAG}
123+ gcloud container images add-tag ${UPLOAD_IMAGE_TAG} ${NIGHTLY_TAG_DATE}
124+ gcloud container images untag ${UPLOAD_IMAGE_TAG}
125+
126+ # notify:
127+ # name: Notify test build # creates an issue or modifies last open existing issue for failed build
128+ # needs: [build_stable_stack, benchmark_report]
129+ # runs-on: ["self-hosted", "tpu", "v6e-8"]
130+ # steps:
131+ # - name: Download benchmark artifact
132+ # uses: actions/download-artifact@v4
133+ # with:
134+ # name: benchmark_report
135+ # path: ./benchmark_report
136+ # - name: Check whether one of the jobs failed
137+ # if: ${{ failure() }}
138+ # uses: jayqi/failed-build-issue-action@1a893bbf43ef1c2a8705e2b115cd4f0fe3c5649b # v1.2.0
139+ # with:
140+ # github-token: ${{ secrets.GITHUB_TOKEN }}
141+ # - name: Log message if dependent job succeeded
142+ # if: ${{ ! (failure() && github.event.pull_request == null) }}
143+ # run: echo "Conditions for creating/updating issue not met. Skipping."
144+ # - name: Send email
145+ # uses: dawidd6/action-send-mail@v3.6.0
146+ # with:
147+ # server_address: smtp.gmail.com
148+ # server_port: 465
149+ # username: ${{secrets.MAIL_USERNAME}}
150+ # password: ${{secrets.MAIL_PASSWORD}}
151+ # subject: Message from Inference Stable Stack Runs.
152+ # to: singhvijaya@google.com, yuyanpeng@google.com, vipannalla@google.com
153+ # from: JetStream Runs
154+ # secure: true
155+ # attachments: ./benchmark_report/moe_8x7b.txt,./benchmark_report/moe_8x22b.txt,./benchmark_report/moe_8x22b_long_context_8k_prefill.txt,./benchmark_report/moe_8x7b_jetstream.txt
156+ # body: workflow for ${{github.repository}} completed successfully!
0 commit comments