@@ -43,10 +43,17 @@ jobs:
4343 runs-on : ["self-hosted", "tpu", "v6e-8"]
4444 env :
4545 LOCAL_IMAGE_TAG : jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
46+ OUTPUT_DIR : /output_dir
47+ outputs :
48+ manifest_name : ${{ steps.copy_build_manifest.outputs.manifest_name }}
4649 steps :
# Check out the repository into the runner workspace; the Build step below
# changes into a directory inside this checkout.
- uses: actions/checkout@v4
# Configure Docker to use gcloud credentials for gcr.io so the later
# `docker push` to gcr.io is authenticated. --quiet disables prompts.
- name: Authenticate gcloud
  run: gcloud auth configure-docker gcr.io --quiet
# Start from an empty output directory: this job runs on a self-hosted runner,
# and everything under OUTPUT_DIR is later uploaded as the build artifact.
- name: Prepare output directory
  run: |
    # ${VAR:?} aborts the step when OUTPUT_DIR is unset or empty, and the
    # quotes keep the path as one argument, so `rm -rf` can only ever touch
    # the intended directory.
    rm -rf "${OUTPUT_DIR:?OUTPUT_DIR must be set}"
    mkdir -p "${OUTPUT_DIR}"
5057 - name : Build
5158 run : |
5259 pushd experimental/jetstream-maxtext-stable-stack
6471 UPLOAD_IMAGE_TAG=gcr.io/cloud-ml-auto-solutions/${LOCAL_IMAGE_TAG}
6572 docker tag ${LOCAL_IMAGE_TAG} ${UPLOAD_IMAGE_TAG}
6673 docker push ${UPLOAD_IMAGE_TAG}
# Copy the build manifest out of the locally built image into OUTPUT_DIR and
# expose its file name as the `manifest_name` step output.
- name: Copy build manifest
  id: copy_build_manifest
  run: |
    # The throwaway container only lists the manifest file name.
    # NOTE(review): the relative `ls` assumes the image's working directory is
    # /jetstream_maxtext_stable_stack (the path used by `docker cp`) - confirm.
    TEMP_CONTAINER_ID=$(docker create "${LOCAL_IMAGE_TAG}" bash -c 'ls jetstream_maxtext_manifest*')
    # The default shell stops at the first failing command, so remove the
    # container from an EXIT trap; otherwise a failed `docker start` or
    # `docker cp` would leave it behind on the self-hosted runner.
    trap 'docker rm "${TEMP_CONTAINER_ID}"' EXIT
    MANIFEST_NAME=$(docker start -a "${TEMP_CONTAINER_ID}")
    docker cp "${TEMP_CONTAINER_ID}:/jetstream_maxtext_stable_stack/${MANIFEST_NAME}" "${OUTPUT_DIR}"
    echo "manifest_name=${MANIFEST_NAME}" >> "${GITHUB_OUTPUT}"
# Publish the contents of OUTPUT_DIR as the `build_manifest` artifact, which a
# later job downloads by that name.
- name: Upload build artifact
  uses: actions/upload-artifact@v4
  with:
    name: build_manifest
    path: ${{ env.OUTPUT_DIR }}
    # Fail here instead of only warning when nothing was copied; otherwise
    # the problem first shows up in the job that downloads this artifact.
    if-no-files-found: error
6787
6888 benchmark_report :
6989 name : Benchmark Report
7292 env :
7393 OUTPUT_DIR : ./test_dir
7494 steps :
75- - name : Test MOEBenchmarks
95+ - name : Test MOE Benchmarks
7696 run : |
7797 rm -rf ${OUTPUT_DIR}
7898 mkdir -p ${OUTPUT_DIR}
@@ -87,6 +107,19 @@ jobs:
87107 bash -c "
88108 bash JetStream/.github/workflows/test_moe_benchmarks.sh
89109 "
# Run the llama 70b benchmark script inside the stable-stack image, with the
# host OUTPUT_DIR mounted into the container.
- name: Test llama 70b Benchmarks
  run: |
    # Use the image that the build_stable_stack stage pushed for this run
    # (same github.run_id tag).
    # The host OUTPUT_DIR is mounted at DOCKER_OUTPUT_DIR, and the container
    # receives that path as its own OUTPUT_DIR environment variable.
    # NOTE(review): presumably the benchmark script writes its report there -
    # confirm against test_llama_benchmarks.sh.
    DOCKER_OUTPUT_DIR=/output
    docker run \
      -v "${OUTPUT_DIR}:${DOCKER_OUTPUT_DIR}" \
      --env OUTPUT_DIR="${DOCKER_OUTPUT_DIR}" \
      --privileged --net=host --rm -i \
      gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} \
      bash -c "
        bash JetStream/.github/workflows/test_llama_benchmarks.sh
      "
90123 - name : Upload build artifact
91124 uses : actions/upload-artifact@v4
92125 with :
@@ -134,14 +167,22 @@ jobs:
134167 runs-on : ["self-hosted", "tpu", "v6e-8"]
135168 env :
136169 BENCHMARK_REPORT_DIR : ./benchmark_report
170+ BUILD_MANIFEST_DIR : ./build_manifest
137171 steps :
# Delete the download directories left over from an earlier run on this
# runner. Both paths come from the job-level `env` block, which is also
# exported to the step's shell.
- name: Clean previous artifact
  run: rm -rf "${BENCHMARK_REPORT_DIR}" "${BUILD_MANIFEST_DIR}"
# Fetch the artifact named `benchmark_report` into BENCHMARK_REPORT_DIR.
# NOTE(review): presumably uploaded by the benchmark_report job - the upload
# step's artifact name is not visible here; confirm.
- name: Download benchmark artifact
  uses: actions/download-artifact@v4
  with:
    name: benchmark_report
    path: ${{ env.BENCHMARK_REPORT_DIR }}
# Fetch the `build_manifest` artifact (uploaded by the build job's
# "Upload build artifact" step) into BUILD_MANIFEST_DIR.
- name: Download build manifest
  uses: actions/download-artifact@v4
  with:
    name: build_manifest
    path: ${{ env.BUILD_MANIFEST_DIR }}
145186 - name : Check whether one of the jobs failed
146187 if : ${{ failure() }}
147188 uses : jayqi/failed-build-issue-action@1a893bbf43ef1c2a8705e2b115cd4f0fe3c5649b # v1.2.0
@@ -161,5 +202,5 @@ jobs:
161202 to : singhvijaya@google.com, yuyanpeng@google.com, vipannalla@google.com
162203 from : JetStream Runs
163204 secure : true
# Comma-separated files to attach: the build manifest followed by the
# benchmark reports.
# NOTE(review): `needs.build_stable_stack.outputs.manifest_name` only resolves
# if this job lists build_stable_stack under `needs` - confirm.
attachments: ${{ env.BUILD_MANIFEST_DIR }}/${{ needs.build_stable_stack.outputs.manifest_name }},${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b_long_context_8k_prefill.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b_jetstream.txt,${{ env.BENCHMARK_REPORT_DIR }}/llama_70b_jetstream.txt
165206 body : workflow for ${{github.repository}} completed successfully!
0 commit comments