Skip to content

Commit 517bcbb

Browse files
llama tests and upload stable stack manifest
1 parent 8fe9a20 commit 517bcbb

2 files changed

Lines changed: 76 additions & 3 deletions

File tree

.github/workflows/run_maxtext_jetstream_tests.yaml

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,17 @@ jobs:
4343
runs-on: ["self-hosted", "tpu", "v6e-8"]
4444
env:
4545
LOCAL_IMAGE_TAG: jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
46+
OUTPUT_DIR: /output_dir
47+
outputs:
48+
manifest_name: ${{ steps.copy_build_manifest.outputs.manifest_name }}
4649
steps:
4750
- uses: actions/checkout@v4
4851
- name: Authenticate gcloud
4952
run: gcloud auth configure-docker gcr.io --quiet
53+
- name: Prepare output directory
54+
run: |
55+
rm -rf ${OUTPUT_DIR}
56+
mkdir -p ${OUTPUT_DIR}
5057
- name: Build
5158
run: |
5259
pushd experimental/jetstream-maxtext-stable-stack
@@ -64,6 +71,19 @@ jobs:
6471
UPLOAD_IMAGE_TAG=gcr.io/cloud-ml-auto-solutions/${LOCAL_IMAGE_TAG}
6572
docker tag ${LOCAL_IMAGE_TAG} ${UPLOAD_IMAGE_TAG}
6673
docker push ${UPLOAD_IMAGE_TAG}
74+
- name: Copy build manifest
  id: copy_build_manifest
  # Extracts the jetstream_maxtext_manifest* file from the locally built image
  # into OUTPUT_DIR and publishes its file name as the `manifest_name` step
  # output.
  run: |
    # A created (not yet started) container is needed so that `docker cp` can
    # read files out of the image; its command only lists the manifest name.
    TEMP_CONTAINER_ID=$(docker create "${LOCAL_IMAGE_TAG}" bash -c 'ls jetstream_maxtext_manifest*')
    # Remove the temporary container on every exit path, including failures of
    # the commands below, so containers do not pile up on the runner.
    trap 'docker rm "${TEMP_CONTAINER_ID}"' EXIT
    MANIFEST_NAME=$(docker start -a "${TEMP_CONTAINER_ID}")
    # An empty name would make `docker cp` copy the whole directory and would
    # publish an empty `manifest_name` output, so stop here instead.
    if [[ -z "${MANIFEST_NAME}" ]]; then
      echo "No jetstream_maxtext_manifest* file found in ${LOCAL_IMAGE_TAG}" >&2
      exit 1
    fi
    docker cp "${TEMP_CONTAINER_ID}:/jetstream_maxtext_stable_stack/${MANIFEST_NAME}" "${OUTPUT_DIR}"
    echo "manifest_name=${MANIFEST_NAME}" >> "${GITHUB_OUTPUT}"
82+
- name: Upload build artifact
83+
uses: actions/upload-artifact@v4
84+
with:
85+
name: build_manifest
86+
path: ${{ env.OUTPUT_DIR }}
6787

6888
benchmark_report:
6989
name: Benchmark Report
@@ -72,7 +92,7 @@ jobs:
7292
env:
7393
OUTPUT_DIR: ./test_dir
7494
steps:
75-
- name: Test MOEBenchmarks
95+
- name: Test MOE Benchmarks
7696
run: |
7797
rm -rf ${OUTPUT_DIR}
7898
mkdir -p ${OUTPUT_DIR}
@@ -87,6 +107,19 @@ jobs:
87107
bash -c "
88108
bash JetStream/.github/workflows/test_moe_benchmarks.sh
89109
"
110+
- name: Test llama 70b Benchmarks
111+
run: |
112+
# Keep this image tag in sync with the one pushed by the build_stable_stack job
113+
# The report is written to the directory given by the OUTPUT_DIR environment variable
114+
DOCKER_OUTPUT_DIR=/output
115+
docker run \
116+
-v ${OUTPUT_DIR}:${DOCKER_OUTPUT_DIR} \
117+
--env OUTPUT_DIR=${DOCKER_OUTPUT_DIR} \
118+
--privileged --net=host --rm -i \
119+
gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} \
120+
bash -c "
121+
bash JetStream/.github/workflows/test_llama_benchmarks.sh
122+
"
90123
- name: Upload build artifact
91124
uses: actions/upload-artifact@v4
92125
with:
@@ -134,14 +167,22 @@ jobs:
134167
runs-on: ["self-hosted", "tpu", "v6e-8"]
135168
env:
136169
BENCHMARK_REPORT_DIR: ./benchmark_report
170+
BUILD_MANIFEST_DIR: ./build_manifest
137171
steps:
138172
- name: Clean previous artifact
139-
run: rm -rf ${{ env.BENCHMARK_REPORT_DIR }}
173+
run: |
174+
rm -rf ${{ env.BENCHMARK_REPORT_DIR }}
175+
rm -rf ${{ env.BUILD_MANIFEST_DIR }}
140176
- name: Download benchmark artifact
141177
uses: actions/download-artifact@v4
142178
with:
143179
name: benchmark_report
144180
path: ${{ env.BENCHMARK_REPORT_DIR }}
181+
- name: Download build manifest
182+
uses: actions/download-artifact@v4
183+
with:
184+
name: build_manifest
185+
path: ${{ env.BUILD_MANIFEST_DIR }}
145186
- name: Check whether one of the jobs failed
146187
if: ${{ failure() }}
147188
uses: jayqi/failed-build-issue-action@1a893bbf43ef1c2a8705e2b115cd4f0fe3c5649b # v1.2.0
@@ -161,5 +202,5 @@ jobs:
161202
to: singhvijaya@google.com, yuyanpeng@google.com, vipannalla@google.com
162203
from: JetStream Runs
163204
secure: true
164-
attachments: ${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b_long_context_8k_prefill.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b_jetstream.txt
205+
attachments: ${{ env.BUILD_MANIFEST_DIR }}/${{ needs.build_stable_stack.outputs.manifest_name }},${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b_long_context_8k_prefill.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b_jetstream.txt,${{ env.BENCHMARK_REPORT_DIR }}/llama_70b_jetstream.txt
165206
body: workflow for ${{github.repository}} completed successfully!
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/bin/bash
#
# Runs the llama2-70b JetStream serving benchmark against a MaxText
# maxengine_server started in the background from an int8 checkpoint.
#
# Must be run from the directory that contains both the `maxtext` and the
# `JetStream` checkouts.
#
# Environment:
#   OUTPUT_DIR                   Directory that receives llama_70b_jetstream.txt
#                                (default: $(pwd)/test_dir). Created if missing.
#   SERVER_STARTUP_WAIT_SECONDS  Seconds to wait for the server to come up
#                                before benchmarking (default: 800).
#
# Exit status: the exit status of benchmark_serving.py, or 1 when the setup
# fails or the server exits before the benchmark starts.

OUTPUT_DIR=${OUTPUT_DIR:-$(pwd)/test_dir}
REPORT_FILE="${OUTPUT_DIR}/llama_70b_jetstream.txt"
REPORT_TMP="${OUTPUT_DIR}/llama_70b_jetstream.tmp"
SERVER_PID=""

# Kill the JetStream server on every exit path (success, benchmark failure,
# early abort) so it never outlives this script.
cleanup() {
  if [[ -n "${SERVER_PID}" ]]; then
    kill -9 "${SERVER_PID}" 2>/dev/null
    # Reap the child so the shell does not leave it behind as a zombie.
    wait "${SERVER_PID}" 2>/dev/null
  fi
}
trap cleanup EXIT

# The default OUTPUT_DIR does not necessarily exist; the report redirect below
# would fail without it.
mkdir -p "${OUTPUT_DIR}" || exit 1

pip install nltk==3.8.1
python -c "import nltk; nltk.download('punkt')"

cd maxtext || exit 1

export TOKENIZER_PATH=assets/tokenizer.llama2
export MAX_PREFILL_PREDICT_LENGTH=1024
export MAX_TARGET_LENGTH=2048
export MODEL_NAME=llama2-70b
export ICI_FSDP_PARALLELISM=1
export ICI_AUTOREGRESSIVE_PARALLELISM=1
export ICI_TENSOR_PARALLELISM=-1
export SCAN_LAYERS=false
export WEIGHT_DTYPE=bfloat16
export PER_DEVICE_BATCH_SIZE=54
export LOAD_PARAMETERS_PATH=gs://jetstream-runner/llama-70B-int8/int8_

# Start the server in the background and remember its PID for cleanup.
python -m MaxText.maxengine_server \
  MaxText/configs/base.yml \
  tokenizer_path="${TOKENIZER_PATH}" \
  load_parameters_path="${LOAD_PARAMETERS_PATH}" \
  max_prefill_predict_length="${MAX_PREFILL_PREDICT_LENGTH}" \
  max_target_length="${MAX_TARGET_LENGTH}" \
  model_name="${MODEL_NAME}" \
  ici_fsdp_parallelism="${ICI_FSDP_PARALLELISM}" \
  ici_autoregressive_parallelism="${ICI_AUTOREGRESSIVE_PARALLELISM}" \
  ici_tensor_parallelism="${ICI_TENSOR_PARALLELISM}" \
  scan_layers="${SCAN_LAYERS}" \
  weight_dtype="${WEIGHT_DTYPE}" \
  per_device_batch_size="${PER_DEVICE_BATCH_SIZE}" \
  checkpoint_is_quantized=True \
  quantization=int8 \
  quantize_kvcache=True \
  enable_jax_profiler=True &
SERVER_PID=$!

# Give the server time to load the checkpoint before sending requests.
sleep "${SERVER_STARTUP_WAIT_SECONDS:-800}"

# Do not start a 1200-prompt benchmark against a server that already died.
if ! kill -0 "${SERVER_PID}" 2>/dev/null; then
  echo "maxengine_server exited before the benchmark started" >&2
  exit 1
fi

cd .. || exit 1

python JetStream/benchmarks/benchmark_serving.py \
  --tokenizer maxtext/assets/tokenizer.llama2 \
  --save-result \
  --save-request-outputs \
  --request-outputs-file-path outputs.json \
  --num-prompts 1200 \
  --max-output-length 1024 \
  --dataset openorca \
  --run-eval True > "${REPORT_FILE}"
# Remember the benchmark result; the commands below must not overwrite it.
BENCHMARK_STATUS=$?

# Keep only the last 25 lines of the benchmark output as the report.
tail -n25 "${REPORT_FILE}" > "${REPORT_TMP}" && mv "${REPORT_TMP}" "${REPORT_FILE}"

# The EXIT trap kills the JetStream server; propagate the benchmark result so
# a failed benchmark fails the calling CI step.
exit "${BENCHMARK_STATUS}"

0 commit comments

Comments
 (0)