Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions .github/workflows/run_maxtext_jetstream_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
env:
OUTPUT_DIR: ./test_dir
steps:
- name: Test MOEBenchmarks
- name: Test MOE Benchmarks
run: |
rm -rf ${OUTPUT_DIR}
mkdir -p ${OUTPUT_DIR}
Expand All @@ -87,6 +87,19 @@ jobs:
bash -c "
bash JetStream/.github/workflows/test_moe_benchmarks.sh
"
- name: Test llama 70b Benchmarks
run: |
# sync with the image uploaded from build_stable_stack stage
# Report should be generated in OUTPUT_DIR, depending on the ENV
DOCKER_OUTPUT_DIR=/output
docker run \
-v ${OUTPUT_DIR}:${DOCKER_OUTPUT_DIR} \
--env OUTPUT_DIR=${DOCKER_OUTPUT_DIR} \
--privileged --net=host --rm -i \
gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} \
bash -c "
bash JetStream/.github/workflows/test_llama_benchmarks.sh
"
- name: Upload build artifact
uses: actions/upload-artifact@v4
with:
Expand Down Expand Up @@ -161,5 +174,5 @@ jobs:
to: singhvijaya@google.com, yuyanpeng@google.com, vipannalla@google.com
from: JetStream Runs
secure: true
attachments: ${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b_long_context_8k_prefill.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b_jetstream.txt
attachments: ${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b_long_context_8k_prefill.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b_jetstream.txt,${{ env.BENCHMARK_REPORT_DIR }}/llama_70b_jetstream.txt
body: workflow for ${{github.repository}} completed successfully!
30 changes: 30 additions & 0 deletions .github/workflows/test_llama_benchmarks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
# Benchmark JetStream serving of an int8-quantized llama2-70b checkpoint with
# MaxText, writing a trimmed report to ${OUTPUT_DIR}/llama_70b_jetstream.txt.
# Expects to run from a directory containing maxtext/ and JetStream/ checkouts
# (as set up by the jetstream-maxtext stable-stack CI image).

# Default the report directory when the CI environment does not provide one,
# and make sure it exists before we redirect output into it.
OUTPUT_DIR=${OUTPUT_DIR:-$(pwd)/test_dir}
mkdir -p "${OUTPUT_DIR}"

# nltk + the punkt tokenizer data are required by benchmark_serving.py's
# --run-eval accuracy scoring.
pip install nltk==3.8.1
python -c "import nltk; nltk.download('punkt')"

cd maxtext

# Model/server configuration consumed by the maxengine_server invocation below.
export TOKENIZER_PATH=assets/tokenizer.llama2
export MAX_PREFILL_PREDICT_LENGTH=1024
export MAX_TARGET_LENGTH=2048
export MODEL_NAME=llama2-70b
export ICI_FSDP_PARALLELISM=1
export ICI_AUTOREGRESSIVE_PARALLELISM=1
export ICI_TENSOR_PARALLELISM=-1
export SCAN_LAYERS=false
export WEIGHT_DTYPE=bfloat16
export PER_DEVICE_BATCH_SIZE=54
export LOAD_PARAMETERS_PATH=gs://jetstream-runner/llama-70B-int8/int8_

# Start the inference server in the background and record its PID so it can be
# torn down reliably at the end. ($! is more robust than job-control %%, which
# depends on shell job bookkeeping in non-interactive shells.)
python MaxText/maxengine_server.py MaxText/configs/base.yml tokenizer_path=${TOKENIZER_PATH} load_parameters_path=${LOAD_PARAMETERS_PATH} max_prefill_predict_length=${MAX_PREFILL_PREDICT_LENGTH} max_target_length=${MAX_TARGET_LENGTH} model_name=${MODEL_NAME} ici_fsdp_parallelism=${ICI_FSDP_PARALLELISM} ici_autoregressive_parallelism=${ICI_AUTOREGRESSIVE_PARALLELISM} ici_tensor_parallelism=${ICI_TENSOR_PARALLELISM} scan_layers=${SCAN_LAYERS} weight_dtype=${WEIGHT_DTYPE} per_device_batch_size=${PER_DEVICE_BATCH_SIZE} checkpoint_is_quantized=True quantization=int8 quantize_kvcache=True enable_jax_profiler=True &
SERVER_PID=$!

# Allow time for the 70B checkpoint to load and the server to start serving.
# TODO(review): replace the fixed sleep with a readiness poll if the server
# exposes a health endpoint.
sleep 800

# NOTE(review): cwd is maxtext/ here, so "maxtext/assets/..." resolves to
# maxtext/maxtext/assets/... — confirm this path against the CI image layout.
python JetStream/benchmarks/benchmark_serving.py --tokenizer maxtext/assets/tokenizer.llama2 --save-result --save-request-outputs --request-outputs-file-path outputs.json --num-prompts 1200 --max-output-length 1024 --dataset openorca --run-eval True > "${OUTPUT_DIR}/llama_70b_jetstream.txt"
# Keep only the summary (last 25 lines) for the emailed report attachment.
tail -n25 "${OUTPUT_DIR}/llama_70b_jetstream.txt" > "${OUTPUT_DIR}/llama_70b_jetstream.tmp" && mv "${OUTPUT_DIR}/llama_70b_jetstream.tmp" "${OUTPUT_DIR}/llama_70b_jetstream.txt"

# Stop the JetStream server started above.
kill -9 "${SERVER_PID}"