Skip to content

Commit bcdeb42

Browse files
WIP
1 parent 19b2086 commit bcdeb42

3 files changed

Lines changed: 66 additions & 46 deletions

File tree

.github/workflows/run_maxtext_jetstream_tests.yaml

Lines changed: 57 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -50,57 +50,66 @@ jobs:
5050
- name: Build
5151
run: |
5252
pushd experimental/jetstream-maxtext-stable-stack
53-
./build.sh \
53+
# ./build.sh \
5454
LOCAL_IMAGE_TAG="${LOCAL_IMAGE_TAG}"
5555
popd
5656
- name: Test
5757
run: |
5858
pushd experimental/jetstream-maxtext-stable-stack
59-
./test.sh \
59+
# ./test.sh \
6060
LOCAL_IMAGE_TAG=${LOCAL_IMAGE_TAG}
6161
popd
6262
- name: Upload image
6363
run: |
64+
# UPLOAD_IMAGE_TAG=gcr.io/cloud-ml-auto-solutions/${LOCAL_IMAGE_TAG}
65+
# docker tag ${LOCAL_IMAGE_TAG} ${UPLOAD_IMAGE_TAG}
66+
# docker push ${UPLOAD_IMAGE_TAG}
6467
UPLOAD_IMAGE_TAG=gcr.io/cloud-ml-auto-solutions/${LOCAL_IMAGE_TAG}
65-
docker tag ${LOCAL_IMAGE_TAG} ${UPLOAD_IMAGE_TAG}
66-
docker push ${UPLOAD_IMAGE_TAG}
68+
gcloud container images add-tag gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:nightly ${UPLOAD_IMAGE_TAG} --quiet
69+
6770
6871
benchmark_report:
6972
name: Benchmark Report
7073
needs: build_stable_stack
7174
runs-on: ["self-hosted", "tpu", "v6e-8"]
72-
container:
73-
# sync with the image uploaded from build_stable_stack stage
74-
image: gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
75-
options: "--privileged"
7675
env:
77-
OUTPUT_DIR: /workspace/test_dir/
76+
OUTPUT_DIR: ./test_dir
7877
steps:
79-
- name: Create output directory # Ensure directory exists in container
80-
run: mkdir -p ${OUTPUT_DIR}
8178
- name: Test MOEBenchmarks
82-
# The report should be generated in OUTPUT_DIR, which is set via the job's env
83-
run: bash JetStream/.github/workflows/test_moe_benchmarks.sh
79+
run: |
80+
rm -rf ${OUTPUT_DIR}
81+
mkdir -p ${OUTPUT_DIR}
82+
# Keep the image tag below in sync with the image uploaded by the build_stable_stack stage
83+
# The report should be generated in OUTPUT_DIR, which is passed into the container via --env
84+
DOCKER_OUTPUT_DIR=/output
85+
docker run \
86+
-v ${OUTPUT_DIR}:${DOCKER_OUTPUT_DIR} \
87+
--env OUTPUT_DIR=${DOCKER_OUTPUT_DIR} \
88+
--privileged --net=host --rm -i \
89+
gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} \
90+
bash -c "
91+
bash JetStream/.github/workflows/test_moe_benchmarks.sh
92+
"
8493
- name: Upload build artifact
8594
uses: actions/upload-artifact@v4
8695
with:
8796
name: benchmark_report
8897
path: ${{ env.OUTPUT_DIR }}
8998

90-
clean_up_on_fail:
91-
if: ${{ failure() }}
92-
needs: [build_stable_stack, benchmark_report]
93-
name: "Clean up"
94-
runs-on: ["self-hosted"]
95-
permissions:
96-
contents: read
97-
issues: write # for failed-build-issue
98-
steps:
99-
- name: Authenticate gcloud
100-
run: gcloud auth configure-docker gcr.io --quiet
101-
- name: Delete TPU image
102-
# sync with the image uploaded from build_stable_stack stage
103-
run: gcloud container images delete gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} --force-delete-tags --quiet
99+
# clean_up_on_fail:
100+
# if: ${{ failure() }}
101+
# needs: [build_stable_stack, benchmark_report]
102+
# name: "Clean up"
103+
# runs-on: ["self-hosted"]
104+
# permissions:
105+
# contents: read
106+
# issues: write # for failed-build-issue
107+
# steps:
108+
# - name: Authenticate gcloud
109+
# run: gcloud auth configure-docker gcr.io --quiet
110+
# - name: Delete TPU image
111+
# # sync with the image uploaded from build_stable_stack stage
112+
# run: gcloud container images delete gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} --force-delete-tags --quiet
104113

105114
tag_night_image:
106115
needs: [build_stable_stack, benchmark_report]
@@ -126,12 +135,16 @@ jobs:
126135
name: Notify test build # creates an issue or modifies last open existing issue for failed build
127136
needs: [build_stable_stack, benchmark_report]
128137
runs-on: ["self-hosted", "tpu", "v6e-8"]
138+
env:
139+
BENCHMARK_REPORT_DIR: ./benchmark_report
129140
steps:
141+
- name: Clean previous artifact
142+
run: rm -rf ${{ env.OUTPUT_DIR }}
130143
- name: Download benchmark artifact
131144
uses: actions/download-artifact@v4
132145
with:
133146
name: benchmark_report
134-
path: ./benchmark_report
147+
path: ${{ env.OUTPUT_DIR }}
135148
- name: Check whether one of the jobs failed
136149
if: ${{ failure() }}
137150
uses: jayqi/failed-build-issue-action@1a893bbf43ef1c2a8705e2b115cd4f0fe3c5649b # v1.2.0
@@ -140,16 +153,19 @@ jobs:
140153
- name: Log message if dependent job succeeded
141154
if: ${{ ! (failure() && github.event.pull_request == null) }}
142155
run: echo "Conditions for creating/updating issue not met. Skipping."
143-
- name: Send email
144-
uses: dawidd6/action-send-mail@v3.6.0
145-
with:
146-
server_address: smtp.gmail.com
147-
server_port: 465
148-
username: ${{secrets.MAIL_USERNAME}}
149-
password: ${{secrets.MAIL_PASSWORD}}
150-
subject: Message from Inference Stable Stack Runs.
151-
to: singhvijaya@google.com, yuyanpeng@google.com, vipannalla@google.com
152-
from: JetStream Runs
153-
secure: true
154-
attachments: ./benchmark_report/moe_8x7b.txt,./benchmark_report/moe_8x22b.txt,./benchmark_report/moe_8x22b_long_context_8k_prefill.txt,./benchmark_report/moe_8x7b_jetstream.txt
155-
body: workflow for ${{github.repository}} completed successfully!
156+
- name: cat report
157+
run: |
158+
cat ${{ env.OUTPUT_DIR }}/moe_8x7b.txt ${{ env.OUTPUT_DIR }}/moe_8x22b.txt ${{ env.OUTPUT_DIR }}/moe_8x22b_long_context_8k_prefill.txt ${{ env.OUTPUT_DIR }}/moe_8x7b_jetstream.txt
159+
# - name: Send email
160+
# uses: dawidd6/action-send-mail@v3.6.0
161+
# with:
162+
# server_address: smtp.gmail.com
163+
# server_port: 465
164+
# username: ${{secrets.MAIL_USERNAME}}
165+
# password: ${{secrets.MAIL_PASSWORD}}
166+
# subject: Message from Inference Stable Stack Runs.
167+
# to: singhvijaya@google.com, yuyanpeng@google.com, vipannalla@google.com
168+
# from: JetStream Runs
169+
# secure: true
170+
# attachments: ./benchmark_report/moe_8x7b.txt,./benchmark_report/moe_8x22b.txt,./benchmark_report/moe_8x22b_long_context_8k_prefill.txt,./benchmark_report/moe_8x7b_jetstream.txt
171+
# body: workflow for ${{github.repository}} completed successfully!

.github/workflows/test_moe_benchmarks.sh

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,18 @@ python -c "import nltk; nltk.download('punkt')"
88
cd maxtext
99

1010
# moe 8x7b microbenchmark
11-
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml tokenizer_path=assets/tokenizer.mistral-v1 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x7b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=8 megablox=False quantization=int8 quantize_kvcache=False checkpoint_is_quantized=True load_parameters_path=gs://jetstream-runner/8-7B-int8 capacity_factor=1 attention=dot_product model_call_mode=inference sparse_matmul=False weight_dtype=bfloat16 > ${OUTPUT_DIR}/moe_8x7b.txt
11+
# LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml tokenizer_path=assets/tokenizer.mistral-v1 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x7b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=8 megablox=False quantization=int8 quantize_kvcache=False checkpoint_is_quantized=True load_parameters_path=gs://jetstream-runner/8-7B-int8 capacity_factor=1 attention=dot_product model_call_mode=inference sparse_matmul=False weight_dtype=bfloat16 > ${OUTPUT_DIR}/moe_8x7b.txt
12+
echo "> ${OUTPUT_DIR}/moe_8x7b.txt" > ${OUTPUT_DIR}/moe_8x7b.txt
1213
tail -n5 ${OUTPUT_DIR}/moe_8x7b.txt > ${OUTPUT_DIR}/moe_8x7b.tmp && mv ${OUTPUT_DIR}/moe_8x7b.tmp ${OUTPUT_DIR}/moe_8x7b.txt
1314

1415
# moe 8x22B microbenchmark
15-
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=True capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="128,1024" sparse_matmul=False model_call_mode=inference > ${OUTPUT_DIR}/moe_8x22b.txt
16+
# LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=True capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="128,1024" sparse_matmul=False model_call_mode=inference > ${OUTPUT_DIR}/moe_8x22b.txt
17+
echo "${OUTPUT_DIR}/moe_8x22b.txt" > ${OUTPUT_DIR}/moe_8x22b.txt
1618
tail -n5 ${OUTPUT_DIR}/moe_8x22b.txt > ${OUTPUT_DIR}/moe_8x22b.tmp && mv ${OUTPUT_DIR}/moe_8x22b.tmp ${OUTPUT_DIR}/moe_8x22b.txt
1719

1820
# moe 8x22B 8k context length chunked prefill with 2k prefill chunk size
19-
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.benchmark_chunked_prefill MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=8192 max_target_length=9000 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=False capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="8192" sparse_matmul=False model_call_mode=inference ici_context_autoregressive_parallelism=8 use_chunked_prefill=True prefill_chunk_size=2048 > ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt
20-
tail -n5 ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt > ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.tmp && mv ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.tmp ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt
21+
# LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.benchmark_chunked_prefill MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=8192 max_target_length=9000 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=False capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="8192" sparse_matmul=False model_call_mode=inference ici_context_autoregressive_parallelism=8 use_chunked_prefill=True prefill_chunk_size=2048 > ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt
22+
# tail -n5 ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt > ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.tmp && mv ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.tmp ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt
2123

2224

2325
# moe 8x7B Maxtext Jetstream
@@ -28,7 +30,8 @@ sleep 600
2830

2931
cd ..
3032

31-
python JetStream/benchmarks/benchmark_serving.py --tokenizer maxtext/assets/tokenizer.mistral-v1 --save-result --save-request-outputs --request-outputs-file-path outputs.json --num-prompts 1200 --max-output-length 1024 --dataset openorca --run-eval True > ${OUTPUT_DIR}/moe_8x7b_jetstream.txt
33+
# python JetStream/benchmarks/benchmark_serving.py --tokenizer maxtext/assets/tokenizer.mistral-v1 --save-result --save-request-outputs --request-outputs-file-path outputs.json --num-prompts 1200 --max-output-length 1024 --dataset openorca --run-eval True > ${OUTPUT_DIR}/moe_8x7b_jetstream.txt
34+
echo "> ${OUTPUT_DIR}/moe_8x7b_jetstream.txt" > ${OUTPUT_DIR}/moe_8x7b_jetstream.txt
3235
tail -n25 ${OUTPUT_DIR}/moe_8x7b_jetstream.txt > ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp && mv ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp ${OUTPUT_DIR}/moe_8x7b_jetstream.txt
3336

3437
# kill Jetstream server

experimental/jetstream-maxtext-stable-stack/test_script/benchmark_chunked_prefill_example.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
exit 0
12
cd maxtext
23

34
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" \

0 commit comments

Comments
 (0)