Skip to content

Commit 87928d0

Browse files
WIP
1 parent 19b2086 commit 87928d0

3 files changed

Lines changed: 21 additions & 15 deletions

File tree

.github/workflows/run_maxtext_jetstream_tests.yaml

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -50,37 +50,42 @@ jobs:
5050
- name: Build
5151
run: |
5252
pushd experimental/jetstream-maxtext-stable-stack
53-
./build.sh \
53+
# ./build.sh \
5454
LOCAL_IMAGE_TAG="${LOCAL_IMAGE_TAG}"
5555
popd
5656
- name: Test
5757
run: |
5858
pushd experimental/jetstream-maxtext-stable-stack
59-
./test.sh \
59+
# ./test.sh \
6060
LOCAL_IMAGE_TAG=${LOCAL_IMAGE_TAG}
6161
popd
6262
- name: Upload image
6363
run: |
6464
UPLOAD_IMAGE_TAG=gcr.io/cloud-ml-auto-solutions/${LOCAL_IMAGE_TAG}
65-
docker tag ${LOCAL_IMAGE_TAG} ${UPLOAD_IMAGE_TAG}
66-
docker push ${UPLOAD_IMAGE_TAG}
65+
gcloud container images add-tag gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:nightly ${UPLOAD_IMAGE_TAG} --quiet
66+
6767
6868
benchmark_report:
6969
name: Benchmark Report
7070
needs: build_stable_stack
7171
runs-on: ["self-hosted", "tpu", "v6e-8"]
72-
container:
73-
# sync with the image uploaded from build_stable_stack stage
74-
image: gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
75-
options: "--privileged"
7672
env:
77-
OUTPUT_DIR: /workspace/test_dir/
73+
OUTPUT_DIR: ./test_dir/
7874
steps:
79-
- name: Create output directory # Ensure directory exists in container
80-
run: mkdir -p ${OUTPUT_DIR}
8175
- name: Test MOEBenchmarks
82-
# Report should generated in OUTPUT_DIR depend on ENV
83-
run: bash JetStream/.github/workflows/test_moe_benchmarks.sh
76+
run: |
77+
mkdir -p ${OUTPUT_DIR}
78+
# Keep the image tag below in sync with the image uploaded by the build_stable_stack job
79+
# The benchmark script writes its report to OUTPUT_DIR inside the container; the -v mount exposes it on the host
80+
DOCKER_OUTPUT_DIR=/output
81+
docker run \
82+
-v ${OUTPUT_DIR}:${DOCKER_OUTPUT_DIR} \
83+
--env OUTPUT_DIR=${DOCKER_OUTPUT_DIR} \
84+
--privileged --net=host --rm -i \
85+
gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} \
86+
bash -c "
87+
bash JetStream/.github/workflows/test_moe_benchmarks.sh
88+
"
8489
- name: Upload build artifact
8590
uses: actions/upload-artifact@v4
8691
with:

.github/workflows/test_moe_benchmarks.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xl
1616
tail -n5 ${OUTPUT_DIR}/moe_8x22b.txt > ${OUTPUT_DIR}/moe_8x22b.tmp && mv ${OUTPUT_DIR}/moe_8x22b.tmp ${OUTPUT_DIR}/moe_8x22b.txt
1717

1818
# moe 8x22B 8k context length chunked prefill with 2k prefill chunk size
19-
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.benchmark_chunked_prefill MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=8192 max_target_length=9000 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=False capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="8192" sparse_matmul=False model_call_mode=inference ici_context_autoregressive_parallelism=8 use_chunked_prefill=True prefill_chunk_size=2048 > ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt
20-
tail -n5 ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt > ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.tmp && mv ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.tmp ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt
19+
# LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.benchmark_chunked_prefill MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=8192 max_target_length=9000 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=False capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="8192" sparse_matmul=False model_call_mode=inference ici_context_autoregressive_parallelism=8 use_chunked_prefill=True prefill_chunk_size=2048 > ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt
20+
# tail -n5 ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt > ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.tmp && mv ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.tmp ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt
2121

2222

2323
# moe 8x7B Maxtext Jetstream

experimental/jetstream-maxtext-stable-stack/test_script/benchmark_chunked_prefill_example.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
exit 0
12
cd maxtext
23

34
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" \

0 commit comments

Comments
 (0)