Skip to content

Commit e58aa50

Browse files
committed
Merge branch 'main' into amangu-lora-3
2 parents 453db81 + 8fe9a20 commit e58aa50

79 files changed

Lines changed: 57509 additions & 946 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.coveragerc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
[run]
2+
branch = True
3+
4+
[report]
5+
# Regexes for lines to exclude from consideration
6+
exclude_lines =
7+
# Don't complain if non-runnable code isn't run:
8+
if 0:
9+
if __name__ == .__main__.:
10+
11+
.*# pragma: no cover
12+
.*# pragma: no branch
13+

.github/CODEOWNERS

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1-
* @JoeZijunZhou
1+
* @mailvijayasingh
2+
* @yuyanpeng-google
23
* @vipannalla

.github/workflows/add_label.yaml

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
name: Add Label
16+
17+
on:
18+
workflow_run:
19+
workflows: [Tests, CodeQL]
20+
types:
21+
- completed
22+
pull_request_review:
23+
pull_request_review_comment:
24+
workflow_dispatch:
25+
26+
jobs:
27+
AddPullReady:
28+
permissions:
29+
checks: read
30+
pull-requests: write
31+
runs-on: ubuntu-latest
32+
33+
steps:
34+
- uses: actions/github-script@v6
35+
with:
36+
script: |
37+
const owner = "google"
38+
const repo = "jetstream"
39+
let pull_number = -1
40+
if (context.payload.pull_request !== undefined) {
41+
pull_number = context.payload.pull_request.number
42+
} else if (context.payload.workflow_run !== undefined) {
43+
if (context.payload.workflow_run.pull_requests.length === 0) {
44+
console.log("This workflow is NOT running within a PR's context")
45+
process.exit(1)
46+
}
47+
console.log(context.payload.workflow_run.pull_requests)
48+
pull_number = context.payload.workflow_run.pull_requests[0].number
49+
} else {
50+
console.log("This workflow is running within an invalid context")
51+
process.exit(1)
52+
}
53+
54+
// Fetch the reviews on this PR. NOTE: any review in the APPROVED state counts below; reviewers are not compared against CODEOWNERS here.
55+
const reviews = await github.rest.pulls.listReviews({
56+
owner,
57+
repo,
58+
pull_number,
59+
})
60+
61+
const pullRequest = await github.rest.pulls.get({
62+
owner,
63+
repo,
64+
pull_number,
65+
});
66+
const pullRequester = pullRequest.data.user.login;
67+
68+
if (reviews.data.length === 0) {
69+
console.log("Not adding pull ready because the PR is not approved yet.")
70+
process.exit(1)
71+
}
72+
let is_approved=false
73+
for (const review of reviews.data) {
74+
if (review.state === "APPROVED") {
75+
is_approved=true
76+
break;
77+
}
78+
}
79+
if (!is_approved) {
80+
console.log("Not adding pull ready because the PR is not approved yet by a code owner.")
81+
process.exit(1)
82+
}
83+
84+
const commits = await github.rest.pulls.listCommits({
85+
owner,
86+
repo,
87+
pull_number,
88+
per_page: 100,
89+
})
90+
// Log a warning (without failing) when the PR does not consist of exactly one commit.
91+
if (commits.data.length !== 1) {
92+
console.log("The PR has more than one commit. Please squash your commits.")
93+
}
94+
const ref = commits.data.slice(-1)[0].sha
95+
const checkRuns = await github.rest.checks.listForRef({
96+
owner,
97+
repo,
98+
ref,
99+
})
100+
if (checkRuns.data.check_runs.length === 0) {
101+
console.log("Not adding pull ready because no check runs are associated with the last commit: " + ref)
102+
process.exit(1)
103+
}
104+
for (const checkRun of checkRuns.data.check_runs) {
105+
if (checkRun.name.endsWith(context.job)) continue
106+
if (checkRun.conclusion !== "success") {
107+
console.log("Not adding pull ready because " + checkRun.name + " has not passed yet: " + checkRun.html_url)
108+
process.exit(1)
109+
}
110+
}
111+
console.log("Adding pull ready label because the PR is approved AND all the check runs have passed")
112+
await github.rest.issues.addLabels({
113+
issue_number: pull_number,
114+
labels: ["pull ready"],
115+
owner,
116+
repo,
117+
})

.github/workflows/release.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
runs-on: ${{ matrix.os }}
3434
strategy:
3535
matrix:
36-
os: [ubuntu-20.04]
36+
os: [ubuntu-24.04]
3737
python-version: ['3.10']
3838
steps:
3939
- name: Checkout
@@ -59,7 +59,7 @@ jobs:
5959
needs: release
6060
strategy:
6161
matrix:
62-
os: [ubuntu-20.04]
62+
os: [ubuntu-24.04]
6363
python-version: ['3.10']
6464
environment:
6565
name: pypi
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# This workflow builds a stable stack for JetStream+Maxtext, runs benchmarks,
16+
# cleans up resources, and sends notifications.
17+
18+
name: Run Maxtext JetStream Tests
19+
20+
on:
21+
# pull_request:
22+
# push:
23+
# branches: [ "main" ]
24+
workflow_dispatch:
25+
schedule:
26+
# Run the job daily at midnight UTC
27+
- cron: '0 0 * * *'
28+
29+
jobs:
30+
prelim:
31+
runs-on: ["self-hosted", "tpu", "v6e-8"]
32+
steps:
33+
- name: Test gsutil installation
34+
run: which gsutil >/dev/null 2>&1 || { echo >&2 "gsutil is required but not installed. Aborting"; exit 24;}
35+
- name: Cleanup old docker images
36+
run: docker system prune --all --force
37+
- name: Authenticate gcloud
38+
run: gcloud auth configure-docker gcr.io --quiet
39+
40+
build_stable_stack:
41+
name: Build Stable Stack
42+
needs: prelim
43+
runs-on: ["self-hosted", "tpu", "v6e-8"]
44+
env:
45+
LOCAL_IMAGE_TAG: jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
46+
steps:
47+
- uses: actions/checkout@v4
48+
- name: Authenticate gcloud
49+
run: gcloud auth configure-docker gcr.io --quiet
50+
- name: Build
51+
run: |
52+
pushd experimental/jetstream-maxtext-stable-stack
53+
./build.sh \
54+
LOCAL_IMAGE_TAG="${LOCAL_IMAGE_TAG}"
55+
popd
56+
- name: Test
57+
run: |
58+
pushd experimental/jetstream-maxtext-stable-stack
59+
./test.sh \
60+
LOCAL_IMAGE_TAG=${LOCAL_IMAGE_TAG}
61+
popd
62+
- name: Upload image
63+
run: |
64+
UPLOAD_IMAGE_TAG=gcr.io/cloud-ml-auto-solutions/${LOCAL_IMAGE_TAG}
65+
docker tag ${LOCAL_IMAGE_TAG} ${UPLOAD_IMAGE_TAG}
66+
docker push ${UPLOAD_IMAGE_TAG}
67+
68+
benchmark_report:
69+
name: Benchmark Report
70+
needs: build_stable_stack
71+
runs-on: ["self-hosted", "tpu", "v6e-8"]
72+
env:
73+
OUTPUT_DIR: ./test_dir
74+
steps:
75+
- name: Test MOEBenchmarks
76+
run: |
77+
rm -rf ${OUTPUT_DIR}
78+
mkdir -p ${OUTPUT_DIR}
79+
# Keep the image tag below in sync with the image uploaded by the build_stable_stack job.
80+
# The report should be generated in the directory given by the OUTPUT_DIR environment variable.
81+
DOCKER_OUTPUT_DIR=/output
82+
docker run \
83+
-v ${OUTPUT_DIR}:${DOCKER_OUTPUT_DIR} \
84+
--env OUTPUT_DIR=${DOCKER_OUTPUT_DIR} \
85+
--privileged --net=host --rm -i \
86+
gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} \
87+
bash -c "
88+
bash JetStream/.github/workflows/test_moe_benchmarks.sh
89+
"
90+
- name: Upload build artifact
91+
uses: actions/upload-artifact@v4
92+
with:
93+
name: benchmark_report
94+
path: ${{ env.OUTPUT_DIR }}
95+
96+
clean_up_on_fail:
97+
if: ${{ failure() }}
98+
needs: [build_stable_stack, benchmark_report]
99+
name: "Clean up"
100+
runs-on: ["self-hosted"]
101+
permissions:
102+
contents: read
103+
issues: write # for failed-build-issue
104+
steps:
105+
- name: Authenticate gcloud
106+
run: gcloud auth configure-docker gcr.io --quiet
107+
- name: Delete TPU image
108+
# Keep the image tag below in sync with the image uploaded by the build_stable_stack job.
109+
run: gcloud container images delete gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }} --force-delete-tags --quiet
110+
111+
tag_night_image:
112+
needs: [build_stable_stack, benchmark_report]
113+
name: "Tag night image"
114+
runs-on: ["self-hosted"]
115+
permissions:
116+
contents: read
117+
issues: write # for failed-build-issue
118+
steps:
119+
- name: Authenticate gcloud
120+
run: gcloud auth configure-docker gcr.io --quiet
121+
- name: Upload night image
122+
# Keep the image tag below in sync with the image uploaded by the build_stable_stack job.
123+
run: |
124+
UPLOAD_IMAGE_TAG=gcr.io/cloud-ml-auto-solutions/jetstream-maxtext-stable-stack/tpu:github_${{ github.run_id }}
125+
NIGHTLY_TAG=${UPLOAD_IMAGE_TAG%:*}:nightly
126+
NIGHTLY_TAG_DATE=${NIGHTLY_TAG}-$(date +"%Y%m%d")
127+
gcloud container images add-tag ${UPLOAD_IMAGE_TAG} ${NIGHTLY_TAG} --quiet
128+
gcloud container images add-tag ${UPLOAD_IMAGE_TAG} ${NIGHTLY_TAG_DATE} --quiet
129+
gcloud container images untag ${UPLOAD_IMAGE_TAG} --quiet
130+
131+
notify:
132+
name: Notify test build # creates an issue or modifies last open existing issue for failed build
133+
needs: [build_stable_stack, benchmark_report]
134+
runs-on: ["self-hosted", "tpu", "v6e-8"]
135+
env:
136+
BENCHMARK_REPORT_DIR: ./benchmark_report
137+
steps:
138+
- name: Clean previous artifact
139+
run: rm -rf ${{ env.BENCHMARK_REPORT_DIR }}
140+
- name: Download benchmark artifact
141+
uses: actions/download-artifact@v4
142+
with:
143+
name: benchmark_report
144+
path: ${{ env.BENCHMARK_REPORT_DIR }}
145+
- name: Check whether one of the jobs failed
146+
if: ${{ failure() }}
147+
uses: jayqi/failed-build-issue-action@1a893bbf43ef1c2a8705e2b115cd4f0fe3c5649b # v1.2.0
148+
with:
149+
github-token: ${{ secrets.GITHUB_TOKEN }}
150+
- name: Log message if dependent job succeeded
151+
if: ${{ ! (failure() && github.event.pull_request == null) }}
152+
run: echo "Conditions for creating/updating issue not met. Skipping."
153+
- name: Send email
154+
uses: dawidd6/action-send-mail@v3.6.0
155+
with:
156+
server_address: smtp.gmail.com
157+
server_port: 465
158+
username: ${{secrets.MAIL_USERNAME}}
159+
password: ${{secrets.MAIL_PASSWORD}}
160+
subject: Message from Inference Stable Stack Runs.
161+
to: singhvijaya@google.com, yuyanpeng@google.com, vipannalla@google.com
162+
from: JetStream Runs
163+
secure: true
164+
attachments: ${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x22b_long_context_8k_prefill.txt,${{ env.BENCHMARK_REPORT_DIR }}/moe_8x7b_jetstream.txt
165+
body: workflow for ${{github.repository}} completed successfully!
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/bin/bash
2+
3+
OUTPUT_DIR=${OUTPUT_DIR:-$(pwd)/test_dir}
4+
5+
pip install nltk==3.8.1
6+
python -c "import nltk; nltk.download('punkt')"
7+
8+
cd maxtext
9+
10+
# moe 8x7b microbenchmark
11+
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml tokenizer_path=assets/tokenizer.mistral-v1 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x7b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=8 megablox=False quantization=int8 quantize_kvcache=False checkpoint_is_quantized=True load_parameters_path=gs://jetstream-runner/8-7B-int8 capacity_factor=1 attention=dot_product model_call_mode=inference sparse_matmul=False weight_dtype=bfloat16 > ${OUTPUT_DIR}/moe_8x7b.txt
12+
tail -n5 ${OUTPUT_DIR}/moe_8x7b.txt > ${OUTPUT_DIR}/moe_8x7b.tmp && mv ${OUTPUT_DIR}/moe_8x7b.tmp ${OUTPUT_DIR}/moe_8x7b.txt
13+
14+
# moe 8x22B microbenchmark
15+
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=True capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="128,1024" sparse_matmul=False model_call_mode=inference > ${OUTPUT_DIR}/moe_8x22b.txt
16+
tail -n5 ${OUTPUT_DIR}/moe_8x22b.txt > ${OUTPUT_DIR}/moe_8x22b.tmp && mv ${OUTPUT_DIR}/moe_8x22b.tmp ${OUTPUT_DIR}/moe_8x22b.txt
17+
18+
# moe 8x22B 8k context length chunked prefill with 2k prefill chunk size
19+
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.benchmark_chunked_prefill MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=8192 max_target_length=9000 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=False capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="8192" sparse_matmul=False model_call_mode=inference ici_context_autoregressive_parallelism=8 use_chunked_prefill=True prefill_chunk_size=2048 > ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt
20+
tail -n5 ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt > ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.tmp && mv ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.tmp ${OUTPUT_DIR}/moe_8x22b_long_context_8k_prefill.txt
21+
22+
23+
# moe 8x7B Maxtext Jetstream
24+
25+
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.maxengine_server MaxText/configs/inference.yml tokenizer_path=assets/tokenizer.mistral-v1 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x7b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=24 megablox=False quantization=int8 quantize_kvcache=True checkpoint_is_quantized=True load_parameters_path=gs://jetstream-runner/8-7B-int8 capacity_factor=1 attention=dot_product model_call_mode=inference sparse_matmul=False weight_dtype=bfloat16 &
26+
27+
sleep 600
28+
29+
cd ..
30+
31+
python JetStream/benchmarks/benchmark_serving.py --tokenizer maxtext/assets/tokenizer.mistral-v1 --save-result --save-request-outputs --request-outputs-file-path outputs.json --num-prompts 1200 --max-output-length 1024 --dataset openorca --run-eval True > ${OUTPUT_DIR}/moe_8x7b_jetstream.txt
32+
tail -n25 ${OUTPUT_DIR}/moe_8x7b_jetstream.txt > ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp && mv ${OUTPUT_DIR}/moe_8x7b_jetstream.tmp ${OUTPUT_DIR}/moe_8x7b_jetstream.txt
33+
34+
# kill Jetstream server
35+
kill -9 %%

.github/workflows/unit_tests.yaml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
name: "Python type/lint/format checks"
3232
strategy:
3333
matrix:
34-
os: [ubuntu-20.04]
34+
os: [ubuntu-24.04]
3535
python-version: ['3.10']
3636
runs-on: ${{ matrix.os }}
3737
steps:
@@ -54,7 +54,7 @@ jobs:
5454
name: "JetStream unit tests"
5555
strategy:
5656
matrix:
57-
os: [ubuntu-20.04]
57+
os: [ubuntu-24.04]
5858
python-version: ['3.10']
5959
runs-on: ${{ matrix.os }}
6060
steps:
@@ -68,5 +68,7 @@ jobs:
6868
run: make install-deps
6969
- name: Run all unit tests in JetStream (jetstream/tests)
7070
run: make unit-tests
71+
- name: Run all py tests in JetStream (jetstream/tests)
72+
run: make py-tests
7173
- name: Create test coverage report
72-
run: make check-test-coverage
74+
run: make check-test-coverage

0 commit comments

Comments
 (0)