Skip to content

Commit f8ec421

Browse files
Add Github Runner and tests (#246)
1. Add v6e-8 runner 2. Add basic MOE Microbenchmark tests
1 parent 035e13a commit f8ec421

3 files changed

Lines changed: 180 additions & 0 deletions

File tree

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# This workflow runs the MoE inference microbenchmarks on a self-hosted TPU (v6e-8) runner, either on a schedule or via manual dispatch.
16+
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
17+
18+
name: Tests

# Triggers: manual dispatch plus a cron schedule. The pull_request / push
# triggers are intentionally left disabled.
on:
  # pull_request:
  # push:
  #   branches: [ "main" ]
  workflow_dispatch:
  schedule:
    # Fires at minute 0 of every hour ('*/1' in the hour field means "every
    # hour").
    # NOTE(review): this was previously described as "every 4 hours"; if that
    # cadence is what is wanted, the expression should be '0 */4 * * *'.
    - cron: '0 */1 * * *'
28+
29+
jobs:
  # Runs the MoE microbenchmark script on the self-hosted TPU runner.
  prelim:
    runs-on: ["self-hosted", "tpu", "v6e-8"]
    steps:
      - uses: actions/checkout@v4
      - name: Test MOE Microbenchmarks
        run: bash .github/workflows/test_moe_microbenchmarks.sh
        # run: bash .github/workflows/test_moe_8x22b_microbenchmark.sh
      # - name: Test MOE long context chunked prefill - 8k
      #   run: bash .github/workflows/benchmark_chunked_prefill.sh

  # Creates an issue (or updates the last open one) when `prelim` fails, then
  # removes the benchmark working directory.
  notify:
    name: Notify test build
    needs: [prelim]
    # A job with `needs` is skipped by default when a dependency fails, which
    # would prevent both the failure notification and the cleanup from ever
    # running after a failed build. `always()` makes the job run regardless;
    # the individual steps then decide what to do from `needs.prelim.result`.
    if: ${{ always() }}
    runs-on: ["self-hosted", "tpu", "v6e-8"]
    steps:
      - name: Check whether one of the jobs failed
        if: ${{ needs.prelim.result == 'failure' && github.event.pull_request == null }}
        uses: jayqi/failed-build-issue-action@1a893bbf43ef1c2a8705e2b115cd4f0fe3c5649b # v1.2.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
      # Exact complement of the condition above, so exactly one of the two
      # steps runs.
      - name: Log message if dependent job succeeded
        if: ${{ !(needs.prelim.result == 'failure' && github.event.pull_request == null) }}
        run: echo "Conditions for creating/updating issue not met. Skipping."
      # - name: Send email
      #   uses: dawidd6/action-send-mail@v3.6.0
      #   with:
      #     server_address: smtp.gmail.com
      #     server_port: 465
      #     username: ${{secrets.MAIL_USERNAME}}
      #     password: ${{secrets.MAIL_PASSWORD}}
      #     subject: Message from Inference Stable Stack Runs.
      #     to: singhvijaya@google.com, yuyanpeng@google.com, vipannalla@google.com
      #     from: InferenceStableStackRuns
      #     secure: true
      #     attachments: ~/test_dir/moe_8x7b_jetstream.txt
      #     # attachments: ~/test_dir/moe_8x7b.txt,~/test_dir/moe_8x22b.txt,~/test_dir/moe_8x22b_long_context_8k_prefill.txt
      #     body: workflow for ${{github.repository}} completed successfully!
      # Always clean up, even if an earlier step in this job failed, so the
      # next run does not start with a stale ~/test_dir.
      # NOTE(review): this only cleans the machine this job lands on; it
      # assumes a single runner carries these labels -- confirm.
      - name: Cleanup
        if: ${{ always() }}
        run: rm -rf ~/test_dir
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/bin/bash
# Clones MaxText and JetStream into ~/test_dir, installs their dependencies
# into a fresh virtualenv, and runs the Mixtral 8x7B and 8x22B inference
# microbenchmarks. Each benchmark's stdout is written to
# ~/test_dir/<name>.txt and then trimmed to its last 5 lines.

# Stop at the first failing command. Without this the script's exit status is
# that of the final `tail && mv`, so a failed setup step or benchmark would
# still report success to CI.
set -eo pipefail

TEST_DIR="${HOME}/test_dir"
LIBTPU_FLAGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000"

# Runs MaxText.inference_microbenchmark with the given config overrides.
#   $1    - benchmark name; stdout goes to ${TEST_DIR}/<name>.txt
#   $2... - overrides appended after MaxText/configs/inference.yml
# On success the output file is reduced to its last 5 lines. On failure the
# tail of the captured output is echoed to stderr and a non-zero status is
# returned.
run_microbenchmark() {
  local name="$1"
  shift
  local out_file="${TEST_DIR}/${name}.txt"
  local tmp_file="${TEST_DIR}/${name}.tmp"

  if ! LIBTPU_INIT_ARGS="${LIBTPU_FLAGS}" python -m MaxText.inference_microbenchmark \
      MaxText/configs/inference.yml "$@" > "${out_file}"; then
    echo "Microbenchmark '${name}' failed; last lines of ${out_file}:" >&2
    tail -n 20 "${out_file}" >&2 || true
    return 1
  fi

  tail -n5 "${out_file}" > "${tmp_file}"
  mv "${tmp_file}" "${out_file}"
}

# Start from a clean directory so a leftover checkout from an earlier run
# cannot make `mkdir` / `git clone` fail.
rm -rf "${TEST_DIR}"
mkdir -p "${TEST_DIR}"
cd "${TEST_DIR}"
git clone https://github.com/google/maxtext.git
git clone https://github.com/google/JetStream.git

sudo apt-get -y update
sudo apt-get -y install python3.10-venv
sudo apt-get -y install jq
python -m venv .env
source .env/bin/activate

cd "${TEST_DIR}/JetStream"
pip install -e .
cd benchmarks
pip install -r requirements.in

# The benchmarks below are run from the MaxText checkout.
cd "${TEST_DIR}/maxtext"
pip3 install wheel
bash setup.sh MODE=stable DEVICE=tpu

pip install nltk==3.8.1

# moe 8x7b microbenchmark
# (weight_dtype=bfloat16 used to be passed twice; it is given once here.)
run_microbenchmark moe_8x7b \
  tokenizer_path=assets/tokenizer.mistral-v1 \
  max_prefill_predict_length=1024 \
  max_target_length=2048 \
  model_name=mixtral-8x7b \
  ici_fsdp_parallelism=1 \
  ici_autoregressive_parallelism=1 \
  ici_tensor_parallelism=1 \
  ici_context_autoregressive_parallelism=8 \
  scan_layers=false \
  weight_dtype=bfloat16 \
  per_device_batch_size=8 \
  megablox=False \
  quantization=int8 \
  quantize_kvcache=False \
  checkpoint_is_quantized=True \
  load_parameters_path=gs://jetstream-runner/8-7B-int8 \
  capacity_factor=1 \
  attention=dot_product \
  model_call_mode=inference \
  sparse_matmul=False

# moe 8x22B microbenchmark
run_microbenchmark moe_8x22b \
  load_parameters_path=gs://jetstream-runner/8-22B-int8 \
  max_prefill_predict_length=1024 \
  max_target_length=2048 \
  model_name=mixtral-8x22b \
  ici_fsdp_parallelism=1 \
  ici_autoregressive_parallelism=1 \
  ici_tensor_parallelism=1 \
  ici_context_autoregressive_parallelism=8 \
  scan_layers=false \
  per_device_batch_size=24 \
  attention=dot_product \
  megablox=False \
  quantization=int8 \
  checkpoint_is_quantized=True \
  quantize_kvcache=True \
  capacity_factor=1 \
  tokenizer_path=assets/tokenizer.mistral-v3 \
  inference_microbenchmark_prefill_lengths="128,1024" \
  sparse_matmul=False \
  model_call_mode=inference
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#!/bin/bash
2+
3+
# Copyright 2022 The JAX Authors.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# https://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# Heavily influenced by
18+
# https://github.com/openxla/iree/tree/main/build_tools/github_actions/runner/config
19+
20+
# This file sets up a tpu vm to be used as a github runner for testing.
21+
# It creates a user runner without sudo permissions to
22+
# run the config file and authenticate to github
23+
24+
25+
# Exit on errors and on use of unset variables. Command tracing (-x) is
# deliberately left off: this script handles the GitHub runner registration
# token, and tracing would print it to the terminal / logs.
set -eu

# Arguments:
#   $1 - runner name
#   $2 - runner labels (passed to config.sh --labels)
#   $3 - GitHub runner registration token
#   $4 - repository URL to register the runner with
if [ "$#" -ne 4 ]; then
  echo "Usage: setup_runner.sh <runner name> <tags> <github token> <repo_url>" >&2
  exit 1
fi

runner_name="$1"
runner_tags="$2"
runner_token="$3"
repo_url="$4"


# Create `runner` user. This user won't have sudo access unless you ssh into the
# GCP VM as `runner` using gcloud. Don't do that!
sudo useradd runner -m

# Find the latest actions-runner download. The runner will automatically update
# itself when new versions are released. Github requires that all self-hosted
# runners are updated to the latest version within 30 days of release
# (https://docs.github.com/en/actions/hosting-your-own-runners/autoscaling-with-self-hosted-runners#controlling-runner-software-updates-on-self-hosted-runners).
# Example URL:
# https://github.com/actions/runner/releases/download/v2.298.2/actions-runner-linux-x64-2.298.2.tar.gz
actions_runner_download_regexp='https://github.com/actions/runner/releases/'\
'download/v[0-9.]\+/actions-runner-linux-x64-[0-9.]\+\.tar\.gz'
# Use `head -n 1` because there are multiple instances of the same URL.
# `|| true` keeps `set -e` from aborting here so the explicit check below can
# report a clear error when nothing matched.
actions_runner_download=$(
  curl -fsS -X GET 'https://api.github.com/repos/actions/runner/releases/latest' |
    grep -o "${actions_runner_download_regexp}" |
    head -n 1) || true
if [ -z "${actions_runner_download}" ]; then
  echo "Could not determine the latest actions-runner download URL" >&2
  exit 1
fi
echo "actions_runner_download: ${actions_runner_download}"

# Run the rest of the setup as `runner`.
#
# Note that env vars in the heredoc will be expanded according to the _calling_
# environment, not the `runner` environment we're creating -- it acts like a
# double-quoted string. This is how variables like $runner_name are inserted
# without using sudo -E (which would cause the current environment to be
# inherited). This also means we must be careful to escape any variables that
# we'd like to evaluate in the `runner` environment, e.g. $HOME.
#
# The double quotes around the expanded values are passed through literally and
# interpreted by the inner shell, so values containing spaces stay one argument.
sudo -i -u runner bash -e <<EOF
cd ~/

mkdir actions-runner && cd actions-runner
curl -f -o actions-runner-linux-x64.tar.gz -L "${actions_runner_download}"
tar xzf ./actions-runner-linux-x64.tar.gz
# Register the runner with Github
./config.sh --unattended \
  --url "${repo_url}" \
  --labels "${runner_tags}" \
  --token "${runner_token}" \
  --name "${runner_name}"

EOF

0 commit comments

Comments
 (0)