Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/UploadDockerImages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,11 @@ jobs:
- device: tpu
build_mode: stable
image_name: maxtext_jax_stable
dockerfile: ./dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
dockerfile: ./src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
- device: tpu
build_mode: nightly
image_name: maxtext_jax_nightly
dockerfile: ./dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
dockerfile: ./src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
uses: ./.github/workflows/build_and_push_docker_image.yml
with:
image_name: ${{ matrix.image_name }}
Expand All @@ -88,7 +88,7 @@ jobs:
device: tpu
build_mode: stable
workflow: post-training
dockerfile: ./dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
dockerfile: ./src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
image_date: ${{ needs.setup.outputs.image_date }}

Expand All @@ -101,7 +101,7 @@ jobs:
device: tpu
build_mode: nightly
workflow: post-training
dockerfile: ./dependencies/dockerfiles/maxtext_post_training_local_dependencies.Dockerfile
dockerfile: ./src/dependencies/dockerfiles/maxtext_post_training_local_dependencies.Dockerfile
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
image_date: ${{ needs.setup.outputs.image_date }}
base_image: gcr.io/tpu-prod-env-multipod/maxtext_post_training_stable:${{ needs.setup.outputs.image_date }}
Expand All @@ -116,11 +116,11 @@ jobs:
- device: gpu
build_mode: stable
image_name: maxtext_gpu_jax_stable
dockerfile: ./dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
dockerfile: ./src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
- device: gpu
build_mode: nightly
image_name: maxtext_gpu_jax_nightly
dockerfile: ./dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
dockerfile: ./src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile
uses: ./.github/workflows/build_and_push_docker_image.yml
with:
image_name: ${{ matrix.image_name }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/build_and_upload_images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ if [[ ! -v CLOUD_IMAGE_NAME ]] || [[ ! -v PROJECT ]] || [[ ! -v MODE ]] || [[ !
fi

gcloud auth configure-docker us-docker.pkg.dev --quiet
bash "$MAXTEXT_REPO_ROOT"'/dependencies/scripts/docker_build_dependency_image.sh' LOCAL_IMAGE_NAME=$LOCAL_IMAGE_NAME MODE="$MODE" DEVICE="$DEVICE"
bash "$MAXTEXT_REPO_ROOT"'/src/dependencies/scripts/docker_build_dependency_image.sh' LOCAL_IMAGE_NAME=$LOCAL_IMAGE_NAME MODE="$MODE" DEVICE="$DEVICE"
image_date=$(date +%Y-%m-%d)

# Upload only dependencies image
Expand All @@ -65,7 +65,7 @@ if ! gcloud storage cp gs://maxtext-test-assets/* "${MAXTEXT_TEST_ASSETS_ROOT:-$
fi

# Build then upload "dependencies + code" image
docker build --build-arg BASEIMAGE=${LOCAL_IMAGE_NAME} -f "$MAXTEXT_REPO_ROOT"'/dependencies/dockerfiles/maxtext_runner.Dockerfile' -t ${LOCAL_IMAGE_NAME}_runner .
docker build --build-arg BASEIMAGE=${LOCAL_IMAGE_NAME} -f "$MAXTEXT_REPO_ROOT"'/src/dependencies/dockerfiles/maxtext_runner.Dockerfile' -t ${LOCAL_IMAGE_NAME}_runner .
docker tag ${LOCAL_IMAGE_NAME}_runner gcr.io/$PROJECT/${CLOUD_IMAGE_NAME}:latest
docker push gcr.io/$PROJECT/${CLOUD_IMAGE_NAME}:latest
docker tag ${LOCAL_IMAGE_NAME}_runner gcr.io/$PROJECT/${CLOUD_IMAGE_NAME}:${image_date}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/check_docs_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
run: uv venv --python 3.12 $GITHUB_WORKSPACE/venv

- name: Install dependencies
run: . $GITHUB_WORKSPACE/venv/bin/activate && uv pip install -r dependencies/requirements/requirements_docs.txt
run: . $GITHUB_WORKSPACE/venv/bin/activate && uv pip install -r src/dependencies/requirements/requirements_docs.txt

- name: Build documentation
run: |
Expand Down
2 changes: 1 addition & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ sphinx:
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
python:
install:
- requirements: dependencies/requirements/requirements_docs.txt
- requirements: src/dependencies/requirements/requirements_docs.txt
2 changes: 1 addition & 1 deletion PREFLIGHT.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ bash preflight.sh PLATFORM=GCE && numactl --membind 0 --cpunodebind=0 python3 -m
```

For GKE,
`numactl` should be built into your docker image from [maxtext_tpu_dependencies.Dockerfile](https://github.com/google/maxtext/blob/main/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile), so you can use it directly if you built the maxtext docker image. Here is an example
`numactl` should be built into your docker image from [maxtext_tpu_dependencies.Dockerfile](https://github.com/google/maxtext/blob/main/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile), so you can use it directly if you built the maxtext docker image. Here is an example:

```
bash preflight.sh PLATFORM=GKE && numactl --membind 0 --cpunodebind=0 python3 -m maxtext.trainers.pre_train.train src/maxtext/configs/base.yml run_name=${YOUR_JOB_NAME?}
Expand Down
2 changes: 1 addition & 1 deletion build_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import os
from hatchling.builders.hooks.plugin.interface import BuildHookInterface

TPU_REQUIREMENTS_PATH = "dependencies/requirements/generated_requirements/tpu-requirements.txt"
TPU_REQUIREMENTS_PATH = "src/dependencies/requirements/generated_requirements/tpu-requirements.txt"


def get_tpu_dependencies():
Expand Down
2 changes: 1 addition & 1 deletion docs/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ If you are writing documentation for MaxText, you may want to preview the docume
First, make sure you install the necessary dependencies. You can do this by navigating to your local clone of the MaxText repo and running:

```bash
pip install -r dependencies/requirements/requirements_docs.txt
pip install -r src/dependencies/requirements/requirements_docs.txt
```

Once the dependencies are installed, you can navigate to the `docs/` folder and run:
Expand Down
4 changes: 2 additions & 2 deletions docs/install_maxtext.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ Run the following command, replacing `<jax-build-commit-hash>` with the hash you

```bash
seed-env \
--local-requirements=dependencies/requirements/base_requirements/tpu-base-requirements.txt \
--local-requirements=src/dependencies/requirements/base_requirements/tpu-base-requirements.txt \
--host-name=MaxText \
--seed-commit=<jax-build-commit-hash> \
--python-version=3.12 \
Expand All @@ -141,7 +141,7 @@ Similarly, run the command for the GPU requirements.

```bash
seed-env \
--local-requirements=dependencies/requirements/base_requirements/cuda12-base-requirements.txt \
--local-requirements=src/dependencies/requirements/base_requirements/cuda12-base-requirements.txt \
--host-name=MaxText \
--seed-commit=<jax-build-commit-hash> \
--python-version=3.12 \
Expand Down
4 changes: 2 additions & 2 deletions docs/run_maxtext/run_maxtext_via_pathways.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Before you can run a MaxText workload, you must complete the following setup ste
Step 1: Build the Docker image for a TPU device. This image contains MaxText and its dependencies.

```shell
bash dependencies/scripts/docker_build_dependency_image.sh DEVICE=tpu MODE=stable
bash src/dependencies/scripts/docker_build_dependency_image.sh DEVICE=tpu MODE=stable
```

Step 2: Configure Docker to authenticate with Google Cloud
Expand All @@ -52,7 +52,7 @@ Before you can run a MaxText workload, you must complete the following setup ste
Step 3: Upload the image to your project's registry. Replace `$USER_runner` with your desired image name.

```shell
bash dependencies/scripts/docker_upload_runner.sh CLOUD_IMAGE_NAME=$USER_runner
bash src/dependencies/scripts/docker_upload_runner.sh CLOUD_IMAGE_NAME=$USER_runner
```

## 2. Environment configuration
Expand Down
4 changes: 2 additions & 2 deletions docs/run_maxtext/run_maxtext_via_xpk.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,13 @@ ______________________________________________________________________
- **For TPUs:**

```
bash dependencies/scripts/docker_build_dependency_image.sh DEVICE=tpu MODE=stable
bash src/dependencies/scripts/docker_build_dependency_image.sh DEVICE=tpu MODE=stable
```
- **For GPUs:**
```
bash dependencies/scripts/docker_build_dependency_image.sh DEVICE=gpu MODE=stable
bash src/dependencies/scripts/docker_build_dependency_image.sh DEVICE=gpu MODE=stable
```
______________________________________________________________________
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/first_run.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ You can use [demo_decoding.ipynb](https://github.com/AI-Hypercomputer/maxtext/bl

### Run MaxText on NVIDIA GPUs

1. Use `bash dependencies/scripts/docker_build_dependency_image.sh DEVICE=gpu` to build a container with the required dependencies.
1. Use `bash src/dependencies/scripts/docker_build_dependency_image.sh DEVICE=gpu` to build a container with the required dependencies.
2. After installation is complete, run training with the following command on synthetic data:

```sh
Expand Down
8 changes: 4 additions & 4 deletions docs/tutorials/posttraining/rl_on_multi_host.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,13 +142,13 @@ Run the following script to create a Docker image with stable releases of
MaxText and its post-training dependencies. The build process takes approximately 10-15 minutes.

```bash
bash dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training
bash src/dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training
```

For experimental features (such as improved pathwaysutils resharding API), use:

```bash
bash dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training-experimental
bash src/dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training-experimental
```

### Option 2: From Github
Expand All @@ -159,7 +159,7 @@ For using a version newer than the latest PyPI release, you could also build the
git clone https://github.com/AI-Hypercomputer/maxtext.git
cd maxtext

bash dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training
bash src/dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training
```

### Upload the Docker Image
Expand All @@ -170,7 +170,7 @@ bash dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-trainin
> project administrator if you don't have this permission.

```bash
bash dependencies/scripts/docker_upload_runner.sh CLOUD_IMAGE_NAME=${CLOUD_IMAGE_NAME?}
bash src/dependencies/scripts/docker_upload_runner.sh CLOUD_IMAGE_NAME=${CLOUD_IMAGE_NAME?}
```

## Submit your RL workload via Pathways
Expand Down
4 changes: 2 additions & 2 deletions docs/tutorials/posttraining/sft_on_multi_host.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ docker run hello-world
Then run the following command to create a local Docker image named `maxtext_base_image`. This build process takes approximately 10 to 15 minutes.

```bash
bash dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training
bash src/dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training
```

### 1.3. Upload the Docker image to Artifact Registry
Expand All @@ -61,7 +61,7 @@ bash dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-trainin

```bash
export DOCKER_IMAGE_NAME=<Docker Image Name>
bash dependencies/scripts/docker_upload_runner.sh CLOUD_IMAGE_NAME=${DOCKER_IMAGE_NAME?}
bash src/dependencies/scripts/docker_upload_runner.sh CLOUD_IMAGE_NAME=${DOCKER_IMAGE_NAME?}
```

The `docker_upload_runner.sh` script uploads your Docker image to Artifact Registry.
Expand Down
12 changes: 7 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ classifiers = [
dependencies = []

[tool.hatch.metadata.hooks.requirements_txt.optional-dependencies]
tpu = ["dependencies/requirements/generated_requirements/tpu-requirements.txt"]
tpu-post-train = ["dependencies/requirements/generated_requirements/tpu-post-train-requirements.txt"]
cuda12 = ["dependencies/requirements/generated_requirements/cuda12-requirements.txt"]
docs = ["dependencies/requirements/requirements_docs.txt"]
tpu = ["src/dependencies/requirements/generated_requirements/tpu-requirements.txt"]
tpu-post-train = ["src/dependencies/requirements/generated_requirements/tpu-post-train-requirements.txt"]
cuda12 = ["src/dependencies/requirements/generated_requirements/cuda12-requirements.txt"]
docs = ["src/dependencies/requirements/requirements_docs.txt"]

[project.urls]
Repository = "https://github.com/AI-Hypercomputer/maxtext.git"
Expand All @@ -39,7 +39,7 @@ Repository = "https://github.com/AI-Hypercomputer/maxtext.git"
allow-direct-references = true

[tool.hatch.build.targets.wheel]
packages = ["src/MaxText", "src/maxtext", "src/install_maxtext_extra_deps"]
packages = ["src/MaxText", "src/maxtext", "src/install_maxtext_extra_deps", "src/dependencies"]

# TODO: Add this hook back when it handles device-type parsing
# [tool.hatch.build.targets.wheel.hooks.custom]
Expand All @@ -49,3 +49,5 @@ packages = ["src/MaxText", "src/maxtext", "src/install_maxtext_extra_deps"]
install_maxtext_tpu_github_deps = "install_maxtext_extra_deps.install_github_deps:main"
install_maxtext_cuda12_github_deps = "install_maxtext_extra_deps.install_github_deps:main"
install_maxtext_tpu_post_train_extra_deps = "install_maxtext_extra_deps.install_post_train_extra_deps:main"
docker_build_dependency_image = "dependencies.scripts.docker_build_dependency_image:main"
docker_upload_runner = "dependencies.scripts.docker_upload_runner:main"
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ WORKDIR /deps

# Copy setup files and dependency files separately for better caching
COPY tools/setup tools/setup/
COPY dependencies/requirements/ dependencies/requirements/
COPY src/dependencies/requirements/ src/dependencies/requirements/
COPY src/install_maxtext_extra_deps/extra_deps_from_github.txt src/install_maxtext_extra_deps/

# Install dependencies - these steps are cached unless the copied files change
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ WORKDIR /deps

# Copy setup files and dependency files separately for better caching
COPY tools/setup tools/setup/
COPY dependencies/requirements/ dependencies/requirements/
COPY src/dependencies/requirements/ src/dependencies/requirements/
COPY src/install_maxtext_extra_deps/ src/install_maxtext_extra_deps/
COPY src/maxtext/integration/vllm/ src/maxtext/integration/vllm/

Expand Down
27 changes: 27 additions & 0 deletions src/dependencies/scripts/docker_build_dependency_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Wrapper to run docker_build_dependency_image.sh from pip install."""

import os
import sys


def main():
  """Run the sibling ``docker_build_dependency_image.sh`` script under bash.

  The shell script is looked up in the same directory as this module, and
  every command-line argument after the program name (``sys.argv[1:]``) is
  forwarded to it unchanged.

  On success this function does not return: ``os.execvp`` replaces the
  current Python process with ``bash``, so the exit status seen by the caller
  is the one produced by the shell script.

  Raises:
    FileNotFoundError: If the shell script is not present next to this module.
  """
  script_path = os.path.join(os.path.dirname(__file__), "docker_build_dependency_image.sh")
  if not os.path.exists(script_path):
    raise FileNotFoundError(f"Script not found at {script_path}")

  cmd = ["bash", script_path] + sys.argv[1:]

  # exec* replaces the process image without flushing Python-level buffers, so
  # push out anything already written to stdout/stderr before handing over.
  for stream in (sys.stdout, sys.stderr):
    if stream is not None:
      stream.flush()

  os.execvp("bash", cmd)
Original file line number Diff line number Diff line change
Expand Up @@ -23,34 +23,34 @@
# ==================================

# Build docker image with stable dependencies
## bash dependencies/scripts/docker_build_dependency_image.sh DEVICE={{gpu|tpu}} MODE=stable
## bash src/dependencies/scripts/docker_build_dependency_image.sh DEVICE={{gpu|tpu}} MODE=stable

# Build docker image with nightly dependencies
## bash dependencies/scripts/docker_build_dependency_image.sh DEVICE={{gpu|tpu}} MODE=nightly
## bash src/dependencies/scripts/docker_build_dependency_image.sh DEVICE={{gpu|tpu}} MODE=nightly

# Build docker image with stable dependencies and a pinned JAX_VERSION for TPUs
## bash dependencies/scripts/docker_build_dependency_image.sh MODE=stable JAX_VERSION=0.4.13
## bash src/dependencies/scripts/docker_build_dependency_image.sh MODE=stable JAX_VERSION=0.4.13

# Build docker image with a pinned JAX_VERSION and a pinned LIBTPU_VERSION for TPUs
## bash dependencies/scripts/docker_build_dependency_image.sh MODE={{stable|nightly}} JAX_VERSION=0.8.1 LIBTPU_VERSION=0.0.31.dev20251119+nightly
## bash src/dependencies/scripts/docker_build_dependency_image.sh MODE={{stable|nightly}} JAX_VERSION=0.8.1 LIBTPU_VERSION=0.0.31.dev20251119+nightly

# Build docker image with a custom libtpu.so for TPUs
# Note: libtpu.so file must be present in the root directory of the MaxText repository
## bash dependencies/scripts/docker_build_dependency_image.sh MODE={{stable|nightly}}
## bash src/dependencies/scripts/docker_build_dependency_image.sh MODE={{stable|nightly}}

# Build docker image with nightly dependencies and a pinned JAX_VERSION for GPUs
# Available versions listed at https://us-python.pkg.dev/ml-oss-artifacts-published/jax-public-nightly-artifacts-registry/simple/jax
## bash dependencies/scripts/docker_build_dependency_image.sh DEVICE=gpu MODE=nightly JAX_VERSION=0.4.36.dev20241109
## bash src/dependencies/scripts/docker_build_dependency_image.sh DEVICE=gpu MODE=nightly JAX_VERSION=0.4.36.dev20241109

# ==================================
# POST-TRAINING BUILD EXAMPLES
# ==================================

# Build docker image with stable pre-training dependencies and stable post-training dependencies
## bash dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training
## bash src/dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training

# Build docker image with stable pre-training dependencies and post-training dependencies from GitHub head
## bash dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training POST_TRAINING_SOURCE=local
## bash src/dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training POST_TRAINING_SOURCE=local

if [ "${BASH_SOURCE-}" ]; then
this_file="${BASH_SOURCE[0]}"
Expand All @@ -61,7 +61,7 @@ else
this_file="${0}"
fi

MAXTEXT_REPO_ROOT="${MAXTEXT_REPO_ROOT:-$(CDPATH='' cd -- "$(dirname -- "${this_file}")"'/../..' && pwd)}"
MAXTEXT_REPO_ROOT="${MAXTEXT_REPO_ROOT:-$(CDPATH='' cd -- "$(dirname -- "${this_file}")"'/../../..' && pwd)}"

# Enable "exit immediately if any command fails" option
set -e
Expand Down Expand Up @@ -132,7 +132,7 @@ build_post_training_deps_from_local_github() {
DOCKERFILE_NAME='maxtext_post_training_local_dependencies.Dockerfile'
echo "Building local post-training dependencies: $DOCKERFILE_NAME"

run_docker_build "$MAXTEXT_REPO_ROOT/dependencies/dockerfiles/$DOCKERFILE_NAME" \
run_docker_build "$MAXTEXT_REPO_ROOT/src/dependencies/dockerfiles/$DOCKERFILE_NAME" \
"MODE=${WORKFLOW}" "BASEIMAGE=${LOCAL_IMAGE_NAME}"
}

Expand All @@ -144,7 +144,7 @@ build_gpu_image() {
fi

echo "Building docker image with arguments: ${docker_build_args[*]}"
run_docker_build "$MAXTEXT_REPO_ROOT/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile" "${docker_build_args[@]}"
run_docker_build "$MAXTEXT_REPO_ROOT/src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile" "${docker_build_args[@]}"
}

# Function to build image for TPUs
Expand All @@ -161,7 +161,7 @@ build_tpu_image() {
fi

echo "Building docker image with arguments: ${docker_build_args[*]}"
run_docker_build "$MAXTEXT_REPO_ROOT/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile" "${docker_build_args[@]}"
run_docker_build "$MAXTEXT_REPO_ROOT/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile" "${docker_build_args[@]}"

# Handle post-training workflow if specified
if [[ ${WORKFLOW} == "post-training" || ${WORKFLOW} == "post-training-experimental" ]]; then
Expand Down
Loading
Loading