Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/build_and_push_docker_image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ jobs:
DEVICE=${{ inputs.device }}
MODE=${{ inputs.build_mode }}
WORKFLOW=${{ inputs.workflow }}
PACKAGE_DIR=./src
JAX_VERSION=NONE
LIBTPU_VERSION=NONE
INCLUDE_TEST_ASSETS=true
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,5 @@ packages = ["src/MaxText", "src/maxtext", "src/install_maxtext_extra_deps", "src
install_maxtext_tpu_github_deps = "install_maxtext_extra_deps.install_github_deps:main"
install_maxtext_cuda12_github_deps = "install_maxtext_extra_deps.install_github_deps:main"
install_maxtext_tpu_post_train_extra_deps = "install_maxtext_extra_deps.install_post_train_extra_deps:main"
docker_build_dependency_image = "dependencies.scripts.docker_build_dependency_image:main"
docker_upload_runner = "dependencies.scripts.docker_upload_runner:main"
build_maxtext_docker_image = "dependencies.scripts.build_maxtext_docker_image:main"
upload_maxtext_docker_image = "dependencies.scripts.upload_maxtext_docker_image:main"
2 changes: 1 addition & 1 deletion src/MaxText/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"""

__author__ = "Google LLC"
__version__ = "0.2.0"
__version__ = "0.2.1"
__description__ = (
"MaxText is a high performance, highly scalable, open-source LLM written in pure Python/Jax and "
"targeting Google Cloud TPUs and GPUs for training and **inference."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ ENV ENV_JAX_VERSION=$JAX_VERSION
ARG DEVICE
ENV ENV_DEVICE=$DEVICE

ARG PACKAGE_DIR
ENV PACKAGE_DIR=$PACKAGE_DIR
Comment thread
SurbhiJainUSC marked this conversation as resolved.

ENV MAXTEXT_ASSETS_ROOT=/deps/src/maxtext/assets
ENV MAXTEXT_TEST_ASSETS_ROOT=/deps/tests/assets
ENV MAXTEXT_PKG_DIR=/deps/src/MaxText
Expand All @@ -47,16 +50,19 @@ ENV MAXTEXT_REPO_ROOT=/deps
WORKDIR /deps

# Copy setup files and dependency files separately for better caching
COPY tools/setup tools/setup/
COPY src/dependencies/requirements/ src/dependencies/requirements/
COPY src/install_maxtext_extra_deps/extra_deps_from_github.txt src/install_maxtext_extra_deps/
COPY ${PACKAGE_DIR}/dependencies/requirements/ src/dependencies/requirements/
COPY ${PACKAGE_DIR}/dependencies/scripts/ src/dependencies/scripts/
COPY ${PACKAGE_DIR}/install_maxtext_extra_deps/ src/install_maxtext_extra_deps/
COPY ${PACKAGE_DIR}/maxtext/integration/vllm/ src/maxtext/integration/vllm/

# Install dependencies - these steps are cached unless the copied files change
RUN echo "Running command: bash setup.sh MODE=$ENV_MODE JAX_VERSION=$ENV_JAX_VERSION DEVICE=${ENV_DEVICE}"
RUN --mount=type=cache,target=/root/.cache/pip --mount=type=cache,target=/root/.cache/uv bash /deps/tools/setup/setup.sh MODE=${ENV_MODE} JAX_VERSION=${ENV_JAX_VERSION} DEVICE=${ENV_DEVICE}
RUN --mount=type=cache,target=/root/.cache/uv \
export UV_LINK_MODE=copy && \
bash /deps/src/dependencies/scripts/setup.sh MODE=${ENV_MODE} JAX_VERSION=${ENV_JAX_VERSION} DEVICE=${ENV_DEVICE}

# Now copy the remaining code (source files that may change frequently)
COPY . .
COPY ${PACKAGE_DIR}/maxtext/ src/maxtext/

# Download test assets from GCS if building image with test assets
ARG INCLUDE_TEST_ASSETS=false
Expand All @@ -67,5 +73,4 @@ RUN if [ "$INCLUDE_TEST_ASSETS" = "true" ]; then \
fi; \
fi

# Install (editable) MaxText
RUN --mount=type=cache,target=/root/.cache/pip --mount=type=cache,target=/root/.cache/uv test -f '/tmp/venv_created' && "$(tail -n1 /tmp/venv_created)"/bin/activate ; pip install --no-dependencies -e .
ENV PYTHONPATH="/deps/src:${PYTHONPATH}"

This file was deleted.

8 changes: 4 additions & 4 deletions src/dependencies/dockerfiles/maxtext_runner.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
ARG BASEIMAGE=maxtext_base_image
FROM $BASEIMAGE

#FROM maxtext_base_image
ARG PACKAGE_DIR
ENV PACKAGE_DIR=$PACKAGE_DIR

ENV MAXTEXT_ASSETS_ROOT=/deps/src/maxtext/assets
ENV MAXTEXT_TEST_ASSETS_ROOT=/deps/tests/assets
Expand All @@ -14,8 +15,7 @@ ENV MAXTEXT_REPO_ROOT=/deps
WORKDIR /deps

# Copy assets separately
COPY src/maxtext/assets/ "${MAXTEXT_ASSETS_ROOT}"
COPY tests/assets/ "${MAXTEXT_TEST_ASSETS_ROOT}"
COPY ${PACKAGE_DIR}/maxtext/assets/ "${MAXTEXT_ASSETS_ROOT}"

# Copy all files except assets from local workspace into docker container
COPY --exclude="${MAXTEXT_ASSETS_ROOT}" --exclude="${MAXTEXT_TEST_ASSETS_ROOT}" . .
COPY --exclude=${PACKAGE_DIR}/maxtext/assets/ ${PACKAGE_DIR}/maxtext/ src/maxtext/
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ ENV ENV_LIBTPU_VERSION=$LIBTPU_VERSION
ARG DEVICE
ENV ENV_DEVICE=$DEVICE

ARG PACKAGE_DIR
ENV PACKAGE_DIR=$PACKAGE_DIR

ENV MAXTEXT_ASSETS_ROOT=/deps/src/maxtext/assets
ENV MAXTEXT_TEST_ASSETS_ROOT=/deps/tests/assets
ENV MAXTEXT_PKG_DIR=/deps/src/maxtext
Expand All @@ -44,20 +47,22 @@ ENV MAXTEXT_REPO_ROOT=/deps
WORKDIR /deps

# Copy setup files and dependency files separately for better caching
COPY tools/setup tools/setup/
COPY src/dependencies/requirements/ src/dependencies/requirements/
COPY src/install_maxtext_extra_deps/ src/install_maxtext_extra_deps/
COPY src/maxtext/integration/vllm/ src/maxtext/integration/vllm/
COPY ${PACKAGE_DIR}/dependencies/requirements/ src/dependencies/requirements/
COPY ${PACKAGE_DIR}/dependencies/scripts/ src/dependencies/scripts/
COPY ${PACKAGE_DIR}/install_maxtext_extra_deps/ src/install_maxtext_extra_deps/
COPY ${PACKAGE_DIR}/maxtext/integration/vllm/ src/maxtext/integration/vllm/

# Copy the custom libtpu.so file if it exists inside maxtext repository
# Copy the custom libtpu.so file if it exists
COPY libtpu.so* /root/custom_libtpu/

# Install dependencies - these steps are cached unless the copied files change
RUN echo "Running command: bash setup.sh MODE=$ENV_MODE WORKFLOW=$ENV_WORKFLOW JAX_VERSION=$ENV_JAX_VERSION LIBTPU_VERSION=$ENV_LIBTPU_VERSION DEVICE=${ENV_DEVICE}"
RUN --mount=type=cache,target=/root/.cache/pip --mount=type=cache,target=/root/.cache/uv bash /deps/tools/setup/setup.sh MODE=${ENV_MODE} WORKFLOW=${ENV_WORKFLOW} JAX_VERSION=${ENV_JAX_VERSION} LIBTPU_VERSION=${ENV_LIBTPU_VERSION} DEVICE=${ENV_DEVICE}
RUN --mount=type=cache,target=/root/.cache/uv \
export UV_LINK_MODE=copy && \
bash /deps/src/dependencies/scripts/setup.sh MODE=${ENV_MODE} WORKFLOW=${ENV_WORKFLOW} JAX_VERSION=${ENV_JAX_VERSION} LIBTPU_VERSION=${ENV_LIBTPU_VERSION} DEVICE=${ENV_DEVICE}

# Now copy the remaining code (source files that may change frequently)
COPY . .
COPY ${PACKAGE_DIR}/maxtext/ src/maxtext/

# Download test assets from GCS if building image with test assets
ARG INCLUDE_TEST_ASSETS=false
Expand All @@ -68,5 +73,4 @@ RUN if [ "$INCLUDE_TEST_ASSETS" = "true" ]; then \
fi; \
fi

# Install (editable) MaxText
RUN --mount=type=cache,target=/root/.cache/pip --mount=type=cache,target=/root/.cache/uv test -f '/tmp/venv_created' && "$(tail -n1 /tmp/venv_created)"/bin/activate ; pip install --no-dependencies -e .
ENV PYTHONPATH="/deps/src:${PYTHONPATH}"
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@


def main():
script_path = os.path.join(os.path.dirname(__file__), "docker_build_dependency_image.sh")
current_dir = os.path.dirname(os.path.abspath(__file__))
repo_root = os.path.abspath(os.path.join(current_dir, "..", ".."))
# Use relative path for Docker
os.environ["PACKAGE_DIR"] = os.path.relpath(repo_root, os.getcwd())

script_path = os.path.join(current_dir, "docker_build_dependency_image.sh")
if not os.path.exists(script_path):
raise FileNotFoundError(f"Script not found at {script_path}")

Expand Down
17 changes: 5 additions & 12 deletions src/dependencies/scripts/docker_build_dependency_image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,8 @@
# Build docker image with post-training dependencies
## bash src/dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-training

if [ "${BASH_SOURCE-}" ]; then
this_file="${BASH_SOURCE[0]}"
elif [ "${ZSH_VERSION-}" ]; then
# shellcheck disable=SC2296
this_file="${(%):-%x}"
else
this_file="${0}"
fi

MAXTEXT_REPO_ROOT="${MAXTEXT_REPO_ROOT:-$(CDPATH='' cd -- "$(dirname -- "${this_file}")"'/../../..' && pwd)}"
PACKAGE_DIR="${PACKAGE_DIR:-src}"
echo "PACKAGE_DIR: $PACKAGE_DIR"

# Enable "exit immediately if any command fails" option
set -e
Expand Down Expand Up @@ -107,6 +99,7 @@ docker_build_args=(
"WORKFLOW=${WORKFLOW}"
"MODE=${MODE}"
"JAX_VERSION=${JAX_VERSION}"
"PACKAGE_DIR=${PACKAGE_DIR}"
)

run_docker_build() {
Expand All @@ -123,7 +116,7 @@ build_gpu_image() {
fi

echo "Building docker image with arguments: ${docker_build_args[*]}"
run_docker_build "$MAXTEXT_REPO_ROOT/src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile" "${docker_build_args[@]}"
run_docker_build "$PACKAGE_DIR/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile" "${docker_build_args[@]}"
}

# Function to build image for TPUs
Expand All @@ -140,7 +133,7 @@ build_tpu_image() {
fi

echo "Building docker image with arguments: ${docker_build_args[*]}"
run_docker_build "$MAXTEXT_REPO_ROOT/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile" "${docker_build_args[@]}"
run_docker_build "$PACKAGE_DIR/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile" "${docker_build_args[@]}"
}

if [[ ${DEVICE} == "gpu" ]]; then
Expand Down
26 changes: 8 additions & 18 deletions src/dependencies/scripts/docker_upload_runner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,10 @@
# (minutes). However, if you are simply changing local code and not updating dependencies, uploading just takes a few seconds.

# Example command:
# bash docker_upload_runner.sh CLOUD_IMAGE_NAME=${USER}_runner

if [ "${BASH_SOURCE-}" ]; then
this_file="${BASH_SOURCE[0]}"
elif [ "${ZSH_VERSION-}" ]; then
# shellcheck disable=SC2296
this_file="${(%):-%x}"
else
this_file="${0}"
fi
# bash src/dependencies/scripts/docker_upload_runner.sh CLOUD_IMAGE_NAME=${USER}_runner

MAXTEXT_REPO_ROOT="${MAXTEXT_REPO_ROOT:-$(CDPATH='' cd -- "$(dirname -- "${this_file}")"'/../../..' && pwd)}"
PACKAGE_DIR="${PACKAGE_DIR:-src}"
echo "PACKAGE_DIR: $PACKAGE_DIR"

set -e

Expand Down Expand Up @@ -85,6 +77,7 @@ if [ -n "$DANGLING_LINKS" ]; then
echo "$DANGLING_LINKS"
echo "These can cause 'failed to compute cache key' errors during 'docker build'."
echo "Please remove or fix them before building the Docker image."
echo "Alternatively, run the command again from a clean, empty directory to bypass your local file state entirely."
exit 1
fi

Expand All @@ -95,23 +88,20 @@ if [ -n "$ABSOLUTE_LINKS" ]; then
echo "$ABSOLUTE_LINKS"
echo "Docker cannot follow absolute paths outside of the build context, which can cause 'failed to compute cache key' errors."
echo "Please remove these links or convert them to relative paths before building the Docker image."
echo "Alternatively, run the command again from a clean, empty directory to bypass your local file state entirely."
exit 1
fi

# Download other test assets from GCS into ${MAXTEXT_TEST_ASSETS_ROOT:-${MAXTEXT_REPO_ROOT:-$PWD}}/tests/assets/golden_logits
# if ! gcloud storage cp gs://maxtext-test-assets/* "${MAXTEXT_TEST_ASSETS_ROOT:-${MAXTEXT_REPO_ROOT:-$PWD}/tests/assets/golden_logits}"; then
# echo "WARNING: Failed to download test assets from GCS. These files are only used for end-to-end tests; you may not have access to the bucket."
# fi

# Check if the base image exists locally
if ! docker image inspect "${LOCAL_IMAGE_NAME}" &> /dev/null; then
echo "ERROR: Base image '${LOCAL_IMAGE_NAME}' not found locally."
echo "Please build it first by running 'bash docker_build_dependency_image.sh'."
echo "Please build it first by running 'build_maxtext_docker_image'."
exit 1
fi

docker build --no-cache --build-arg BASEIMAGE=${LOCAL_IMAGE_NAME} \
-f "$MAXTEXT_REPO_ROOT"'/src/dependencies/dockerfiles/maxtext_runner.Dockerfile' \
--build-arg PACKAGE_DIR=${PACKAGE_DIR} \
-f "$PACKAGE_DIR"'/dependencies/dockerfiles/maxtext_runner.Dockerfile' \
-t ${LOCAL_IMAGE_NAME_RUNNER} .

docker tag ${LOCAL_IMAGE_NAME_RUNNER} gcr.io/$PROJECT/${CLOUD_IMAGE_NAME}:latest
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@


def main():
script_path = os.path.join(os.path.dirname(__file__), "docker_upload_runner.sh")
current_dir = os.path.dirname(os.path.abspath(__file__))
repo_root = os.path.abspath(os.path.join(current_dir, "..", ".."))
# Use relative path for Docker
os.environ["PACKAGE_DIR"] = os.path.relpath(repo_root, os.getcwd())

script_path = os.path.join(current_dir, "docker_upload_runner.sh")
if not os.path.exists(script_path):
raise FileNotFoundError(f"Script not found at {script_path}")

Expand Down
Loading