diff --git a/docs/install_maxtext.md b/docs/install_maxtext.md index 879599fcb3..0ed5ab393a 100644 --- a/docs/install_maxtext.md +++ b/docs/install_maxtext.md @@ -106,6 +106,29 @@ uv pip install -e .[runner] --resolution=lowest After installation, you can verify the package is available with `python3 -c "import maxtext"` and run training jobs with `python3 -m maxtext.trainers.pre_train.train ...`. +## uv project management + +For simplicity, this guide uses the traditional `uv pip install` syntax. +If you are using the `uv` project management features (with a `pyproject.toml` and `uv.lock` in your own project), you need to run your commands differently: +use `uv add` instead of `uv pip install`, and prefix all other commands with `uv run`. +For example, to install and run MaxText in a `uv`-managed project: + +```bash +# 1. Initialize your uv project +mkdir my-maxtext-project && cd my-maxtext-project +uv init + +# 2. Add MaxText as a dependency +uv add maxtext[tpu]==0.2.1 --resolution=lowest + +# 3. Install MaxText's extra GitHub dependencies. These will be automatically added to your pyproject.toml +uv run install_tpu_pre_train_extra_deps + +# 4. Run MaxText training for a few steps +uv run python3 -m maxtext.trainers.pre_train.train run_name=${RUN_NAME?} base_output_directory=${BASE_OUTPUT_DIRECTORY?} \ + dataset_type=synthetic steps=5 per_device_batch_size=1 model_name=llama2-7b +``` + # Update MaxText dependencies ## Introduction @@ -120,7 +143,7 @@ To update dependencies, you will follow these general steps: 1. **Modify Base Requirements**: Update the desired dependencies in `base_requirements/requirements.txt` or the hardware-specific files (`base_requirements/tpu-base-requirements.txt`, `base_requirements/gpu-base-requirements.txt`). 2. **Generate New Files**: Run the `seed-env` CLI tool to generate new, fully-pinned requirements files based on your changes. -3. 
**Update Project Files**: Copy the newly generated files into the `generated_requirements/` directory. +3. **Update Project Files**: Copy the newly generated files into the `src/dependencies/requirements/generated_requirements/` directory. 4. **Handle GitHub Dependencies**: Move any dependencies that are installed directly from GitHub from the generated files to `src/dependencies/github_deps/pre_train_deps.txt`. 5. **Verify**: Test the new dependencies to ensure the project installs and runs correctly. @@ -176,8 +199,8 @@ After generating the new requirements, you need to update the files in the MaxTe 1. **Copy the generated files:** - - Move `generated_tpu_artifacts/tpu-requirements.txt` to `generated_requirements/tpu-requirements.txt`. - - Move `generated_gpu_artifacts/cuda12-requirements.txt` to `generated_requirements/cuda12-requirements.txt`. + - Move `generated_tpu_artifacts/tpu-requirements.txt` to `src/dependencies/requirements/generated_requirements/tpu-requirements.txt`. + - Move `generated_gpu_artifacts/cuda12-requirements.txt` to `src/dependencies/requirements/generated_requirements/cuda12-requirements.txt`. 2. **Update `pre_train_deps.txt` (if necessary):** Currently, MaxText uses a few dependencies, such as `mlperf-logging` and `google-jetstream`, that are installed directly from GitHub source. These are defined in `base_requirements/requirements.txt`, and the `seed-env` tool will carry them over to the generated requirements files. diff --git a/src/dependencies/scripts/install_post_train_extra_deps.py b/src/dependencies/scripts/install_post_train_extra_deps.py index e756256d59..50c46581f9 100644 --- a/src/dependencies/scripts/install_post_train_extra_deps.py +++ b/src/dependencies/scripts/install_post_train_extra_deps.py @@ -19,14 +19,21 @@ """ import os -import subprocess -import sys + +# This block makes the script a bit more flexible. 
It allows `uv_utils` to be imported whether this module is run as a +# standalone script or as part of a larger Python package. It also allows us to not worry whether the full package name +# starts with "src." (this happens when running inside a docker image as part of setup.sh). +try: + from . import uv_utils +except ImportError: + import uv_utils def main(): """ Installs extra dependencies specified in 'dependencies/extra_deps/post_train_*.txt' using uv. - It executes 'uv pip install -r --resolution=lowest'. + + It executes 'uv add' (if uv.lock is present) or 'uv pip install'. """ os.environ["VLLM_TARGET_DEVICE"] = "tpu" @@ -36,57 +43,9 @@ def main(): if not os.path.exists(github_deps_path): raise FileNotFoundError(f"Github dependencies file not found at {github_deps_path}") - # Check if 'uv' is available in the environment - try: - subprocess.run([sys.executable, "-m", "pip", "install", "uv"], check=True, capture_output=True) - subprocess.run([sys.executable, "-m", "uv", "--version"], check=True, capture_output=True) - except subprocess.CalledProcessError as e: - print(f"Error checking uv version: {e}") - print(f"Stderr: {e.stderr.decode()}") - sys.exit(1) - - github_deps_command = [ - sys.executable, # Use the current Python executable's pip to ensure the correct environment - "-m", - "uv", - "pip", - "install", - "-r", - str(github_deps_path), - "--no-deps", - ] - - local_vllm_install_command = [ - sys.executable, # Use the current Python executable's pip to ensure the correct environment - "-m", - "uv", - "pip", - "install", - f"{repo_root}/maxtext/integration/vllm", # MaxText on vllm installations - "--no-deps", - ] - - try: - # Run the command to install Github dependencies - print(f"Installing Github dependencies: {' '.join(github_deps_command)}") - _ = subprocess.run(github_deps_command, check=True, capture_output=True, text=True) - print("Github dependencies installed successfully!") - - # Run the command to install the MaxText vLLM directory - 
print(f"Installing MaxText vLLM dependency: {' '.join(local_vllm_install_command)}") - _ = subprocess.run(local_vllm_install_command, check=True, capture_output=True, text=True) - print("MaxText vLLM dependency installed successfully!") - except subprocess.CalledProcessError as e: - print("Failed to install extra dependencies.") - print(f"Command '{' '.join(e.cmd)}' returned non-zero exit status {e.returncode}.") - print("--- Stderr ---") - print(e.stderr) - print("--- Stdout ---") - print(e.stdout) - sys.exit(e.returncode) - except (OSError, FileNotFoundError) as e: - print(f"An OS-level error occurred while trying to run uv: {e}") - sys.exit(1) + # Install both requirements file and the local vLLM integration + uv_utils.install_requirements(requirements_files=[github_deps_path]) + uv_utils.install_editable(paths=[os.path.join(repo_root, "maxtext", "integration", "vllm")]) if __name__ == "__main__": diff --git a/src/dependencies/scripts/install_pre_train_extra_deps.py b/src/dependencies/scripts/install_pre_train_extra_deps.py index 310e246b8b..7e3d4a8d0b 100644 --- a/src/dependencies/scripts/install_pre_train_extra_deps.py +++ b/src/dependencies/scripts/install_pre_train_extra_deps.py @@ -19,14 +19,21 @@ """ import os -import subprocess -import sys + +# This block makes the script a bit more flexible. It allows `uv_utils` to be imported whether this module is run as a +# standalone script or as part of a larger Python package. It also allows us to not worry whether the full package name +# starts with "src." (this happens when running inside a docker image as part of setup.sh). +try: + from . import uv_utils +except ImportError: + import uv_utils def main(): """ Installs extra dependencies specified in 'dependencies/extra_deps/pre_train_*.txt' using uv. - It executes 'uv pip install -r --resolution=lowest'. + + It executes 'uv add' (if uv.lock is present) or 'uv pip install'. 
""" current_dir = os.path.dirname(os.path.abspath(__file__)) repo_root = os.path.abspath(os.path.join(current_dir, "..", "..")) @@ -34,41 +41,7 @@ def main(): if not os.path.exists(github_deps_path): raise FileNotFoundError(f"Github dependencies file not found at {github_deps_path}") - # Check if 'uv' is available in the environment - try: - subprocess.run([sys.executable, "-m", "pip", "install", "uv"], check=True, capture_output=True) - subprocess.run([sys.executable, "-m", "uv", "--version"], check=True, capture_output=True) - except subprocess.CalledProcessError as e: - print(f"Error checking uv version: {e}") - print(f"Stderr: {e.stderr.decode()}") - sys.exit(1) - - github_deps_command = [ - sys.executable, # Use the current Python executable's pip to ensure the correct environment - "-m", - "uv", - "pip", - "install", - "-r", - str(github_deps_path), - "--no-deps", - ] - - try: - print(f"Installing Github dependencies: {' '.join(github_deps_command)}") - _ = subprocess.run(github_deps_command, check=True, capture_output=True, text=True) - print("Github dependencies installed successfully!") - except subprocess.CalledProcessError as e: - print("Failed to install extra dependencies.") - print(f"Command '{' '.join(e.cmd)}' returned non-zero exit status {e.returncode}.") - print("--- Stderr ---") - print(e.stderr) - print("--- Stdout ---") - print(e.stdout) - sys.exit(e.returncode) - except (OSError, FileNotFoundError) as e: - print(f"An OS-level error occurred while trying to run uv: {e}") - sys.exit(1) + uv_utils.install_requirements(requirements_files=[github_deps_path]) if __name__ == "__main__": diff --git a/src/dependencies/scripts/uv_utils.py b/src/dependencies/scripts/uv_utils.py new file mode 100644 index 0000000000..19e16acb92 --- /dev/null +++ b/src/dependencies/scripts/uv_utils.py @@ -0,0 +1,107 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with 
# the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Helper utilities for working with uv in installation scripts."""

import os
import shutil
import subprocess
import sys


def _get_uv_command():
    """Returns the argv prefix used to invoke uv.

    Resolution order:
      1. A `uv` executable found on PATH.
      2. `<current python> -m uv`, if that module runs successfully.
      3. Install uv with pip, then retry the PATH lookup and finally fall back
         to the module form.

    Returns:
      A list of strings to which uv sub-commands can be appended.

    Exits the process with status 1 if the pip installation fails.
    """
    # 1. Try finding 'uv' in PATH
    uv_binary = shutil.which("uv")
    if uv_binary:
        return [uv_binary]

    # 2. Try running it as a module
    module_command = [sys.executable, "-m", "uv"]
    try:
        subprocess.run(module_command + ["--version"], check=True, capture_output=True)
        return module_command
    except (subprocess.CalledProcessError, FileNotFoundError):
        pass

    # 3. Fall back to installing via pip
    print("uv not found in PATH or as a module. Attempting to install it via pip...")
    try:
        subprocess.run([sys.executable, "-m", "pip", "install", "uv"], check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        print(f"Error installing uv via pip: {e}")
        # Output was captured as bytes (no text=True); never let decoding mask the real error.
        print(f"Stderr: {e.stderr.decode(errors='replace')}")
        sys.exit(1)
    except OSError as e:
        # The interpreter itself could not be spawned; report it instead of a raw traceback.
        print(f"An OS-level error occurred: {e}")
        sys.exit(1)

    # Check PATH again after installation
    uv_binary = shutil.which("uv")
    if uv_binary:
        return [uv_binary]
    return module_command


def _is_uv_project():
    """Returns True when a `uv.lock` file exists in the current working directory."""
    return os.path.exists("uv.lock")


def _as_str_list(items):
    """Coerces one path or an iterable of paths into a list of strings.

    A bare string or path-like object is treated as a single entry, so it is not
    iterated character by character.
    """
    if isinstance(items, (str, os.PathLike)):
        items = [items]
    return [str(item) for item in items]


def _requirements_command(uv_command, requirements_files, is_uv_project):
    """Builds the uv argv that installs every file in `requirements_files`."""
    if is_uv_project:
        cmd = uv_command + ["add", "--frozen"]
    else:
        cmd = uv_command + ["pip", "install", "--no-deps"]
    for req in requirements_files:
        cmd.extend(["-r", req])
    return cmd


def _editable_command(uv_command, paths, is_uv_project):
    """Builds the uv argv that installs every entry of `paths` in editable mode."""
    if is_uv_project:
        # `uv add --editable` is a flag that applies to all listed sources.
        return uv_command + ["add", "--frozen", "--editable"] + paths
    cmd = uv_command + ["pip", "install", "--no-deps"]
    for path in paths:
        # `uv pip install -e` takes exactly one value, so it is repeated per path;
        # otherwise only the first path would be installed as editable.
        cmd.extend(["-e", path])
    return cmd


def install_requirements(requirements_files):
    """Installs packages from requirements files using uv.

    Uses `uv add --frozen -r ...` when `uv.lock` exists in the current working
    directory, otherwise `uv pip install --no-deps -r ...`.

    Args:
      requirements_files: A path, or an iterable of paths, to requirements
        files. Nothing is run when it is empty or None.
    """
    if not requirements_files:
        return

    uv_command = _get_uv_command()
    cmd = _requirements_command(uv_command, _as_str_list(requirements_files), _is_uv_project())
    _execute_command(cmd)


def install_editable(paths):
    """Installs local packages in editable mode using uv.

    Uses `uv add --frozen --editable ...` when `uv.lock` exists in the current
    working directory, otherwise `uv pip install --no-deps -e ...` with one `-e`
    per path.

    Args:
      paths: A path, or an iterable of paths, to local packages. Nothing is run
        when it is empty or None.
    """
    if not paths:
        return

    uv_command = _get_uv_command()
    cmd = _editable_command(uv_command, _as_str_list(paths), _is_uv_project())
    _execute_command(cmd)


def _execute_command(cmd):
    """Helper to execute a command with logging and error handling.

    Output of the command is captured and only printed when it fails.

    Args:
      cmd: The argv list to run.

    Exits the process with the command's return code if it fails, or with
    status 1 if the command could not be started.
    """
    print(f"Executing: {' '.join(cmd)}")
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Command failed with exit status {e.returncode}.")
        print("--- Stderr ---")
        print(e.stderr)
        print("--- Stdout ---")
        print(e.stdout)
        sys.exit(e.returncode)
    except OSError as e:
        # FileNotFoundError is a subclass of OSError, so this covers a missing executable too.
        print(f"An OS-level error occurred: {e}")
        sys.exit(1)
    print("Success!")