AI-Hypercomputer
diff --git a/‎.github/CODEOWNERS‎
Lines changed: 1 addition & 1 deletion b/‎.github/CODEOWNERS‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/build_and_push_docker_image.yml‎
Lines changed: 3 additions & 18 deletions b/‎.github/workflows/build_and_push_docker_image.yml‎
Lines changed: 3 additions & 18 deletions
diff --git a/‎.github/workflows/build_and_test_maxtext.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/build_and_test_maxtext.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/run_pathways_tests.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/run_pathways_tests.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/run_tests_against_package.yml‎
Lines changed: 1 addition & 3 deletions b/‎.github/workflows/run_tests_against_package.yml‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 0 additions & 1 deletion b/‎.pre-commit-config.yaml‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎LICENSE_HEADER‎
Lines changed: 13 additions & 0 deletions b/‎LICENSE_HEADER‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎PREFLIGHT.md‎
Lines changed: 7 additions & 7 deletions b/‎PREFLIGHT.md‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎benchmarks/maxtext_xpk_runner.py‎
Lines changed: 1 addition & 1 deletion b/‎benchmarks/maxtext_xpk_runner.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎benchmarks/upload_metrics_to_bq.py‎
Lines changed: 1 addition & 1 deletion b/‎benchmarks/upload_metrics_to_bq.py‎
Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ tests/inference/ @vipannalla @mitalisi @gpolovets1 @mailvijayasingh @jrplatin @p
 src/maxtext/inference @vipannalla @mitalisi @gpolovets1 @mailvijayasingh @jrplatin @patemotter @lumosis @richjames0
 
 # Dockerfiles and dependencies
-src/dependencies/ @bvandermoon @parambole @richjames0 @shralex
+src/dependencies/ @bvandermoon @SurbhiJainUSC @parambole @richjames0 @shralex
 
 # Docs
 docs/ @jacoguzo @bvandermoon @richjames0 @shralex @gobbleturk @RissyRan @gagika @A9isha @jiangjy1982 @vipannalla
 
@@ -54,6 +54,7 @@ jobs:
     runs-on: linux-x86-n2-16-buildkit
     container: google/cloud-sdk:524.0.0
     if: >
+      github.event_name == 'release' ||
       github.event_name == 'schedule' ||
       github.event_name == 'pull_request' ||
       github.event_name == 'workflow_dispatch' && (
@@ -86,15 +87,8 @@ jobs:
           # This ensures that every job clones the exact same commit as "setup" job
           ref: ${{ inputs.maxtext_sha }}
 
-      - name: Checkout post-training dependencies
-        if: steps.check.outputs.should_run == 'true' && inputs.image_name == 'maxtext_post_training_nightly'
-        run: |
-          git clone https://github.com/google/tunix.git ./tunix
-          git clone https://github.com/vllm-project/vllm.git ./vllm
-          git clone https://github.com/vllm-project/tpu-inference.git ./tpu-inference
-
       - name: Mark git repositories as safe
-        run: git config --global --add safe.directory '*'
+        run: git config --global --add safe.directory ${GITHUB_WORKSPACE}
         if: steps.check.outputs.should_run == 'true'
 
       - name: Configure Docker
@@ -122,6 +116,7 @@ jobs:
             DEVICE=${{ inputs.device }}
             MODE=${{ inputs.build_mode }}
             WORKFLOW=${{ inputs.workflow }}
+            PACKAGE_DIR=./src
             JAX_VERSION=NONE
             LIBTPU_VERSION=NONE
             INCLUDE_TEST_ASSETS=true
@@ -147,16 +142,6 @@ jobs:
             # Add MaxText tag
             maxtext_hash=$(git rev-parse --short HEAD)
             gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:maxtext_${maxtext_hash}_${clean_date}" --quiet
-
-          # Add post-training dependencies tags
-          if [ "${{ inputs.workflow }}" == "post-training" ]; then
-            for dir in tunix vllm tpu-inference; do
-              if [ -d "./$dir" ]; then
-                dir_hash=$(git -C "$dir" rev-parse --short HEAD)
-                gcloud container images add-tag "$SOURCE_IMAGE:${{ github.run_id }}" "$SOURCE_IMAGE:${dir}_${dir_hash}_${clean_date}" --quiet
-                fi
-              done
-            fi
           fi
         env:
           INPUTS_IMAGE_NAME: ${{ inputs.image_name }}
 
@@ -262,7 +262,7 @@ jobs:
       tf_force_gpu_allow_growth: false
       container_resource_option: "--privileged"
       is_scheduled_run: ${{ github.event_name == 'schedule' }}
-      extra_pip_deps_file: 'src/install_maxtext_extra_deps/extra_post_train_base_deps_from_github.txt'
+      extra_pip_deps_file: 'src/dependencies/github_deps/post_train_base_deps.txt'
       maxtext_sha: ${{ needs.build_and_upload_maxtext_package.outputs.maxtext_sha }}
 
   maxtext_post_training_tpu_unit_tests:
@@ -284,7 +284,7 @@ jobs:
       tf_force_gpu_allow_growth: false
       container_resource_option: "--privileged"
       is_scheduled_run: ${{ github.event_name == 'schedule' }}
-      extra_pip_deps_file: 'src/install_maxtext_extra_deps/extra_post_train_base_deps_from_github.txt'
+      extra_pip_deps_file: 'src/dependencies/github_deps/post_train_base_deps.txt'
       maxtext_sha: ${{ needs.build_and_upload_maxtext_package.outputs.maxtext_sha }}
 
   maxtext_gpu_integration_tests:
 
@@ -85,7 +85,7 @@ jobs:
           source .venv/bin/activate
           maxtext_wheel=$(ls maxtext-*-py3-none-any.whl 2>/dev/null)
           uv pip install ${maxtext_wheel}[tpu] --resolution=lowest
-          uv pip install -r src/install_maxtext_extra_deps/extra_deps_from_github.txt
+          uv pip install -r src/dependencies/github_deps/pre_train_deps.txt
           python3 --version
           python3 -m pip freeze
       - name: Copy test assets files
 
@@ -96,7 +96,7 @@ jobs:
           source .venv/bin/activate
           maxtext_wheel=$(ls maxtext-*-py3-none-any.whl 2>/dev/null)
           uv pip install ${maxtext_wheel}[${MAXTEXT_PACKAGE_EXTRA}] --resolution=lowest
-          uv pip install -r src/install_maxtext_extra_deps/extra_deps_from_github.txt
+          uv pip install -r src/dependencies/github_deps/pre_train_deps.txt
           python3 --version
           python3 -m pip freeze
           uv pip install pytest-cov
@@ -131,12 +131,10 @@ jobs:
           else
             SPLIT_ARGS=""
           fi
-          # TODO: Fix the skipped tests and remove the deselect flags
           .venv/bin/python3 -m pytest ${INPUTS_PYTEST_ADDOPTS} \
             -v \
             -m "${FINAL_PYTEST_MARKER}" \
             --durations=0 \
-            --deselect "tests/unit/tokenizer_test.py::TokenizerTest::test_detokenize" \
             --cov=MaxText \
             --cov=maxtext \
             --cov-report=xml \
 
@@ -52,7 +52,6 @@ repos:
         args:
           - '--pyink-indentation=2'
           - '--line-length=122'
-          - '--check'
 
   - repo: https://github.com/executablebooks/mdformat
     rev: 0.7.22
 
@@ -0,0 +1,13 @@
+ Copyright 2023–2026 Google LLC
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
@@ -1,35 +1,35 @@
 # Optimization 1: Multihost recommended network settings
-We included all the recommended network settings in [rto_setup.sh](https://github.com/google/maxtext/blob/main/rto_setup.sh). 
+We included all the recommended network settings in [rto_setup.sh](https://github.com/google/maxtext/blob/main/src/dependencies/scripts/rto_setup.sh). 
 
-[preflight.sh](https://github.com/google/maxtext/blob/main/preflight.sh) will help you apply them based on GCE or GKE platform.
+[preflight.sh](https://github.com/google/maxtext/blob/main/src/dependencies/scripts/preflight.sh) will help you apply them based on GCE or GKE platform.
 
 Before you run an ML workload on multihost with GCE or GKE, simply run `bash src/dependencies/scripts/preflight.sh PLATFORM=[GCE or GKE]` to get the best DCN network performance.
 
 Here is an example for GCE:
 ```
-bash preflight.sh PLATFORM=GCE && python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?}
+bash src/dependencies/scripts/preflight.sh PLATFORM=GCE && python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?}
 ```
 
 Here is an example for GKE:
 ```
-bash preflight.sh PLATFORM=GKE && python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?}
+bash src/dependencies/scripts/preflight.sh PLATFORM=GKE && python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?}
 ```
 
 # Optimization 2: NUMA binding (you can only apply this to v4 and v5p)
 NUMA binding is recommended for enhanced performance, as it reduces memory latency and maximizes data throughput, ensuring that your high-performance applications operate more efficiently and effectively.
 
 For GCE, 
-[preflight.sh](https://github.com/google/maxtext/blob/main/preflight.sh) will help you install `numactl` dependency, so you can use it directly, here is an example:
+[preflight.sh](https://github.com/google/maxtext/blob/main/src/dependencies/scripts/preflight.sh) will help you install `numactl` dependency, so you can use it directly, here is an example:
 
 ```
-bash preflight.sh PLATFORM=GCE && numactl --membind 0 --cpunodebind=0 python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?}
+bash src/dependencies/scripts/preflight.sh PLATFORM=GCE && numactl --membind 0 --cpunodebind=0 python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?}
 ```
 
 For GKE,
 `numactl` should be built into your Docker image from [maxtext_tpu_dependencies.Dockerfile](https://github.com/google/maxtext/blob/main/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile), so you can use it directly if you built the MaxText Docker image. Here is an example:
 
 ```
-bash preflight.sh PLATFORM=GKE && numactl --membind 0 --cpunodebind=0 python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?}
+bash src/dependencies/scripts/preflight.sh PLATFORM=GKE && numactl --membind 0 --cpunodebind=0 python3 -m maxtext.trainers.pre_train.train run_name=${YOUR_JOB_NAME?}
 ```
 
 1. `numactl`: This is the command-line tool used for controlling NUMA policy for processes or shared memory. It's particularly useful on multi-socket systems where memory locality can impact performance.
 
@@ -428,7 +428,7 @@ def build_user_command(
   if wl_config.hlo_dump:
     hlo_dump = "XLA_FLAGS='--xla_dump_large_constants --xla_dump_to=/tmp/xla_dump'"
     upload_hlo_dump = (
-        f" && gsutil -m cp -r /tmp/xla_dump  {wl_config.base_output_directory}/{wl_config.run_name}/hlo_dump"
+        f" && gcloud storage cp -r /tmp/xla_dump  {wl_config.base_output_directory}/{wl_config.run_name}/hlo_dump"
     )
   # Construct the command string with proper formatting and line continuations
   command = " ".join(
 
@@ -187,7 +187,7 @@ def add_parser_arguments(parser: argparse.ArgumentParser):
 
 
 def download_metrics_file_locally(metrics_gcs_file: str, local_file: str) -> int:
-  command = f"gsutil cp -r {metrics_gcs_file} {local_file}"
+  command = f"gcloud storage cp --recursive {metrics_gcs_file} {local_file}"
   return run_command_with_updates(command, f"Download {metrics_gcs_file} in {local_file}")
Original file line number	Diff line number	Diff line change
`@@ -428,7 +428,7 @@ def build_user_command(`
`428`	`428`	`if wl_config.hlo_dump:`
`429`	`429`	`hlo_dump = "XLA_FLAGS='--xla_dump_large_constants --xla_dump_to=/tmp/xla_dump'"`
`430`	`430`	`upload_hlo_dump = (`
`431`		`- f" && gsutil -m cp -r /tmp/xla_dump {wl_config.base_output_directory}/{wl_config.run_name}/hlo_dump"`
	`431`	`+ f" && gcloud storage cp -r /tmp/xla_dump {wl_config.base_output_directory}/{wl_config.run_name}/hlo_dump"`
`432`	`432`	`)`
`433`	`433`	`# Construct the command string with proper formatting and line continuations`
`434`	`434`	`command = " ".join(`