Update JAX to 0.9.2 for post-training

SurbhiJainUSC · SurbhiJainUSC · commit 2a03ed1d8836 · 2026-04-22T19:54:45.000Z
diff --git a/.github/workflows/run_jupyter_notebooks.yml b/.github/workflows/run_jupyter_notebooks.yml
@@ -105,7 +105,8 @@ jobs:
 
           for notebook in "$MAXTEXT_NOTEBOOKS_ROOT"/{sft,rl}*.ipynb; do
             filename=$(basename "$notebook")
-            if [[ "$filename" == "sft_llama3_demo_gpu.ipynb" || "$filename" == "maxtext_with_gepa.ipynb" ]]; then
+            # TODO: Update runner to v6e-8 as RL with Llama3.1-8b doesn't fit on v6e-4
+            if [[ "$filename" == "sft_llama3_demo_gpu.ipynb" || "$filename" == "maxtext_with_gepa.ipynb" || "$filename" == "rl_llama3_demo.ipynb" ]]; then
               echo "Skipping $filename"
               continue
             fi
diff --git a/.github/workflows/run_tests_coordinator.yml b/.github/workflows/run_tests_coordinator.yml
@@ -102,24 +102,24 @@ jobs:
         ${{ fromJSON('{
             "tpu-unit": "not cpu_only and not gpu_only and not integration_test and not post_training",
             "tpu-integration": "not cpu_only and not gpu_only and integration_test and not post_training",
-            "tpu-post-training-unit": "not cpu_only and not gpu_only and not integration_test",
+            "tpu-post-training-unit": "not cpu_only and not gpu_only and not integration_test and post_training",
             "tpu-post-training-integration": "not cpu_only and not gpu_only and integration_test",
             "gpu-unit": "not cpu_only and not tpu_only and not integration_test and not post_training",
             "gpu-integration": "not cpu_only and not tpu_only and integration_test and not post_training",
             "cpu-unit": "cpu_only and not post_training",
-            "cpu-post-training-unit": "cpu_only"
+            "cpu-post-training-unit": "cpu_only and post_training"
           }')[inputs.flavor] }}
 
       pytest_addopts: >-
         ${{ fromJSON('{
             "tpu-unit": "",
             "tpu-integration": "",
-            "tpu-post-training-unit": "tests/post_training/unit",
+            "tpu-post-training-unit": "tests/post_training/unit tests/unit",
             "tpu-post-training-integration": "tests/post_training/integration",
             "gpu-unit": "",
             "gpu-integration": "",
             "cpu-unit": "",
-            "cpu-post-training-unit": "tests/post_training/unit"
+            "cpu-post-training-unit": "tests/post_training/unit tests/unit"
           }')[inputs.flavor] }}
 
       pytest_extra_args: >-
diff --git a/src/dependencies/extra_deps/post_train_base_deps.txt b/src/dependencies/extra_deps/post_train_base_deps.txt
@@ -1 +1 @@
-google-tunix @ https://github.com/google/tunix/archive/336d102fe32ca0edbe42a8f66ff0fd533cebdf52.zip
+google-tunix @ https://github.com/google/tunix/archive/f0102a7b0dccc0020503c0617869883f16b3b4ed.zip
diff --git a/src/dependencies/extra_deps/post_train_github_deps.txt b/src/dependencies/extra_deps/post_train_github_deps.txt
@@ -1,5 +1,5 @@
 -r post_train_base_deps.txt
 google-jetstream @ https://github.com/AI-Hypercomputer/JetStream/archive/29329e8e73820993f77cfc8efe34eb2a73f5de98.zip
 mlperf-logging @ https://github.com/mlcommons/logging/archive/38ab22670527888c8eb7825a4ece176fcc36a95d.zip
-tpu-inference @ https://github.com/vllm-project/tpu-inference/archive/0cae84fc9a883ba1bde02d4f07930e6af9e92958.zip
-vllm @ git+https://github.com/vllm-project/vllm@ee8a29511fc69e3f0f6291fa6ff1cf6e47f7750d
+tpu-inference @ https://github.com/vllm-project/tpu-inference/archive/40876e81f04226f9b7b1e4bbdc9051d6b1364b9d.zip
+vllm @ git+https://github.com/vllm-project/vllm@595562651a5a4539ffa910d8570c08fb5169bdc9
diff --git a/src/dependencies/requirements/generated_requirements/tpu-post-train-requirements.txt b/src/dependencies/requirements/generated_requirements/tpu-post-train-requirements.txt
@@ -140,8 +140,8 @@ isort>=7.0.0
 jaraco.classes>=3.4.0
 jaraco.context>=6.1.0
 jaraco.functools>=4.3.0
-jax>=0.8.3
-jaxlib>=0.8.3
+jax>=0.9.2
+jaxlib>=0.9.2
 jaxtyping>=0.3.3
 jedi>=0.19.2
 jeepney>=0.9.0
@@ -164,7 +164,7 @@ lark>=1.2.2
 latex2sympy2_extended>=1.11.0
 libclang>=18.1.1
 libcst>=1.8.6
-libtpu>=0.0.32
+libtpu>=0.0.39
 llguidance>=1.3.0
 llvmlite>=0.45.1
 lm-format-enforcer>=0.11.3
@@ -180,7 +180,7 @@ matplotlib-inline>=0.2.1
 mccabe>=0.7.0
 mcp>=1.26.0
 mdurl>=0.1.2
-mistral_common>=1.9.1
+mistral_common>=1.11.0
 ml_collections>=1.1.0
 ml_dtypes>=0.5.4
 ml_goodput_measurement>=0.0.15
@@ -250,7 +250,7 @@ parameterized>=0.9.0
 parso>=0.8.6
 partial-json-parser>=0.2.1.1.post7
 pathspec>=0.12.1
-pathwaysutils>=0.1.4
+pathwaysutils>=0.1.7
 perfetto>=0.16.0
 pexpect>=4.9.0
 pillow>=12.0.0
@@ -355,19 +355,19 @@ tensorflow-text>=2.20.0
 tensorstore>=0.1.79
 termcolor>=3.2.0
 tiktoken>=0.12.0
-tokamax>=0.0.8
+tokamax>=0.0.12
 tokenizers>=0.22.1
 toml>=0.10.2
 tomlkit>=0.13.3
 toolz>=1.1.0
-torch>=2.9.0
+torch==2.10.0
 torchax>=0.0.11
-torchvision>=0.24.0
+torchvision==0.25.0
 tornado>=6.5.4
 tpu-info>=0.7.1
 tqdm>=4.67.3
 traitlets>=5.14.3
-transformers>=4.57.1
+transformers>=5.5.4
 treescope>=0.1.10
 triton>=3.5.0
 typeguard>=2.13.3
diff --git a/src/maxtext/examples/rl_llama3_demo.ipynb b/src/maxtext/examples/rl_llama3_demo.ipynb
@@ -135,7 +135,28 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "import datetime\nimport os\nimport sys\nimport subprocess\nfrom pathlib import Path\nfrom huggingface_hub import login\nfrom etils import epath\nimport jax\n\nfrom maxtext.trainers.post_train.rl.train_rl import rl_train\nfrom maxtext.utils.model_creation_utils import setup_configs_and_devices\nfrom maxtext.utils.globals import MAXTEXT_REPO_ROOT, MAXTEXT_PKG_DIR\n\nos.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"0\"\nos.environ[\"SKIP_JAX_PRECOMPILE\"] = \"1\"  # Faster startup for vLLM\n# Suppress vLLM logging with a severity level below ERROR\nos.environ[\"VLLM_LOGGING_LEVEL\"] = \"ERROR\"\n\n\nprint(f\"MaxText installation path: {MAXTEXT_PKG_DIR}\")"
+   "source": [
+    "import datetime\n",
+    "import os\n",
+    "import sys\n",
+    "import subprocess\n",
+    "from pathlib import Path\n",
+    "from huggingface_hub import login\n",
+    "from etils import epath\n",
+    "import jax\n",
+    "\n",
+    "from maxtext.trainers.post_train.rl.train_rl import rl_train\n",
+    "from maxtext.utils.model_creation_utils import setup_configs_and_devices\n",
+    "from maxtext.utils.globals import MAXTEXT_REPO_ROOT, MAXTEXT_PKG_DIR\n",
+    "\n",
+    "os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"0\"\n",
+    "os.environ[\"SKIP_JAX_PRECOMPILE\"] = \"1\"  # Faster startup for vLLM\n",
+    "# Suppress vLLM logging with a severity level below ERROR\n",
+    "os.environ[\"VLLM_LOGGING_LEVEL\"] = \"ERROR\"\n",
+    "\n",
+    "\n",
+    "print(f\"MaxText installation path: {MAXTEXT_PKG_DIR}\")"
+   ]
   },
   {
    "cell_type": "code",
@@ -188,8 +209,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "MODEL_NAME = \"llama3.1-8b\"\n",
-    "TOKENIZER_PATH = \"meta-llama/Llama-3.1-8B-Instruct\"\n",
+    "MODEL_NAME = \"llama3.1-8b-Instruct\"\n",
     "RUN_NAME = datetime.datetime.now().strftime(\"%Y-%m-%d-%H-%M-%S\")\n",
     "LOSS_ALGO=\"grpo\" #  or \"gspo-token\" if you want to use GSPO\n",
     "\n",
@@ -270,35 +290,25 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Load configuration for RL training\n",
+    "# Configuration for RL training\n",
     "config_argv = [\n",
     "    \"\",\n",
     "    f\"{MAXTEXT_PKG_DIR}/configs/post_train/rl.yml\",\n",
     "    f\"model_name={MODEL_NAME}\",\n",
-    "    f\"tokenizer_path={TOKENIZER_PATH}\",\n",
     "    f\"run_name={RUN_NAME}\",\n",
     "    f\"chat_template_path={CHAT_TEMPLATE_PATH}\",\n",
     "    f\"load_parameters_path={MODEL_CHECKPOINT_PATH}/0/items\",\n",
     "    f\"base_output_directory={OUTPUT_DIRECTORY}\",\n",
     "    f\"hf_access_token={HF_TOKEN}\",\n",
     "    \"debug.rl=False\",\n",
     "    f\"rl.loss_algo={LOSS_ALGO}\",\n",
-    "    \"use_pathways=False\"\n",
+    "    \"use_pathways=False\",\n",
+    "    \"log_config=False\",\n",
     "]\n",
     "\n",
-    "trainer_config, sampler_config, trainer_devices, sampler_devices = setup_configs_and_devices(config_argv)\n",
-    "\n",
-    "rl_train_steps = int(\n",
-    "    trainer_config.num_batches\n",
-    "    * trainer_config.rl.num_iterations\n",
-    "    * trainer_config.train_fraction\n",
-    "    * trainer_config.num_epoch\n",
-    ")\n",
-    "\n",
     "print(\"✓ Configuration initialized successfully\")\n",
-    "print(f\"📁 Output directory: {trainer_config.base_output_directory}\")\n",
-    "print(f\"🤖 Model: {trainer_config.model_name}\")\n",
-    "print(f\"📊 RL Train Steps: {rl_train_steps}\")"
+    "print(f\"📁 Output directory: {OUTPUT_DIRECTORY}\")\n",
+    "print(f\"🤖 Model: {MODEL_NAME}\")"
    ]
   },
   {
@@ -314,23 +324,22 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import traceback\n",
+    "\n",
     "print(\"\\n\" + \"=\" * 80)\n",
     "print(f\"🚀 Starting {LOSS_ALGO} Training...\")\n",
     "print(\"=\" * 80)\n",
     "try:\n",
-    "    rl_train(trainer_config, sampler_config, trainer_devices, sampler_devices)\n",
+    "    rl_train(argv=config_argv, kwargs={})\n",
     "    print(\"\\n\" + \"=\" * 80)\n",
     "    print(\"✅ Training Completed Successfully!\")\n",
-    "    print(f\"✍️ Note the improved evaluation accuracy metrics with just {rl_train_steps} RL training steps!\")\n",
     "    print(\"=\" * 80)\n",
-    "    print(f\"📁 Checkpoints saved to: {trainer_config.checkpoint_dir}\")\n",
-    "    print(f\"📊 TensorBoard logs: {trainer_config.tensorboard_dir}\")\n",
-    "    print(f\"🎯 Model ready for inference!\")\n",
-    "except Exception as e:\n",
+    "except Exception:\n",
     "    print(\"\\n\" + \"=\" * 80)\n",
     "    print(\"❌Training Failed!\")\n",
     "    print(\"=\" * 80)\n",
-    "    print(f\"Error: {str(e)}\")"
+    "    traceback.print_exc()\n",
+    "    sys.exit(1)"
    ]
   },
   {
@@ -347,7 +356,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": ".venv",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -361,9 +370,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.12.11"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
diff --git a/src/maxtext/examples/sft_llama3_demo_tpu.ipynb b/src/maxtext/examples/sft_llama3_demo_tpu.ipynb
diff --git a/tests/post_training/unit/sft_data_processing_test.py b/tests/post_training/unit/sft_data_processing_test.py
diff --git a/tests/unit/moe_test.py b/tests/unit/moe_test.py

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-google-tunix @ https://github.com/google/tunix/archive/336d102fe32ca0edbe42a8f66ff0fd533cebdf52.zip`
	`1`	`+google-tunix @ https://github.com/google/tunix/archive/f0102a7b0dccc0020503c0617869883f16b3b4ed.zip`