Skip to content

Commit 6c09a6f

Browse files
Use nemo:26.02 container for megatron gpu tests
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
1 parent 4e33368 commit 6c09a6f

5 files changed

Lines changed: 50 additions & 56 deletions

File tree

.github/workflows/_example_tests_runner.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ jobs:
4848
- name: Install dependencies
4949
run: |
5050
# use `python -m pip` instead of `pip` to avoid conflicts with system pip for nemo containers
51-
pip uninstall -y nvidia-modelopt
5251
python -m pip install ".${{ inputs.pip_install_extras }}"
5352
5453
if [[ "${{ inputs.example }}" == *"diffusers"* ]]; then

.github/workflows/example_tests.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,8 @@ jobs:
118118
pip_install_extras: "[hf,dev-test]"
119119
runner: linux-amd64-gpu-rtxpro6000-latest-2
120120

121-
##### NeMo Example Tests #####
122-
nemo-pr:
121+
##### Megatron Example Tests #####
122+
megatron-pr:
123123
needs: [check-file-changes, wait-checks]
124124
if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true'
125125
strategy: &nemo_strategy
@@ -135,7 +135,7 @@ jobs:
135135
pip_install_extras: "[hf,puzzletron,dev-test]"
136136
runner: linux-amd64-gpu-rtxpro6000-latest-1
137137

138-
nemo-non-pr:
138+
megatron-non-pr:
139139
if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
140140
strategy: *nemo_strategy
141141
uses: ./.github/workflows/_example_tests_runner.yml
@@ -160,7 +160,7 @@ jobs:
160160
with:
161161
docker_image: "nvcr.io/nvidia/tensorrt:26.02-py3"
162162
example: ${{ matrix.example }}
163-
pip_install_extras: "[all,dev-test]"
163+
pip_install_extras: "[onnx,hf,dev-test]"
164164
runner: linux-amd64-gpu-rtxpro6000-latest-1
165165

166166
onnx-non-pr:
@@ -171,14 +171,14 @@ jobs:
171171
with:
172172
docker_image: "nvcr.io/nvidia/tensorrt:26.02-py3"
173173
example: ${{ matrix.example }}
174-
pip_install_extras: "[all,dev-test]"
174+
pip_install_extras: "[onnx,hf,dev-test]"
175175
runner: linux-amd64-gpu-rtxpro6000-latest-2
176176

177177
##### Required Check for PR #####
178178
example-pr-required-check:
179179
# Run even if example tests are skipped
180180
if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
181-
needs: [check-file-changes, torch-pr, trtllm-pr, nemo-pr, onnx-pr]
181+
needs: [check-file-changes, torch-pr, trtllm-pr, megatron-pr, onnx-pr]
182182
runs-on: ubuntu-latest
183183
steps:
184184
- name: Required GPU tests did not succeed
@@ -187,7 +187,7 @@ jobs:
187187
(needs.check-file-changes.outputs.any_changed == 'true' && (
188188
needs.torch-pr.result != 'success' ||
189189
needs.trtllm-pr.result != 'success' ||
190-
needs.nemo-pr.result != 'success' ||
190+
needs.megatron-pr.result != 'success' ||
191191
needs.onnx-pr.result != 'success'
192192
))
193193
run: exit 1

.github/workflows/gpu_tests.yml

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ on:
88
- cron: "0 0 * * *" # Nightly
99
workflow_dispatch: # On-demand
1010

11-
# Cancel previous runs if new commit is pushed to the same PR
11+
# Cancel previous runs if new commit is pushed to the same PR
1212
concurrency:
1313
group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }}
1414
cancel-in-progress: true
@@ -42,7 +42,9 @@ jobs:
4242
.github/workflows/gpu_tests.yml
4343
modelopt/**
4444
tests/gpu/**
45+
tests/gpu_megatron/**
4546
tests/gpu_regression/**
47+
tests/gpu_trtllm/**
4648
examples/speculative_decoding/**
4749
examples/dataset/**
4850
modelopt_recipes/general/speculative_decoding/**
@@ -71,13 +73,13 @@ jobs:
7173
timeout: 60
7274
container_image: pytorch:26.01-py3
7375
# tests/gpu/_extensions/test_onnx_extensions.py fails for newer containers until https://github.com/tbenthompson/cppimport/pull/98
74-
- example: gpu-regression
76+
- example: gpu_regression
7577
timeout: 15
7678
container_image: pytorch:26.01-py3
77-
- example: gpu-megatron
79+
- example: gpu_megatron
7880
timeout: 45
79-
container_image: pytorch:26.01-py3
80-
- example: gpu-trtllm
81+
container_image: nemo:26.02
82+
- example: gpu_trtllm
8183
timeout: 30
8284
container_image: tensorrt-llm/release:1.3.0rc10
8385
runs-on: linux-amd64-gpu-rtxpro6000-latest-1
@@ -99,8 +101,14 @@ jobs:
99101
COVERAGE_PROCESS_START: ${{ github.workspace }}/pyproject.toml
100102
COVERAGE_FILE: ${{ github.workspace }}/.coverage
101103
run: |
102-
pip install tox-current-env
103-
COV_ARGS="--cov" tox -e cuda13-${{ matrix.example }} --current-env
104+
# nemo containers use uv venvs which are not compatible with tox-current-env, so run tests directly
105+
if [[ "${{ matrix.example }}" == "gpu_megatron" ]]; then
106+
python -m pip install -e .[hf,dev-test]
107+
python -m pytest tests/gpu_megatron --cov
108+
else
109+
python -m pip install tox tox-current-env
110+
COV_ARGS="--cov" python -m tox -e cuda13-${{ matrix.example }} --current-env
111+
fi
104112
- name: Upload GPU coverage to Codecov
105113
uses: codecov/codecov-action@v5
106114
with:

tests/gpu_megatron/torch/peft/plugins/test_megatron_peft.py

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,11 @@
2020
import torch
2121
import torch.nn.init as init
2222
from _test_utils.torch.megatron.models import get_mcore_gpt_model
23-
from _test_utils.torch.megatron.utils import initialize_for_megatron
24-
from megatron.core import dist_checkpointing
23+
from _test_utils.torch.megatron.utils import (
24+
initialize_for_megatron,
25+
load_distributed_checkpoint,
26+
save_distributed_checkpoint,
27+
)
2528

2629
import modelopt.torch.peft as mtpeft
2730
import modelopt.torch.quantization as mtq
@@ -148,20 +151,6 @@
148151
}
149152

150153

151-
def save_distributed_checkpoint(checkpoint_path, gpt_model):
152-
sharded_state_dict = gpt_model.sharded_state_dict(prefix="")
153-
dist_checkpointing.save(sharded_state_dict=sharded_state_dict, checkpoint_dir=checkpoint_path)
154-
155-
156-
def load_distributed_checkpoint(checkpoint_path, gpt_model):
157-
sharded_state_dict = gpt_model.sharded_state_dict(prefix="")
158-
checkpoint = dist_checkpointing.load(
159-
sharded_state_dict=sharded_state_dict, checkpoint_dir=checkpoint_path
160-
)
161-
gpt_model.load_state_dict(checkpoint)
162-
return gpt_model
163-
164-
165154
def _gpt_model_provider(tp_size: int, hidden_size=256, vocab_size=64, meta_device=False):
166155
"""Build the model."""
167156

tox.ini

Lines changed: 23 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
[tox]
22
envlist=
33
pre-commit-all
4-
py312-torch210-tf_latest-unit
4+
py312-torch211-tf_latest-unit
55
cuda13-gpu
6-
cuda13-gpu-regression
7-
cuda13-gpu-megatron
6+
cuda13-gpu_regression
87
skipsdist = True
98
toxworkdir = /tmp/{env:USER}-modelopt-tox
109
passenv =
@@ -57,44 +56,43 @@ commands =
5756
###########################################################
5857
# GPU test environments (Should be used with --current-env)
5958
###########################################################
59+
# Container: nvcr.io/nvidia/pytorch:26.01-py3 or later
6060
[testenv:cuda13-gpu]
6161
commands_pre =
6262
# Install deps here so that it gets installed even in --current-env
63-
pip install --no-build-isolation git+https://github.com/Dao-AILab/fast-hadamard-transform.git
64-
pip install -e .[all,dev-test]
63+
python -m pip install --no-build-isolation git+https://github.com/Dao-AILab/fast-hadamard-transform.git
64+
python -m pip install -e .[all,dev-test]
6565

6666
# Install cupy-cuda13x for INT4 ONNX quantization (default is cupy-cuda12x)
67-
pip uninstall -y cupy-cuda12x
68-
pip install cupy-cuda13x
67+
python -m pip uninstall -y cupy-cuda12x
68+
python -m pip install cupy-cuda13x
6969

7070
# Install mamba and causal-conv1d for Nemotron tests
71-
pip install --no-build-isolation git+https://github.com/state-spaces/mamba.git
72-
pip install --no-build-isolation git+https://github.com/Dao-AILab/causal-conv1d.git
71+
python -m pip install --no-build-isolation git+https://github.com/state-spaces/mamba.git
72+
python -m pip install --no-build-isolation git+https://github.com/Dao-AILab/causal-conv1d.git
7373
commands =
7474
python -m pytest tests/gpu {env:COV_ARGS:}
7575

76-
[testenv:cuda13-gpu-regression]
76+
[testenv:cuda13-gpu_regression]
7777
commands_pre =
78-
pip install -e .[hf,dev-test]
78+
python -m pip install -e .[hf,dev-test]
7979
commands =
8080
python -m pytest tests/gpu_regression {env:COV_ARGS:}
8181

82-
[testenv:cuda13-gpu-megatron]
82+
# Container: nvcr.io/nvidia/nemo:26.02 or later
83+
# NOTE: tox is bypassed for this env in CI (see gpu_tests.yml) because tox-current-env is
84+
# incompatible with uv venvs, and any new tox env would lack NeMo/Megatron packages from /opt/venv.
85+
# [testenv:cuda13-gpu_megatron]
86+
# commands_pre =
87+
# python -m pip install -e .[hf,dev-test]
88+
# commands =
89+
# python -m pytest tests/gpu_megatron {env:COV_ARGS:}
90+
91+
# Container: nvcr.io/nvidia/tensorrt-llm/release:1.2.0 or later
92+
[testenv:cuda13-gpu_trtllm]
8393
commands_pre =
8494
# Install deps here so that it gets installed even in --current-env
85-
# Temporarily disable latest mcore until we fix its nvidia-resiliency-ext dependency
86-
pip install 'megatron-core<0.17.0'
87-
pip install --no-build-isolation git+https://github.com/state-spaces/mamba.git
88-
pip install --no-build-isolation git+https://github.com/Dao-AILab/causal-conv1d.git
89-
pip install -e .[hf,dev-test]
90-
commands =
91-
python -m pytest tests/gpu_megatron {env:COV_ARGS:}
92-
93-
[testenv:cuda13-gpu-trtllm]
94-
# Expected to be run in TRT-LLM container
95-
commands_pre =
96-
# Install deps here so that it gets installed even in --current-env
97-
pip install -e .[hf,dev-test]
95+
python -m pip install -e .[hf,dev-test]
9896
commands =
9997
python -m pytest tests/gpu_trtllm {env:COV_ARGS:}
10098

0 commit comments

Comments
 (0)