Move regression tests to separate workflow file and folder name

kevalmorabia97 · kevalmorabia97 · commit e6618df601ca · 2026-04-18T03:41:00.000-07:00
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
diff --git a/.github/workflows/bump_uv_lock.yml b/.github/workflows/bump_uv_lock.yml
@@ -3,7 +3,8 @@ name: Bump uv.lock
 on:
   schedule:
     - cron: "0 9 * * 1" # Every Monday at 9:00 UTC
-  workflow_dispatch: # On-demand
+  workflow_dispatch:
+    # On-demand
 
 permissions:
   contents: write
diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
@@ -5,10 +5,12 @@ on:
     branches: [main, release/*, feature/*]
   schedule:
     - cron: "0 0 * * *" # Nightly
-  workflow_dispatch: # On-demand
+  workflow_dispatch:
+    # On-demand
+
 
-# Cancel previous runs if new commit is pushed to the same PR
 concurrency:
+  # Cancel previous runs if new commit is pushed to the same PR
   group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
   cancel-in-progress: true
 
diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml
@@ -48,11 +48,6 @@ jobs:
             tests/gpu/**
             tests/gpu_megatron/**
             tests/gpu_trtllm/**
-            tests/gpu_regression/**
-            examples/speculative_decoding/**
-            examples/dataset/**
-            modelopt_recipes/general/speculative_decoding/**
-            tools/launcher/**
           fail_on_initial_diff_error: true
   wait-checks:
     needs: [check-file-changes]
@@ -75,9 +70,6 @@ jobs:
             timeout: 60
             container_image: pytorch:26.01-py3
             # tests/gpu/_extensions/test_onnx_extensions.py fails for newer containers until https://github.com/tbenthompson/cppimport/pull/98
-          - example: gpu_regression
-            timeout: 15
-            container_image: pytorch:26.01-py3
           - example: gpu_megatron
             timeout: 45
             container_image: nemo:26.04
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
@@ -8,16 +8,18 @@ on:
     branches: [main]
   schedule:
     - cron: "0 0 * * *" # Nightly
-  workflow_dispatch: # On-demand
+  workflow_dispatch:
+    # On-demand
+
 
-# Cancel previous runs if new commit is pushed
 concurrency:
+  # Cancel previous runs if new commit is pushed
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
   cancel-in-progress: true
 
 permissions:
-  contents: write       # push to gh-pages branch
-  pull-requests: write  # post/update preview URL comment on PRs
+  contents: write # push to gh-pages branch
+  pull-requests: write # post/update preview URL comment on PRs
 
 jobs:
   build-docs:
diff --git a/.github/workflows/regression_tests.yml b/.github/workflows/regression_tests.yml
@@ -0,0 +1,120 @@
+name: Regression tests
+
+on:
+  push:
+    branches: ["pull-request/[0-9]+"]
+    # NOTE: paths cannot be used since push happens to copied PR and only latest commit to PR is used
+  schedule:
+    - cron: "0 0 * * *" # Nightly
+  workflow_dispatch:
+    # On-demand
+
+
+concurrency:
+  # Cancel previous runs if new commit is pushed to the same PR
+  group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  check-file-changes:
+    if: startsWith(github.ref, 'refs/heads/pull-request/')
+    runs-on: ubuntu-latest
+    outputs:
+      any_changed: ${{ steps.changed-tests.outputs.any_changed }}
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+      - id: get-pr-info
+        uses: nv-gha-runners/get-pr-info@main
+      # Get commit from main branch that is present in the PR to use as base for changed files
+      - id: calculate-merge-base
+        env:
+          PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
+          BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
+        run: |
+          (echo -n "merge-base="; git merge-base "$BASE_SHA" "$PR_SHA") | tee --append "${GITHUB_OUTPUT}"
+      - name: Check for changes in test-relevant directories
+        id: changed-tests
+        uses: step-security/changed-files@v46.0.5
+        with:
+          base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
+          sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
+          files: |
+            .github/workflows/regression_tests.yml
+            modelopt/torch/**
+            noxfile.py
+            pyproject.toml
+            tests/regression/**
+            examples/speculative_decoding/**
+            examples/dataset/**
+            modelopt_recipes/general/speculative_decoding/**
+            tools/launcher/**
+          fail_on_initial_diff_error: true
+  wait-checks:
+    needs: [check-file-changes]
+    if: needs.check-file-changes.outputs.any_changed == 'true'
+    uses: ./.github/workflows/_wait_for_checks.yml
+    permissions:
+      checks: read
+    secrets: inherit
+    with:
+      match_pattern: "^DCO$|^linux$" # Wait for DCO and Unit tests / linux to pass
+      delay: 300s
+  regression-tests-pr:
+    needs: [check-file-changes, wait-checks]
+    if: needs.check-file-changes.outputs.any_changed == 'true'
+    strategy: &regression_strategy
+      fail-fast: false
+      matrix:
+        include:
+          - example: regression
+            timeout: 15
+            container_image: pytorch:26.01-py3
+    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
+    timeout-minutes: ${{ matrix.timeout }}
+    container: &regression_container
+      image: nvcr.io/nvidia/${{ matrix.container_image }}
+      env:
+        GIT_DEPTH: 1000 # For correct version
+        PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+    steps: &regression_steps
+      - uses: actions/checkout@v6
+      - uses: nv-gha-runners/setup-proxy-cache@main
+      - name: Setup environment variables
+        run: |
+          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu" >> $GITHUB_ENV
+      - name: Run regression tests
+        env:
+          COVERAGE_PROCESS_START: ${{ github.workspace }}/pyproject.toml
+          COVERAGE_FILE: ${{ github.workspace }}/.coverage
+        run: |
+          pip install nox
+          nox -s ${{ matrix.example }}
+      - name: Upload regression coverage to Codecov
+        uses: codecov/codecov-action@v5
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          files: coverage.xml
+          flags: regression
+          fail_ci_if_error: false # test may be skipped if relevant file changes are not detected
+          verbose: true
+  regression-tests-non-pr:
+    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
+    strategy: *regression_strategy
+    runs-on: linux-amd64-gpu-rtxpro6000-latest-2
+    timeout-minutes: ${{ matrix.timeout }}
+    container: *regression_container
+    steps: *regression_steps
+  regression-pr-required-check:
+    # Run even if regression-tests-pr is skipped
+    if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
+    needs: [check-file-changes, regression-tests-pr]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Required regression tests did not succeed
+        if: |
+          needs.check-file-changes.result != 'success' ||
+          (needs.check-file-changes.outputs.any_changed == 'true' && needs.regression-tests-pr.result != 'success')
+        run: exit 1
diff --git a/noxfile.py b/noxfile.py
@@ -128,18 +128,18 @@ def gpu_megatron(session):
     session.run("python", "-m", "pytest", "tests/gpu_megatron", *_cov_args())
 
 
-# Container: nvcr.io/nvidia/pytorch:26.01-py3 or later
+# Container: nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc10 or later
 @nox.session(venv_backend="none")
-def gpu_regression(session):
+def gpu_trtllm(session):
     session.run("python", "-m", "pip", "install", "-e", ".[hf,dev-test]")
-    session.run("python", "-m", "pytest", "tests/gpu_regression", *_cov_args())
+    session.run("python", "-m", "pytest", "tests/gpu_trtllm", *_cov_args())
 
 
-# Container: nvcr.io/nvidia/tensorrt-llm/release:1.3.0rc10 or later
+# Container: nvcr.io/nvidia/pytorch:26.01-py3 or later
 @nox.session(venv_backend="none")
-def gpu_trtllm(session):
+def regression(session):
     session.run("python", "-m", "pip", "install", "-e", ".[hf,dev-test]")
-    session.run("python", "-m", "pytest", "tests/gpu_trtllm", *_cov_args())
+    session.run("python", "-m", "pytest", "tests/regression", *_cov_args())
 
 
 # ─── Code quality ─────────────────────────────────────────────────────────────
diff --git a/tests/regression/torch/speculative/test_dflash.py b/tests/regression/torch/speculative/test_dflash.py