Further unify pr and non-pr workflow jobs

kevalmorabia97 · kevalmorabia97 · commit a4c2408cd4ac · 2026-04-18T08:14:15.000-07:00
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
diff --git a/.github/workflows/example_tests.yml b/.github/workflows/example_tests.yml
@@ -30,10 +30,12 @@ jobs:
         tests/examples/**
 
   ##### PyTorch Example Tests (speculative_decoding requires 26.01 image) #####
-  torch-pr:
+  torch:
     needs: [pr-gate]
-    if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.pr-gate.outputs.any_changed == 'true'
-    strategy: &torch_strategy
+    if: >-
+      needs.pr-gate.outputs.any_changed == 'true' ||
+      !startsWith(github.ref, 'refs/heads/pull-request/')
+    strategy:
       fail-fast: false
       matrix:
         example: [llm_distill, llm_qat, llm_sparsity, diffusers_sparsity]
@@ -47,21 +49,9 @@ jobs:
       example: ${{ matrix.example }}
       timeout_minutes: 30
       pip_install_extras: "[hf,dev-test]"
-      runner: linux-amd64-gpu-rtxpro6000-latest-1
-
-  torch-non-pr:
-    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    strategy: *torch_strategy
-    uses: ./.github/workflows/_example_tests_runner.yml
-    secrets: inherit
-    with:
-      docker_image: "nvcr.io/nvidia/pytorch:${{ matrix.docker_image || '26.03' }}-py3"
-      example: ${{ matrix.example }}
-      timeout_minutes: 30
-      pip_install_extras: "[hf,dev-test]"
-      runner: linux-amd64-gpu-rtxpro6000-latest-2
+      runner: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
 
-  ##### TensorRT-LLM Example Tests #####
+  ##### TensorRT-LLM Example Tests (pr/non-pr split: non-pr runs extra autodeploy+eval examples) #####
   trtllm-pr:
     needs: [pr-gate]
     if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.pr-gate.outputs.any_changed == 'true'
@@ -92,39 +82,27 @@ jobs:
       runner: linux-amd64-gpu-rtxpro6000-latest-2
 
   ##### Megatron Example Tests #####
-  megatron-pr:
+  megatron:
     needs: [pr-gate]
-    if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.pr-gate.outputs.any_changed == 'true'
-    strategy: &nemo_strategy
-      fail-fast: false
-      matrix:
-        example: [megatron_bridge]
-    uses: ./.github/workflows/_example_tests_runner.yml
-    secrets: inherit
-    with:
-      docker_image: "nvcr.io/nvidia/nemo:26.02"
-      example: ${{ matrix.example }}
-      timeout_minutes: 30
-      pip_install_extras: "[hf,puzzletron,dev-test]"
-      runner: linux-amd64-gpu-rtxpro6000-latest-1
-
-  megatron-non-pr:
-    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    strategy: *nemo_strategy
+    if: >-
+      needs.pr-gate.outputs.any_changed == 'true' ||
+      !startsWith(github.ref, 'refs/heads/pull-request/')
     uses: ./.github/workflows/_example_tests_runner.yml
     secrets: inherit
     with:
       docker_image: "nvcr.io/nvidia/nemo:26.02"
-      example: ${{ matrix.example }}
+      example: megatron_bridge
       timeout_minutes: 30
       pip_install_extras: "[hf,puzzletron,dev-test]"
-      runner: linux-amd64-gpu-rtxpro6000-latest-2
+      runner: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
 
   ##### ONNX/TensorRT Example Tests #####
-  onnx-pr:
+  onnx:
     needs: [pr-gate]
-    if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.pr-gate.outputs.any_changed == 'true'
-    strategy: &onnx_strategy
+    if: >-
+      needs.pr-gate.outputs.any_changed == 'true' ||
+      !startsWith(github.ref, 'refs/heads/pull-request/')
+    strategy:
       fail-fast: false
       matrix:
         example: [diffusers, torch_onnx]
@@ -134,33 +112,22 @@ jobs:
       docker_image: "nvcr.io/nvidia/tensorrt:26.02-py3"
       example: ${{ matrix.example }}
       pip_install_extras: "[onnx,hf,dev-test]"
-      runner: linux-amd64-gpu-rtxpro6000-latest-1
-
-  onnx-non-pr:
-    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    strategy: *onnx_strategy
-    uses: ./.github/workflows/_example_tests_runner.yml
-    secrets: inherit
-    with:
-      docker_image: "nvcr.io/nvidia/tensorrt:26.02-py3"
-      example: ${{ matrix.example }}
-      pip_install_extras: "[onnx,hf,dev-test]"
-      runner: linux-amd64-gpu-rtxpro6000-latest-2
+      runner: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
 
   ##### Required Check for PR #####
   example-pr-required-check:
     # Run even if example tests are skipped
     if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
-    needs: [pr-gate, torch-pr, trtllm-pr, megatron-pr, onnx-pr]
+    needs: [pr-gate, torch, trtllm-pr, megatron, onnx]
     runs-on: ubuntu-latest
     steps:
-      - name: Required GPU tests did not succeed
+      - name: Required example tests did not succeed
         if: |
           needs.pr-gate.result != 'success' ||
           (needs.pr-gate.outputs.any_changed == 'true' && (
-            needs.torch-pr.result != 'success' ||
+            needs.torch.result != 'success' ||
             needs.trtllm-pr.result != 'success' ||
-            needs.megatron-pr.result != 'success' ||
-            needs.onnx-pr.result != 'success'
+            needs.megatron.result != 'success' ||
+            needs.onnx.result != 'success'
           ))
         run: exit 1
diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml
@@ -30,10 +30,13 @@ jobs:
         tests/gpu/**
         tests/gpu_megatron/**
         tests/gpu_trtllm/**
-  gpu-tests-pr:
+
+  gpu-tests:
     needs: [pr-gate]
-    if: needs.pr-gate.outputs.any_changed == 'true'
-    strategy: &gpu_strategy
+    if: >-
+      needs.pr-gate.outputs.any_changed == 'true' ||
+      !startsWith(github.ref, 'refs/heads/pull-request/')
+    strategy:
       fail-fast: false
       matrix:
         include:
@@ -47,15 +50,15 @@ jobs:
           - example: gpu_trtllm
             timeout: 30
             container_image: tensorrt-llm/release:1.3.0rc10
-    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
+    runs-on: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
     timeout-minutes: ${{ matrix.timeout }}
-    container: &gpu_container
+    container:
       image: nvcr.io/nvidia/${{ matrix.container_image }}
       env:
         GIT_DEPTH: 1000 # For correct version
         PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
-    steps: &gpu_steps
+    steps:
       - uses: actions/checkout@v6
       - uses: nv-gha-runners/setup-proxy-cache@main
       - name: Setup environment variables
@@ -75,19 +78,13 @@ jobs:
           flags: gpu
           fail_ci_if_error: false # test may be skipped if relevant file changes are not detected
           verbose: true
-  gpu-tests-non-pr:
-    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    strategy: *gpu_strategy
-    runs-on: linux-amd64-gpu-rtxpro6000-latest-2
-    timeout-minutes: ${{ matrix.timeout }}
-    container: *gpu_container
-    steps: *gpu_steps
+
   gpu-pr-required-check:
-    # Run even if gpu-tests-pr is skipped
+    # Run even if gpu-tests is skipped
     if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
-    needs: [pr-gate, gpu-tests-pr]
+    needs: [pr-gate, gpu-tests]
     runs-on: ubuntu-latest
     steps:
       - name: Required GPU tests did not succeed
-        if: ${{ needs.pr-gate.result != 'success' || (needs.pr-gate.outputs.any_changed == 'true' && needs.gpu-tests-pr.result != 'success') }}
+        if: ${{ needs.pr-gate.result != 'success' || (needs.pr-gate.outputs.any_changed == 'true' && needs.gpu-tests.result != 'success') }}
         run: exit 1
diff --git a/.github/workflows/regression_tests.yml b/.github/workflows/regression_tests.yml
@@ -32,25 +32,20 @@ jobs:
         examples/dataset/**
         modelopt_recipes/general/speculative_decoding/**
         tools/launcher/**
-  regression-tests-pr:
+
+  regression-tests:
     needs: [pr-gate]
-    if: needs.pr-gate.outputs.any_changed == 'true'
-    strategy: &regression_strategy
-      fail-fast: false
-      matrix:
-        include:
-          - example: regression
-            timeout: 15
-            container_image: pytorch:26.01-py3
-    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
-    timeout-minutes: ${{ matrix.timeout }}
-    container: &regression_container
-      image: nvcr.io/nvidia/${{ matrix.container_image }}
+    if: >-
+      needs.pr-gate.outputs.any_changed == 'true' || !startsWith(github.ref, 'refs/heads/pull-request/')
+    runs-on: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
+    timeout-minutes: 15
+    container:
+      image: nvcr.io/nvidia/pytorch:26.01-py3
       env:
         GIT_DEPTH: 1000 # For correct version
         PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
-    steps: &regression_steps
+    steps:
       - uses: actions/checkout@v6
       - uses: nv-gha-runners/setup-proxy-cache@main
       - name: Setup environment variables
@@ -60,9 +55,7 @@ jobs:
         env:
           COVERAGE_PROCESS_START: ${{ github.workspace }}/pyproject.toml
           COVERAGE_FILE: ${{ github.workspace }}/.coverage
-        run: |
-          pip install nox
-          nox -s ${{ matrix.example }}
+        run: python -m pip install nox && nox -s regression
       - name: Upload regression coverage to Codecov
         uses: codecov/codecov-action@v5
         with:
@@ -71,21 +64,15 @@ jobs:
           flags: regression
           fail_ci_if_error: false # test may be skipped if relevant file changes are not detected
           verbose: true
-  regression-tests-non-pr:
-    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    strategy: *regression_strategy
-    runs-on: linux-amd64-gpu-rtxpro6000-latest-2
-    timeout-minutes: ${{ matrix.timeout }}
-    container: *regression_container
-    steps: *regression_steps
+
   regression-pr-required-check:
-    # Run even if regression-tests-pr is skipped
+    # Run even if regression-tests is skipped
     if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
-    needs: [pr-gate, regression-tests-pr]
+    needs: [pr-gate, regression-tests]
     runs-on: ubuntu-latest
     steps:
       - name: Required regression tests did not succeed
         if: |
           needs.pr-gate.result != 'success' ||
-          (needs.pr-gate.outputs.any_changed == 'true' && needs.regression-tests-pr.result != 'success')
+          (needs.pr-gate.outputs.any_changed == 'true' && needs.regression-tests.result != 'success')
         run: exit 1