Further unify pr and non-pr workflow jobs

kevalmorabia97 · kevalmorabia97 · commit 5cf3b39d2ce0 · 2026-04-18T08:32:21.000-07:00
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
diff --git a/.github/workflows/_pr_gate.yml b/.github/workflows/_pr_gate.yml
@@ -14,34 +14,52 @@ on:
 
 jobs:
   check-file-changes:
-    if: startsWith(github.ref, 'refs/heads/pull-request/')
     runs-on: ubuntu-latest
     outputs:
-      any_changed: ${{ steps.changed-tests.outputs.any_changed }}
+      any_changed: ${{ steps.changed-tests.outputs.any_changed || steps.non-pr.outputs.any_changed }}
     steps:
-      - uses: actions/checkout@v6
+      # For non-PR triggers (schedule, workflow_dispatch), always run tests
+      - id: non-pr
+        if: "!startsWith(github.ref, 'refs/heads/pull-request/')"
+        run: echo "any_changed=true" >> "$GITHUB_OUTPUT"
+      - if: startsWith(github.ref, 'refs/heads/pull-request/')
+        uses: actions/checkout@v6
         with:
           fetch-depth: 0
-      - id: get-pr-info
+      - if: startsWith(github.ref, 'refs/heads/pull-request/')
+        id: get-pr-info
         uses: nv-gha-runners/get-pr-info@main
-      # Get commit from main branch that is present in the PR to use as base for changed files
-      - id: calculate-merge-base
+      # Extract SHAs from pr-info JSON via shell to avoid fromJSON on potentially-empty outputs
+      - if: startsWith(github.ref, 'refs/heads/pull-request/')
+        id: pr-shas
         env:
-          PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
-          BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
+          PR_INFO: ${{ steps.get-pr-info.outputs.pr-info }}
+        run: |
+          echo "head_sha=$(echo "$PR_INFO" | jq -r '.head.sha')" >> "$GITHUB_OUTPUT"
+          echo "base_sha=$(echo "$PR_INFO" | jq -r '.base.sha')" >> "$GITHUB_OUTPUT"
+      # Get commit from main branch that is present in the PR to use as base for changed files
+      - if: startsWith(github.ref, 'refs/heads/pull-request/')
+        id: calculate-merge-base
+        # Pass the SHAs through env vars instead of interpolating expressions into the script (avoids shell injection)
+        env:
+          BASE_SHA: ${{ steps.pr-shas.outputs.base_sha }}
+          PR_SHA: ${{ steps.pr-shas.outputs.head_sha }}
         run: |
           (echo -n "merge-base="; git merge-base "$BASE_SHA" "$PR_SHA") | tee --append "${GITHUB_OUTPUT}"
-      - name: Check for changes in test-relevant directories
+      - if: startsWith(github.ref, 'refs/heads/pull-request/')
+        name: Check for changes in test-relevant directories
         id: changed-tests
         uses: step-security/changed-files@v46.0.5
         with:
           base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
-          sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
+          sha: ${{ steps.pr-shas.outputs.head_sha }}
           files: ${{ inputs.files }}
           fail_on_initial_diff_error: true
   wait-checks:
     needs: [check-file-changes]
-    if: needs.check-file-changes.outputs.any_changed == 'true'
+    if: >-
+      startsWith(github.ref, 'refs/heads/pull-request/') &&
+      needs.check-file-changes.outputs.any_changed == 'true'
     uses: ./.github/workflows/_wait_for_checks.yml
     permissions:
       checks: read
diff --git a/.github/workflows/example_tests.yml b/.github/workflows/example_tests.yml
@@ -30,10 +30,10 @@ jobs:
         tests/examples/**
 
   ##### PyTorch Example Tests (speculative_decoding requires 26.01 image) #####
-  torch-pr:
+  torch:
     needs: [pr-gate]
-    if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.pr-gate.outputs.any_changed == 'true'
-    strategy: &torch_strategy
+    if: needs.pr-gate.outputs.any_changed == 'true'
+    strategy:
       fail-fast: false
       matrix:
         example: [llm_distill, llm_qat, llm_sparsity, diffusers_sparsity]
@@ -47,21 +47,9 @@ jobs:
       example: ${{ matrix.example }}
       timeout_minutes: 30
       pip_install_extras: "[hf,dev-test]"
-      runner: linux-amd64-gpu-rtxpro6000-latest-1
-
-  torch-non-pr:
-    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    strategy: *torch_strategy
-    uses: ./.github/workflows/_example_tests_runner.yml
-    secrets: inherit
-    with:
-      docker_image: "nvcr.io/nvidia/pytorch:${{ matrix.docker_image || '26.03' }}-py3"
-      example: ${{ matrix.example }}
-      timeout_minutes: 30
-      pip_install_extras: "[hf,dev-test]"
-      runner: linux-amd64-gpu-rtxpro6000-latest-2
+      runner: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
 
-  ##### TensorRT-LLM Example Tests #####
+  ##### TensorRT-LLM Example Tests (pr/non-pr split: non-pr runs extra autodeploy+eval examples) #####
   trtllm-pr:
     needs: [pr-gate]
     if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.pr-gate.outputs.any_changed == 'true'
@@ -92,39 +80,23 @@ jobs:
       runner: linux-amd64-gpu-rtxpro6000-latest-2
 
   ##### Megatron Example Tests #####
-  megatron-pr:
+  megatron:
     needs: [pr-gate]
-    if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.pr-gate.outputs.any_changed == 'true'
-    strategy: &nemo_strategy
-      fail-fast: false
-      matrix:
-        example: [megatron_bridge]
-    uses: ./.github/workflows/_example_tests_runner.yml
-    secrets: inherit
-    with:
-      docker_image: "nvcr.io/nvidia/nemo:26.02"
-      example: ${{ matrix.example }}
-      timeout_minutes: 30
-      pip_install_extras: "[hf,puzzletron,dev-test]"
-      runner: linux-amd64-gpu-rtxpro6000-latest-1
-
-  megatron-non-pr:
-    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    strategy: *nemo_strategy
+    if: needs.pr-gate.outputs.any_changed == 'true'
     uses: ./.github/workflows/_example_tests_runner.yml
     secrets: inherit
     with:
       docker_image: "nvcr.io/nvidia/nemo:26.02"
-      example: ${{ matrix.example }}
+      example: megatron_bridge
       timeout_minutes: 30
       pip_install_extras: "[hf,puzzletron,dev-test]"
-      runner: linux-amd64-gpu-rtxpro6000-latest-2
+      runner: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
 
   ##### ONNX/TensorRT Example Tests #####
-  onnx-pr:
+  onnx:
     needs: [pr-gate]
-    if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.pr-gate.outputs.any_changed == 'true'
-    strategy: &onnx_strategy
+    if: needs.pr-gate.outputs.any_changed == 'true'
+    strategy:
       fail-fast: false
       matrix:
         example: [diffusers, torch_onnx]
@@ -134,33 +106,22 @@ jobs:
       docker_image: "nvcr.io/nvidia/tensorrt:26.02-py3"
       example: ${{ matrix.example }}
       pip_install_extras: "[onnx,hf,dev-test]"
-      runner: linux-amd64-gpu-rtxpro6000-latest-1
-
-  onnx-non-pr:
-    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    strategy: *onnx_strategy
-    uses: ./.github/workflows/_example_tests_runner.yml
-    secrets: inherit
-    with:
-      docker_image: "nvcr.io/nvidia/tensorrt:26.02-py3"
-      example: ${{ matrix.example }}
-      pip_install_extras: "[onnx,hf,dev-test]"
-      runner: linux-amd64-gpu-rtxpro6000-latest-2
+      runner: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
 
   ##### Required Check for PR #####
   example-pr-required-check:
     # Run even if example tests are skipped
     if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
-    needs: [pr-gate, torch-pr, trtllm-pr, megatron-pr, onnx-pr]
+    needs: [pr-gate, torch, trtllm-pr, megatron, onnx]
     runs-on: ubuntu-latest
     steps:
-      - name: Required GPU tests did not succeed
+      - name: Required example tests did not succeed
         if: |
           needs.pr-gate.result != 'success' ||
           (needs.pr-gate.outputs.any_changed == 'true' && (
-            needs.torch-pr.result != 'success' ||
+            needs.torch.result != 'success' ||
             needs.trtllm-pr.result != 'success' ||
-            needs.megatron-pr.result != 'success' ||
-            needs.onnx-pr.result != 'success'
+            needs.megatron.result != 'success' ||
+            needs.onnx.result != 'success'
           ))
         run: exit 1
diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml
@@ -30,10 +30,11 @@ jobs:
         tests/gpu/**
         tests/gpu_megatron/**
         tests/gpu_trtllm/**
-  gpu-tests-pr:
+
+  gpu-tests:
     needs: [pr-gate]
     if: needs.pr-gate.outputs.any_changed == 'true'
-    strategy: &gpu_strategy
+    strategy:
       fail-fast: false
       matrix:
         include:
@@ -47,15 +48,15 @@ jobs:
           - example: gpu_trtllm
             timeout: 30
             container_image: tensorrt-llm/release:1.3.0rc10
-    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
+    runs-on: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
     timeout-minutes: ${{ matrix.timeout }}
-    container: &gpu_container
+    container:
       image: nvcr.io/nvidia/${{ matrix.container_image }}
       env:
         GIT_DEPTH: 1000 # For correct version
         PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
-    steps: &gpu_steps
+    steps:
       - uses: actions/checkout@v6
       - uses: nv-gha-runners/setup-proxy-cache@main
       - name: Setup environment variables
@@ -75,19 +76,13 @@ jobs:
           flags: gpu
           fail_ci_if_error: false # test may be skipped if relevant file changes are not detected
           verbose: true
-  gpu-tests-non-pr:
-    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    strategy: *gpu_strategy
-    runs-on: linux-amd64-gpu-rtxpro6000-latest-2
-    timeout-minutes: ${{ matrix.timeout }}
-    container: *gpu_container
-    steps: *gpu_steps
+
   gpu-pr-required-check:
-    # Run even if gpu-tests-pr is skipped
+    # Run even if gpu-tests is skipped
     if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
-    needs: [pr-gate, gpu-tests-pr]
+    needs: [pr-gate, gpu-tests]
     runs-on: ubuntu-latest
     steps:
       - name: Required GPU tests did not succeed
-        if: ${{ needs.pr-gate.result != 'success' || (needs.pr-gate.outputs.any_changed == 'true' && needs.gpu-tests-pr.result != 'success') }}
+        if: ${{ needs.pr-gate.result != 'success' || (needs.pr-gate.outputs.any_changed == 'true' && needs.gpu-tests.result != 'success') }}
         run: exit 1
diff --git a/.github/workflows/regression_tests.yml b/.github/workflows/regression_tests.yml
@@ -32,25 +32,19 @@ jobs:
         examples/dataset/**
         modelopt_recipes/general/speculative_decoding/**
         tools/launcher/**
-  regression-tests-pr:
+
+  regression-tests:
     needs: [pr-gate]
     if: needs.pr-gate.outputs.any_changed == 'true'
-    strategy: &regression_strategy
-      fail-fast: false
-      matrix:
-        include:
-          - example: regression
-            timeout: 15
-            container_image: pytorch:26.01-py3
-    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
-    timeout-minutes: ${{ matrix.timeout }}
-    container: &regression_container
-      image: nvcr.io/nvidia/${{ matrix.container_image }}
+    runs-on: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
+    timeout-minutes: 15
+    container:
+      image: nvcr.io/nvidia/pytorch:26.01-py3
       env:
         GIT_DEPTH: 1000 # For correct version
         PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
-    steps: &regression_steps
+    steps:
       - uses: actions/checkout@v6
       - uses: nv-gha-runners/setup-proxy-cache@main
       - name: Setup environment variables
@@ -60,9 +54,7 @@ jobs:
         env:
           COVERAGE_PROCESS_START: ${{ github.workspace }}/pyproject.toml
           COVERAGE_FILE: ${{ github.workspace }}/.coverage
-        run: |
-          pip install nox
-          nox -s ${{ matrix.example }}
+        run: python -m pip install nox && nox -s regression
       - name: Upload regression coverage to Codecov
         uses: codecov/codecov-action@v5
         with:
@@ -71,21 +63,15 @@ jobs:
           flags: regression
           fail_ci_if_error: false # test may be skipped if relevant file changes are not detected
           verbose: true
-  regression-tests-non-pr:
-    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
-    strategy: *regression_strategy
-    runs-on: linux-amd64-gpu-rtxpro6000-latest-2
-    timeout-minutes: ${{ matrix.timeout }}
-    container: *regression_container
-    steps: *regression_steps
+
   regression-pr-required-check:
-    # Run even if regression-tests-pr is skipped
+    # Run even if regression-tests is skipped
     if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
-    needs: [pr-gate, regression-tests-pr]
+    needs: [pr-gate, regression-tests]
     runs-on: ubuntu-latest
     steps:
       - name: Required regression tests did not succeed
         if: |
           needs.pr-gate.result != 'success' ||
-          (needs.pr-gate.outputs.any_changed == 'true' && needs.regression-tests-pr.result != 'success')
+          (needs.pr-gate.outputs.any_changed == 'true' && needs.regression-tests.result != 'success')
         run: exit 1