NVIDIA
diff --git a/‎.coveragerc‎
Lines changed: 1 addition & 1 deletion b/‎.coveragerc‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/build-wheel.yml‎
Lines changed: 18 additions & 11 deletions b/‎.github/workflows/build-wheel.yml‎
Lines changed: 18 additions & 11 deletions
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 169 additions & 0 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 169 additions & 0 deletions
diff --git a/‎.github/workflows/test-wheel-linux.yml‎
Lines changed: 12 additions & 4 deletions b/‎.github/workflows/test-wheel-linux.yml‎
Lines changed: 12 additions & 4 deletions
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+# SPDX-License-Identifier: Apache-2.0
 
 [paths]
 source =
 
@@ -44,20 +44,24 @@ jobs:
         with:
           fetch-depth: 0
 
-      # The env vars ACTIONS_CACHE_SERVICE_V2, ACTIONS_RESULTS_URL, and ACTIONS_RUNTIME_TOKEN
-      # are exposed by this action.
-      - name: Enable sccache
-        uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad  # 0.0.9
-        with:
-          disable_annotations: 'true'
+      - name: Install latest rapidsai/sccache
+        if: ${{ startsWith(inputs.host-platform, 'linux') }}
+        run: |
+          curl -fsSL "https://github.com/rapidsai/sccache/releases/latest/download/sccache-$(uname -m)-unknown-linux-musl.tar.gz" \
+            | sudo tar -C /usr/local/bin -xvzf - --wildcards --strip-components=1 -x '*/sccache'
+          echo "SCCACHE_PATH=/usr/local/bin/sccache" >> "$GITHUB_ENV"
+          echo "SCCACHE_GHA_USE_PREPROCESSOR_CACHE_MODE=true" >> "$GITHUB_ENV"
 
       # xref: https://github.com/orgs/community/discussions/42856#discussioncomment-7678867
       - name: Adding addtional GHA cache-related env vars
         uses: actions/github-script@v8
         with:
           script: |
-            core.exportVariable('ACTIONS_CACHE_URL', process.env['ACTIONS_CACHE_URL'])
-            core.exportVariable('ACTIONS_RUNTIME_URL', process.env['ACTIONS_RUNTIME_URL'])
+            core.exportVariable('ACTIONS_CACHE_SERVICE_V2', 'on');
+            core.exportVariable('ACTIONS_CACHE_URL', process.env['ACTIONS_CACHE_URL'] || '');
+            core.exportVariable('ACTIONS_RESULTS_URL', process.env['ACTIONS_RESULTS_URL'] || '');
+            core.exportVariable('ACTIONS_RUNTIME_URL', process.env['ACTIONS_RUNTIME_URL'] || '');
+            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env['ACTIONS_RUNTIME_TOKEN'] || '');
 
       - name: Setup proxy cache
         uses: nv-gha-runners/setup-proxy-cache@main
@@ -176,14 +180,15 @@ jobs:
             ACTIONS_RESULTS_URL=${{ env.ACTIONS_RESULTS_URL }}
             ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
             ACTIONS_CACHE_SERVICE_V2=${{ env.ACTIONS_CACHE_SERVICE_V2 }}
+            SCCACHE_GHA_USE_PREPROCESSOR_CACHE_MODE=${{ env.SCCACHE_GHA_USE_PREPROCESSOR_CACHE_MODE }}
             SCCACHE_DIR=/host/${{ env.SCCACHE_DIR }}
             SCCACHE_CACHE_SIZE=${{ env.SCCACHE_CACHE_SIZE }}
           CIBW_ENVIRONMENT_WINDOWS: >
             CUDA_PATH="$(cygpath -w ${{ env.CUDA_PATH }})"
             CUDA_PYTHON_PARALLEL_LEVEL=${{ env.CUDA_PYTHON_PARALLEL_LEVEL }}
           # check cache stats before leaving cibuildwheel
           CIBW_BEFORE_TEST_LINUX: >
-            "/host/${{ env.SCCACHE_PATH }}" --show-stats &&
+            "/host/${{ env.SCCACHE_PATH }}" --show-adv-stats &&
             "/host/${{ env.SCCACHE_PATH }}" --show-stats --stats-format=json > /host/${{ github.workspace }}/sccache_bindings.json
           # force the test stage to be run (so that before-test is not skipped)
           # TODO: we might want to think twice on adding this, it does a lot of
@@ -241,6 +246,7 @@ jobs:
             ACTIONS_RESULTS_URL=${{ env.ACTIONS_RESULTS_URL }}
             ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
             ACTIONS_CACHE_SERVICE_V2=${{ env.ACTIONS_CACHE_SERVICE_V2 }}
+            SCCACHE_GHA_USE_PREPROCESSOR_CACHE_MODE=${{ env.SCCACHE_GHA_USE_PREPROCESSOR_CACHE_MODE }}
             SCCACHE_DIR=/host/${{ env.SCCACHE_DIR }}
             SCCACHE_CACHE_SIZE=${{ env.SCCACHE_CACHE_SIZE }}
           CIBW_ENVIRONMENT_WINDOWS: >
@@ -250,7 +256,7 @@ jobs:
             PIP_FIND_LINKS="$(cygpath -w ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }})"
           # check cache stats before leaving cibuildwheel
           CIBW_BEFORE_TEST_LINUX: >
-            "/host${{ env.SCCACHE_PATH }}" --show-stats &&
+            "/host${{ env.SCCACHE_PATH }}" --show-adv-stats &&
             "/host${{ env.SCCACHE_PATH }}" --show-stats --stats-format=json > /host/${{ github.workspace }}/sccache_core.json
           # force the test stage to be run (so that before-test is not skipped)
           # TODO: we might want to think twice on adding this, it does a lot of
@@ -429,6 +435,7 @@ jobs:
             ACTIONS_RESULTS_URL=${{ env.ACTIONS_RESULTS_URL }}
             ACTIONS_CACHE_URL=${{ env.ACTIONS_CACHE_URL }}
             ACTIONS_CACHE_SERVICE_V2=${{ env.ACTIONS_CACHE_SERVICE_V2 }}
+            SCCACHE_GHA_USE_PREPROCESSOR_CACHE_MODE=${{ env.SCCACHE_GHA_USE_PREPROCESSOR_CACHE_MODE }}
             SCCACHE_DIR=/host/${{ env.SCCACHE_DIR }}
             SCCACHE_CACHE_SIZE=${{ env.SCCACHE_CACHE_SIZE }}
           CIBW_ENVIRONMENT_WINDOWS: >
@@ -438,7 +445,7 @@ jobs:
             PIP_FIND_LINKS="$(cygpath -w ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }})"
           # check cache stats before leaving cibuildwheel
           CIBW_BEFORE_TEST_LINUX: >
-            "/host${{ env.SCCACHE_PATH }}" --show-stats &&
+            "/host${{ env.SCCACHE_PATH }}" --show-adv-stats &&
             "/host${{ env.SCCACHE_PATH }}" --show-stats --stats-format=json > /host/${{ github.workspace }}/sccache_core_prev.json
           # force the test stage to be run (so that before-test is not skipped)
           # TODO: we might want to think twice on adding this, it does a lot of
 
@@ -71,6 +71,159 @@ jobs:
           echo "skip=${skip}" >> "$GITHUB_OUTPUT"
           echo "doc_only=${doc_only}" >> "$GITHUB_OUTPUT"
 
+  # Detect which top-level modules were touched by the PR so downstream build
+  # and test jobs can avoid rebuilding/retesting modules unaffected by the
+  # change. See issue #299.
+  #
+  # Dependency graph (verified in pyproject.toml files):
+  #   cuda_pathfinder   -> (no internal deps)
+  #   cuda_bindings     -> cuda_pathfinder
+  #   cuda_core         -> cuda_pathfinder, cuda_bindings
+  #   cuda_python       -> cuda_bindings (meta package)
+  #
+  # A change to cuda_pathfinder (or shared infra) forces a rebuild of every
+  # downstream module. A change to cuda_bindings forces rebuild of cuda_core.
+  # A change to cuda_core alone skips rebuilding/retesting cuda_bindings.
+  # On push to main, tag refs, schedule, or workflow_dispatch events we
+  # unconditionally run everything because there is no meaningful "changed
+  # paths" baseline for those events.
+  detect-changes:
+    runs-on: ubuntu-latest
+    outputs:
+      bindings: ${{ steps.compose.outputs.bindings }}
+      core: ${{ steps.compose.outputs.core }}
+      pathfinder: ${{ steps.compose.outputs.pathfinder }}
+      python_meta: ${{ steps.compose.outputs.python_meta }}
+      test_helpers: ${{ steps.compose.outputs.test_helpers }}
+      shared: ${{ steps.compose.outputs.shared }}
+      build_bindings: ${{ steps.compose.outputs.build_bindings }}
+      build_core: ${{ steps.compose.outputs.build_core }}
+      build_pathfinder: ${{ steps.compose.outputs.build_pathfinder }}
+      test_bindings: ${{ steps.compose.outputs.test_bindings }}
+      test_core: ${{ steps.compose.outputs.test_core }}
+      test_pathfinder: ${{ steps.compose.outputs.test_pathfinder }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          fetch-depth: 0
+
+      # copy-pr-bot pushes every PR (whether it targets main or a backport
+      # branch such as 12.9.x) to pull-request/<N>, so the base branch
+      # cannot be inferred from github.ref_name. Look it up via the
+      # upstream PR metadata so the diff below is rooted at the right place.
+      - name: Resolve PR base branch
+        id: pr-info
+        if: ${{ startsWith(github.ref_name, 'pull-request/') }}
+        uses: nv-gha-runners/get-pr-info@main
+
+      - name: Detect changed paths
+        id: filter
+        if: ${{ startsWith(github.ref_name, 'pull-request/') }}
+        env:
+          # GitHub Actions evaluates step-level `env:` expressions eagerly —
+          # the step's `if:` gate does NOT short-circuit them. On non-PR
+          # events (push/tag/schedule), `pr-info` is skipped and its outputs
+          # are empty strings, so `fromJSON('')` would raise a template error
+          # and fail the step despite `if:` being false. Guard the
+          # `fromJSON` call with a short-circuit so the expression resolves
+          # to an empty string on non-PR events; the step is still gated
+          # off by `if:`, so `BASE_REF` is never consumed there.
+          BASE_REF: ${{ steps.pr-info.outputs.pr-info && fromJSON(steps.pr-info.outputs.pr-info).base.ref || '' }}
+        run: |
+          # Diff against the merge base with the PR's actual target branch so
+          # only PR-side changes are listed. --no-renames reports both the old
+          # and new path of a moved file; quotePath=false avoids quoted paths.
+          if [[ -z "${BASE_REF}" ]]; then
+            echo "Could not resolve PR base branch from get-pr-info output" >&2
+            exit 1
+          fi
+          base=$(git merge-base HEAD "origin/${BASE_REF}")
+          changed=$(git -c core.quotePath=false diff --no-renames --name-only "$base"...HEAD)
+
+          has_match() {
+            grep -qE "$1" <<< "$changed" && echo true || echo false
+          }
+
+          {
+            echo "bindings=$(has_match '^cuda_bindings/')"
+            echo "core=$(has_match '^cuda_core/')"
+            echo "pathfinder=$(has_match '^cuda_pathfinder/')"
+            echo "python_meta=$(has_match '^cuda_python/')"
+            echo "test_helpers=$(has_match '^cuda_python_test_helpers/')"
+            echo "shared=$(has_match '^(\.github/|ci/|scripts/|toolshed/|conftest\.py$|pyproject\.toml$|pixi\.(toml|lock)$|pytest\.ini$|ruff\.toml$)')"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Compose gating outputs
+        id: compose
+        env:
+          IS_PR: ${{ startsWith(github.ref_name, 'pull-request/') }}
+          BINDINGS: ${{ steps.filter.outputs.bindings || 'false' }}
+          CORE: ${{ steps.filter.outputs.core || 'false' }}
+          PATHFINDER: ${{ steps.filter.outputs.pathfinder || 'false' }}
+          PYTHON_META: ${{ steps.filter.outputs.python_meta || 'false' }}
+          TEST_HELPERS: ${{ steps.filter.outputs.test_helpers || 'false' }}
+          SHARED: ${{ steps.filter.outputs.shared || 'false' }}
+        run: |
+          set -euxo pipefail
+          # Non-PR events (push to main, tag push, schedule, workflow_dispatch)
+          # always exercise the full pipeline because there is no baseline for
+          # a meaningful diff.
+          if [[ "${IS_PR}" != "true" ]]; then
+            bindings=true
+            core=true
+            pathfinder=true
+            python_meta=true
+            test_helpers=true
+            shared=true
+          else
+            bindings="${BINDINGS}"
+            core="${CORE}"
+            pathfinder="${PATHFINDER}"
+            python_meta="${PYTHON_META}"
+            test_helpers="${TEST_HELPERS}"
+            shared="${SHARED}"
+          fi
+
+          or_flag() {  # prints "true" if any argument is exactly "true", else "false"
+            local candidate
+            for candidate in "$@"; do
+              case "${candidate}" in
+                true) echo "true"; return ;;
+              esac
+            done
+            echo "false"
+          }
+
+          # Build gating: pathfinder change forces rebuild of bindings and
+          # core; bindings change forces rebuild of core. shared changes force
+          # a full rebuild.
+          build_pathfinder="$(or_flag "${shared}" "${pathfinder}")"
+          build_bindings="$(or_flag "${shared}" "${pathfinder}" "${bindings}")"
+          build_core="$(or_flag "${shared}" "${pathfinder}" "${bindings}" "${core}")"
+
+          # Test gating: tests for a module must run whenever that module, any
+          # of its runtime dependencies, the shared test helper package, or
+          # shared infra changes. pathfinder tests are cheap and always run.
+          test_pathfinder=true
+          test_bindings="$(or_flag "${shared}" "${pathfinder}" "${bindings}" "${test_helpers}")"
+          test_core="$(or_flag "${shared}" "${pathfinder}" "${bindings}" "${core}" "${test_helpers}")"
+
+          {
+            echo "bindings=${bindings}"
+            echo "core=${core}"
+            echo "pathfinder=${pathfinder}"
+            echo "python_meta=${python_meta}"
+            echo "test_helpers=${test_helpers}"
+            echo "shared=${shared}"
+            echo "build_bindings=${build_bindings}"
+            echo "build_core=${build_core}"
+            echo "build_pathfinder=${build_pathfinder}"
+            echo "test_bindings=${test_bindings}"
+            echo "test_core=${test_core}"
+            echo "test_pathfinder=${test_pathfinder}"
+          } >> "$GITHUB_OUTPUT"
+
   # NOTE: Build jobs are intentionally split by platform rather than using a single
   # matrix. This allows each test job to depend only on its corresponding build,
   # so faster platforms can proceed through build & test without waiting for slower
@@ -151,6 +304,7 @@ jobs:
     needs:
       - ci-vars
       - should-skip
+      - detect-changes
       - build-linux-64
     secrets: inherit
     uses: ./.github/workflows/test-wheel-linux.yml
@@ -159,6 +313,7 @@ jobs:
       host-platform: ${{ matrix.host-platform }}
       build-ctk-ver: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
       nruns: ${{ (github.event_name == 'schedule' && 100) || 1}}
+      skip-bindings-test: ${{ !fromJSON(needs.detect-changes.outputs.test_bindings) }}
 
   # See test-linux-64 for why test jobs are split by platform.
   test-linux-aarch64:
@@ -174,6 +329,7 @@ jobs:
     needs:
       - ci-vars
       - should-skip
+      - detect-changes
       - build-linux-aarch64
     secrets: inherit
     uses: ./.github/workflows/test-wheel-linux.yml
@@ -182,6 +338,7 @@ jobs:
       host-platform: ${{ matrix.host-platform }}
       build-ctk-ver: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
       nruns: ${{ (github.event_name == 'schedule' && 100) || 1}}
+      skip-bindings-test: ${{ !fromJSON(needs.detect-changes.outputs.test_bindings) }}
 
   # See test-linux-64 for why test jobs are split by platform.
   test-windows:
@@ -197,6 +354,7 @@ jobs:
     needs:
       - ci-vars
       - should-skip
+      - detect-changes
       - build-windows
     secrets: inherit
     uses: ./.github/workflows/test-wheel-windows.yml
@@ -205,6 +363,7 @@ jobs:
       host-platform: ${{ matrix.host-platform }}
       build-ctk-ver: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
       nruns: ${{ (github.event_name == 'schedule' && 100) || 1}}
+      skip-bindings-test: ${{ !fromJSON(needs.detect-changes.outputs.test_bindings) }}
 
   doc:
     name: Docs
@@ -228,6 +387,7 @@ jobs:
     runs-on: ubuntu-latest
     needs:
       - should-skip
+      - detect-changes
       - test-linux-64
       - test-linux-aarch64
       - test-windows
@@ -254,7 +414,16 @@ jobs:
           #
           # Note: When [doc-only] is in PR title, test jobs are intentionally
           # skipped and should not cause failure.
+          #
+          # detect-changes gates whether heavy test matrices run at all; if it
+          # does not succeed, downstream test jobs are skipped rather than
+          # failed, which would otherwise go unnoticed here. Require its
+          # success explicitly so a broken gating step cannot masquerade as a
+          # green CI run.
           doc_only=${{ needs.should-skip.outputs.doc-only }}
+          if ${{ needs.detect-changes.result != 'success' }}; then
+            exit 1
+          fi
           if ${{ needs.doc.result == 'cancelled' || needs.doc.result == 'failure' }}; then
             exit 1
           fi
 
@@ -22,6 +22,13 @@ on:
       nruns:
         type: number
         default: 1
+      # When true, cuda.bindings tests (and the Cython tests that depend on
+      # them) are skipped even when CTK majors match. Callers set this based
+      # on the output of the detect-changes job in ci.yml so PRs that only
+      # touch unrelated modules avoid the expensive bindings test suite.
+      skip-bindings-test:
+        type: boolean
+        default: false
 
 defaults:
   run:
@@ -113,6 +120,7 @@ jobs:
           LOCAL_CTK: ${{ matrix.LOCAL_CTK }}
           PY_VER: ${{ matrix.PY_VER }}
           SHA: ${{ github.sha }}
+          SKIP_BINDINGS_TEST_OVERRIDE: ${{ inputs.skip-bindings-test && '1' || '0' }}
         run: ./ci/tools/env-vars test
 
       - name: Download cuda-pathfinder build artifacts
@@ -122,21 +130,21 @@ jobs:
           path: ./cuda_pathfinder
 
       - name: Download cuda-python build artifacts
-        if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0'}}
+        if: ${{ env.USE_BACKPORT_BINDINGS == '0' }}
         uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
         with:
           name: cuda-python-wheel
           path: .
 
       - name: Download cuda.bindings build artifacts
-        if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0'}}
+        if: ${{ env.USE_BACKPORT_BINDINGS == '0' }}
         uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
         with:
           name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
           path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
 
       - name: Download cuda-python & cuda.bindings build artifacts from the prior branch
-        if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '1'}}
+        if: ${{ env.USE_BACKPORT_BINDINGS == '1' }}
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
@@ -266,7 +274,7 @@ jobs:
         if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
         run: |
           pip install pyperf
-          pushd cuda_bindings/benchmarks
+          pushd benchmarks/cuda_bindings
           python run_pyperf.py --fast --min-time 1
           popd