Skip to content

Commit 22eb5ff

Browse files
committed
Merge branch 'main' into add-missing-device-attributes
2 parents a4a3b9e + 598874c commit 22eb5ff

File tree

20 files changed

+200
-115
lines changed

20 files changed

+200
-115
lines changed

.bandit

Lines changed: 0 additions & 5 deletions
This file was deleted.

.github/workflows/bandit.yml

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,27 @@ jobs:
1919
permissions:
2020
security-events: write
2121
steps:
22-
- name: Perform Bandit Analysis
23-
# KEEP IN SYNC WITH bandit rev in .pre-commit-config.yaml
24-
# Current runner uses Python 3.8, so the action installs bandit==1.7.10
25-
# via `pip install bandit[sarif]`. If runner Python moves to >=3.9,
26-
# the action will resolve to 1.8.x and you'll need to bump pre-commit.
27-
# (Bandit >=1.8.0 dropped Python 3.8 via Requires-Python metadata.)
28-
uses: PyCQA/bandit-action@8a1b30610f61f3f792fe7556e888c9d7dffa52de # v1.0.0
22+
- name: Checkout
23+
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
24+
25+
- name: Install uv
26+
uses: astral-sh/setup-uv@b75a909f75acd358c2196fb9a5f1299a9a8868a4 # v6.7.0
27+
28+
- name: Get ignore codes
29+
id: ignore-codes
30+
# These are computed so that we can run only the `S` (bandit)
31+
# checks. Passing --select to ruff overrides any config files
32+
# (ruff.toml, pyproject.toml, etc.), so to avoid having to keep everything
33+
# in sync we grab them from the TOML programmatically
34+
run: |
35+
set -euxo pipefail
36+
37+
echo "codes=$(uvx toml2json ./ruff.toml | jq -r '.lint.ignore | map(select(test("^S\\d+"))) | join(",")')" >> "$GITHUB_OUTPUT"
38+
- name: Perform Bandit Analysis using Ruff
39+
uses: astral-sh/ruff-action@57714a7c8a2e59f32539362ba31877a1957dded1 # v3.5.1
40+
with:
41+
args: "check --select S --ignore ${{ steps.ignore-codes.outputs.codes }} --output-format sarif --output-file results.sarif"
42+
- name: Upload SARIF file
43+
uses: github/codeql-action/upload-sarif@v3
44+
with:
45+
sarif_file: results.sarif

.github/workflows/ci.yml

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,38 @@ jobs:
3232
cuda_build_ver=$(jq -r .cuda.build.version ci/versions.json)
3333
echo "cuda_build_ver=$cuda_build_ver" >> $GITHUB_OUTPUT
3434
35+
should-skip:
36+
runs-on: ubuntu-latest
37+
outputs:
38+
skip: ${{ steps.get-should-skip.outputs.skip }}
39+
steps:
40+
- name: Checkout repository
41+
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
42+
- name: Compute whether to skip builds and tests
43+
id: get-should-skip
44+
env:
45+
GH_TOKEN: ${{ github.token }}
46+
run: |
47+
set -euxo pipefail
48+
if ${{ startsWith(github.ref_name, 'pull-request/') }}; then
49+
skip="$(gh pr view "$(grep -Po '(\d+)$' <<< '${{ github.ref_name }}')" --json title --jq '.title | contains("[no-ci]")')"
50+
else
51+
skip=false
52+
fi
53+
echo "skip=${skip}" >> "$GITHUB_OUTPUT"
54+
3555
# WARNING: make sure all of the build jobs are in sync
3656
build-linux-64:
3757
needs:
3858
- ci-vars
59+
- should-skip
3960
strategy:
4061
fail-fast: false
4162
matrix:
4263
host-platform:
4364
- linux-64
4465
name: Build ${{ matrix.host-platform }}, CUDA ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
45-
if: ${{ github.repository_owner == 'nvidia' }}
66+
if: ${{ github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.skip) }}
4667
secrets: inherit
4768
uses: ./.github/workflows/build-wheel.yml
4869
with:
@@ -53,13 +74,14 @@ jobs:
5374
build-linux-aarch64:
5475
needs:
5576
- ci-vars
77+
- should-skip
5678
strategy:
5779
fail-fast: false
5880
matrix:
5981
host-platform:
6082
- linux-aarch64
6183
name: Build ${{ matrix.host-platform }}, CUDA ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
62-
if: ${{ github.repository_owner == 'nvidia' }}
84+
if: ${{ github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.skip) }}
6385
secrets: inherit
6486
uses: ./.github/workflows/build-wheel.yml
6587
with:
@@ -70,13 +92,14 @@ jobs:
7092
build-windows:
7193
needs:
7294
- ci-vars
95+
- should-skip
7396
strategy:
7497
fail-fast: false
7598
matrix:
7699
host-platform:
77100
- win-64
78101
name: Build ${{ matrix.host-platform }}, CUDA ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
79-
if: ${{ github.repository_owner == 'nvidia' }}
102+
if: ${{ github.repository_owner == 'nvidia' && !fromJSON(needs.should-skip.outputs.skip) }}
80103
secrets: inherit
81104
uses: ./.github/workflows/build-wheel.yml
82105
with:
@@ -163,15 +186,12 @@ jobs:
163186

164187
checks:
165188
name: Check job status
166-
permissions:
167-
checks: read
189+
runs-on: ubuntu-latest
168190
needs:
169-
- build-linux-64
170191
- test-linux-64
171-
- build-linux-aarch64
172192
- test-linux-aarch64
173-
- build-windows
174193
- test-windows
175194
- doc
176-
secrets: inherit
177-
uses: ./.github/workflows/status-check.yml
195+
steps:
196+
- name: Exit
197+
run: exit 0

.github/workflows/status-check.yml

Lines changed: 0 additions & 20 deletions
This file was deleted.

.pre-commit-config.yaml

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ ci:
1515
# pre-commit autoupdate --freeze
1616
repos:
1717
- repo: https://github.com/astral-sh/ruff-pre-commit
18-
rev: 0b19ef1fd6ad680ed7752d6daba883ce1265a6de # frozen: v0.12.2
18+
rev: f298305809c552671cc47e0fec0ba43e96c146a2 # frozen: v0.13.2
1919
hooks:
2020
- id: ruff
2121
args: [--fix, --show-fixes]
@@ -40,7 +40,7 @@ repos:
4040

4141
# Standard hooks
4242
- repo: https://github.com/pre-commit/pre-commit-hooks
43-
rev: "v5.0.0"
43+
rev: "3e8a8703264a2f4a69428a0aa4dcb512790b2c8c" # frozen: v6.0.0
4444
hooks:
4545
- id: check-added-large-files
4646
- id: check-case-conflict
@@ -58,22 +58,14 @@ repos:
5858

5959
# Checking for common mistakes
6060
- repo: https://github.com/pre-commit/pygrep-hooks
61-
rev: "v1.10.0"
61+
rev: "3a6eb0fadf60b3cccfd80bad9dbb6fae7e47b316" # frozen: v1.10.0
6262
hooks:
6363
- id: rst-backticks
6464
- id: rst-directive-colons
6565
- id: rst-inline-touching-normal
6666

67-
- repo: https://github.com/PyCQA/bandit
68-
rev: "36fd65054fc8864b4037d0918904f9331512feb5" # frozen: 1.7.10 KEEP IN SYNC WITH .github/workflows/bandit.yml
69-
hooks:
70-
- id: bandit
71-
args:
72-
- --ini
73-
- .bandit
74-
7567
- repo: https://github.com/pre-commit/mirrors-mypy
76-
rev: 0f86793af5ef5f6dc63c8d04a3cabfa3ea8f9c6a # frozen: v1.16.1
68+
rev: 9f70dc58c23dfcca1b97af99eaeee3140a807c7e # frozen: v1.18.2
7769
hooks:
7870
- id: mypy
7971
name: mypy-pathfinder

cuda_bindings/tests/test_cuda.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -653,7 +653,7 @@ def test_get_error_name_and_string():
653653
@pytest.mark.skipif(not callableBinary("nvidia-smi"), reason="Binary existance needed")
654654
def test_device_get_name():
655655
# TODO: Refactor this test once we have nvml bindings to avoid the use of subprocess
656-
import subprocess # nosec B404
656+
import subprocess
657657

658658
(err,) = cuda.cuInit(0)
659659
assert err == cuda.CUresult.CUDA_SUCCESS
@@ -663,8 +663,10 @@ def test_device_get_name():
663663
assert err == cuda.CUresult.CUDA_SUCCESS
664664

665665
p = subprocess.check_output(
666-
["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"], shell=False, stderr=subprocess.PIPE
667-
) # nosec B603, B607
666+
["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"], # noqa: S607
667+
shell=False,
668+
stderr=subprocess.PIPE,
669+
)
668670

669671
delimiter = b"\r\n" if platform.system() == "Windows" else b"\n"
670672
expect = p.split(delimiter)

cuda_bindings/tests/test_nvvm.py

Lines changed: 17 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,12 @@
44

55
import binascii
66
import re
7-
import textwrap
87
from contextlib import contextmanager
98

109
import pytest
1110
from cuda.bindings import nvvm
1211

13-
MINIMAL_NVVMIR_FIXTURE_PARAMS = ["txt", "bitcode_static"]
14-
try:
15-
import llvmlite.binding as llvmlite_binding # Optional test dependency.
16-
except ImportError:
17-
llvmlite_binding = None
18-
else:
19-
MINIMAL_NVVMIR_FIXTURE_PARAMS.append("bitcode_dynamic")
20-
21-
MINIMAL_NVVMIR_TXT = b"""\
12+
MINIMAL_NVVMIR_TXT_TEMPLATE = b"""\
2213
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
2314
2415
target triple = "nvptx64-nvidia-cuda"
@@ -130,43 +121,24 @@
130121
"6e673e0000000000",
131122
}
132123

133-
MINIMAL_NVVMIR_CACHE = {}
134-
135124

136-
@pytest.fixture(params=MINIMAL_NVVMIR_FIXTURE_PARAMS)
125+
@pytest.fixture(params=("txt", "bitcode_static"))
137126
def minimal_nvvmir(request):
138-
for pass_counter in range(2):
139-
nvvmir = MINIMAL_NVVMIR_CACHE.get(request.param, -1)
140-
if nvvmir != -1:
141-
if nvvmir is None:
142-
pytest.skip(f"UNAVAILABLE: {request.param}")
143-
return nvvmir
144-
if pass_counter:
145-
raise AssertionError("This code path is meant to be unreachable.")
146-
# Build cache entries, then try again (above).
147-
major, minor, debug_major, debug_minor = nvvm.ir_version()
148-
txt = MINIMAL_NVVMIR_TXT % (major, debug_major)
149-
if llvmlite_binding is None:
150-
bitcode_dynamic = None
151-
else:
152-
bitcode_dynamic = llvmlite_binding.parse_assembly(txt.decode()).as_bitcode()
153-
bitcode_static = MINIMAL_NVVMIR_BITCODE_STATIC.get((major, debug_major))
154-
if bitcode_static is not None:
155-
bitcode_static = binascii.unhexlify(bitcode_static)
156-
MINIMAL_NVVMIR_CACHE["txt"] = txt
157-
MINIMAL_NVVMIR_CACHE["bitcode_dynamic"] = bitcode_dynamic
158-
MINIMAL_NVVMIR_CACHE["bitcode_static"] = bitcode_static
159-
if bitcode_static is None:
160-
if bitcode_dynamic is None:
161-
raise RuntimeError("Please `pip install llvmlite` to generate `bitcode_static` (see PR #443)")
162-
bitcode_hex = binascii.hexlify(bitcode_dynamic).decode("ascii")
163-
print("\n\nMINIMAL_NVVMIR_BITCODE_STATIC = { # PLEASE ADD TO test_nvvm.py")
164-
print(f" ({major}, {debug_major}): # (major, debug_major)")
165-
lines = textwrap.wrap(bitcode_hex, width=80)
166-
for line in lines[:-1]:
167-
print(f' "{line}"')
168-
print(f' "{lines[-1]}",')
169-
print("}\n", flush=True)
127+
major, minor, debug_major, debug_minor = nvvm.ir_version()
128+
129+
if request.param == "txt":
130+
return MINIMAL_NVVMIR_TXT_TEMPLATE % (major, debug_major)
131+
132+
bitcode_static_binascii = MINIMAL_NVVMIR_BITCODE_STATIC.get((major, debug_major))
133+
if bitcode_static_binascii:
134+
return binascii.unhexlify(bitcode_static_binascii)
135+
raise RuntimeError(
136+
"Static bitcode for NVVM IR version "
137+
f"{major}.{debug_major} is not available in this test.\n"
138+
"Maintainers: Please run the helper script to generate it and add the "
139+
"output to the MINIMAL_NVVMIR_BITCODE_STATIC dict:\n"
140+
" ../../toolshed/build_static_bitcode_input.py"
141+
)
170142

171143

172144
@pytest.fixture(params=[nvvm.compile_program, nvvm.verify_program])

cuda_bindings/tests/test_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import platform
55
import random
6-
import subprocess # nosec B404
6+
import subprocess
77
import sys
88
from pathlib import Path
99

@@ -72,7 +72,7 @@ def test_ptx_utils(kernel, actual_ptx_ver, min_cuda_ver):
7272
),
7373
)
7474
def test_get_handle(target):
75-
ptr = random.randint(1, 1024)
75+
ptr = random.randint(1, 1024) # noqa: S311
7676
obj = target(ptr)
7777
handle = get_cuda_native_handle(obj)
7878
assert handle == ptr
@@ -105,6 +105,6 @@ def test_get_handle_error(target):
105105
],
106106
)
107107
def test_cyclical_imports(module):
108-
subprocess.check_call( # nosec B603
108+
subprocess.check_call( # noqa: S603
109109
[sys.executable, Path(__file__).parent / "utils" / "check_cyclical_import.py", f"cuda.bindings.{module}"],
110110
)

cuda_core/cuda/core/experimental/_memoryview.pyx

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,23 @@ cdef class StridedMemoryView:
105105
else:
106106
pass
107107

108+
def __dealloc__(self):
109+
if self.dl_tensor == NULL:
110+
return
111+
112+
if cpython.PyCapsule_IsValid(
113+
self.metadata, DLPACK_VERSIONED_TENSOR_USED_NAME):
114+
data = cpython.PyCapsule_GetPointer(
115+
self.metadata, DLPACK_VERSIONED_TENSOR_USED_NAME)
116+
dlm_tensor_ver = <DLManagedTensorVersioned*>data
117+
dlm_tensor_ver.deleter(dlm_tensor_ver)
118+
elif cpython.PyCapsule_IsValid(
119+
self.metadata, DLPACK_TENSOR_USED_NAME):
120+
data = cpython.PyCapsule_GetPointer(
121+
self.metadata, DLPACK_TENSOR_USED_NAME)
122+
dlm_tensor = <DLManagedTensor*>data
123+
dlm_tensor.deleter(dlm_tensor)
124+
108125
@property
109126
def shape(self) -> tuple[int]:
110127
if self._shape is None and self.exporting_obj is not None:

cuda_core/docs/source/release/0.X.Y-notes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,4 @@ Fixes and enhancements
4848
- Make :class:`Buffer` creation more performant.
4949
- Enabled :class:`MemoryResource` subclasses to accept :class:`Device` objects, in addition to previously supported device ordinals.
5050
- Fixed a bug in :class:`Stream` and other classes where object cleanup would error during interpreter shutdown.
51+
- :class:`StridedMemoryView` of an underlying array using the DLPack protocol will no longer leak memory.

0 commit comments

Comments
 (0)