Skip to content

Commit 9eec539

Browse files
authored
[FEA]: Add CUDA driver version query to cuda.pathfinder (private API) (#1953)
* Add a reusable pathfinder driver info helper. Break out CUDA driver version querying into a standalone internal utility so it can be reused independently from compatibility checks, and cover the ctypes loader paths with focused tests. Made-with: Cursor * Refine pathfinder driver info loader checks. Treat the Windows WinDLL path as the normal runtime case and keep the focused tests aligned with the stricter driver-loader invariants. Made-with: Cursor * Add parsed pathfinder driver version metadata. Wrap the encoded cuDriverGetVersion() result in a DriverVersion dataclass so callers can use major and minor fields directly while retaining a low-level integer helper for loader-focused tests. Made-with: Cursor * Add a real pathfinder driver version test. Cover query_driver_version() alongside the driver library loading tests and reuse the existing strictness mode so host-specific failures still surface cleanly in all_must_work mode. Made-with: Cursor * Reduce redundant pathfinder driver info mocks. Drop the non-Windows loader mock now that a real driver-version test covers the Linux success path, while keeping the Windows branch and failure-path unit coverage. Made-with: Cursor * Rename the pathfinder CUDA driver version dataclass. Use DriverCudaVersion for clearer pairing with the planned release-version type while keeping the existing driver info API behavior unchanged. Made-with: Cursor * Add a pathfinder NVML driver release version helper. Query nvmlSystemGetDriverVersion() through pathfinder's driver library loading path and add a minimal real test so the implementation is preserved as a future reference. Made-with: Cursor * Revert the pathfinder NVML driver release version helper. Step back from the exploratory NVML-based release-version query for now because it adds non-trivial complexity and a new dependency surface without a current pathfinder need, while keeping the reference implementation in history if we need it later. 
Made-with: Cursor * Clarify the pathfinder CUDA driver version naming. Document that DriverCudaVersion matches the CUDA Version shown by nvidia-smi rather than the graphics driver release, so the dataclass name reads clearly in context. Made-with: Cursor * Finalize the pathfinder CUDA driver version query API. Expose DriverCudaVersion, QueryDriverCudaVersionError, and query_driver_cuda_version publicly, and align the internal naming, caching, docs, and test coverage around the CUDA-specific driver version query. Made-with: Cursor * Add a public pathfinder driver info regression test. Protect the new top-level driver-info re-exports so internal-only test coverage does not miss a broken `cuda.pathfinder` plumbing layer. Made-with: Cursor * Remove pathfinder driver info public re-exports. Stop exposing the new driver info helper through cuda.pathfinder while keeping the internal implementation and internal test coverage in place. Made-with: Cursor * Clarify DriverCudaVersion docstring terminology. Spell out that `cuDriverGetVersion()` reports the CUDA-facing user-mode driver (UMD) version rather than the kernel-mode driver (KMD) package version so the `nvidia-smi` comparison is less ambiguous. Made-with: Cursor
1 parent f993098 commit 9eec539

3 files changed

Lines changed: 202 additions & 0 deletions

File tree

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from __future__ import annotations
5+
6+
import ctypes
7+
import functools
8+
from collections.abc import Callable
9+
from dataclasses import dataclass
10+
11+
from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import (
12+
load_nvidia_dynamic_lib as _load_nvidia_dynamic_lib,
13+
)
14+
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
15+
16+
17+
class QueryDriverCudaVersionError(RuntimeError):
    """Raised when ``query_driver_cuda_version()`` cannot determine the CUDA driver version.

    The underlying failure is attached as ``__cause__`` by the raising code.
    """
19+
20+
21+
@dataclass(frozen=True, slots=True)
22+
class DriverCudaVersion:
23+
"""
24+
CUDA-facing driver version reported by ``cuDriverGetVersion()``.
25+
26+
The name ``DriverCudaVersion`` is intentionally specific: this dataclass
27+
models the version shown as ``CUDA Version`` in ``nvidia-smi``, not the
28+
graphics driver release shown as ``Driver Version``. More specifically,
29+
it reflects the CUDA user-mode driver (UMD) interface version reported by
30+
``cuDriverGetVersion()``, not the kernel-mode driver (KMD) package
31+
version.
32+
33+
Example ``nvidia-smi`` output::
34+
35+
+---------------------------------------------------------------------+
36+
| NVIDIA-SMI 595.58.03 Driver Version: 595.58.03 CUDA Version: 13.2 |
37+
+---------------------------------------------------------------------+
38+
39+
For the example above, ``DriverCudaVersion(encoded=13020, major=13,
40+
minor=2)`` corresponds to ``CUDA Version: 13.2``. It does not correspond
41+
to ``Driver Version: 595.58.03``.
42+
"""
43+
44+
encoded: int
45+
major: int
46+
minor: int
47+
48+
49+
@functools.cache
def query_driver_cuda_version() -> DriverCudaVersion:
    """Return the CUDA driver version parsed into its major/minor components.

    The successful result is memoized by ``functools.cache``; call
    ``query_driver_cuda_version.cache_clear()`` to force a fresh query.

    Returns:
        A ``DriverCudaVersion`` holding the raw encoded value together with
        ``major = encoded // 1000`` and ``minor = (encoded % 1000) // 10``.

    Raises:
        QueryDriverCudaVersionError: If the low-level query or the parsing
            fails for any reason. The original exception is chained as
            ``__cause__``.
    """
    # The whole body stays inside the ``try`` on purpose: callers are promised
    # a single exception type regardless of which step failed.
    try:
        encoded = _query_driver_cuda_version_int()
        major, remainder = divmod(encoded, 1000)
        return DriverCudaVersion(
            encoded=encoded,
            major=major,
            minor=remainder // 10,
        )
    except Exception as exc:
        raise QueryDriverCudaVersionError("Failed to query the CUDA driver version.") from exc
61+
62+
63+
def _query_driver_cuda_version_int() -> int:
    """Return the encoded CUDA driver version from ``cuDriverGetVersion()``.

    Locates the driver library through ``_load_nvidia_dynamic_lib("cuda")``,
    opens it by absolute path with ``ctypes`` and calls
    ``cuDriverGetVersion()`` on it.

    Returns:
        The integer written by ``cuDriverGetVersion()`` into its output
        argument.

    Raises:
        RuntimeError: If ``cuDriverGetVersion()`` returns a non-zero status.
    """
    loaded_cuda = _load_nvidia_dynamic_lib("cuda")
    if IS_WINDOWS:
        # `ctypes.WinDLL` exists on Windows at runtime. The ignore is only for
        # Linux mypy runs, where the platform stubs do not define that attribute.
        loader_cls: Callable[[str], ctypes.CDLL] = ctypes.WinDLL  # type: ignore[attr-defined]
    else:
        loader_cls = ctypes.CDLL
    driver_lib = loader_cls(loaded_cuda.abs_path)

    # Declare the C signature explicitly so ctypes validates the argument and
    # converts the status code to a Python int.
    cu_driver_get_version = driver_lib.cuDriverGetVersion
    cu_driver_get_version.argtypes = [ctypes.POINTER(ctypes.c_int)]
    cu_driver_get_version.restype = ctypes.c_int

    version = ctypes.c_int()
    status = cu_driver_get_version(ctypes.byref(version))
    if status != 0:
        raise RuntimeError(f"Failed to query CUDA driver version via cuDriverGetVersion() (status={status}).")
    return version.value

cuda_pathfinder/tests/test_driver_lib_loading.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
_load_lib_no_cache,
2626
)
2727
from cuda.pathfinder._dynamic_libs.subprocess_protocol import STATUS_NOT_FOUND, parse_dynamic_lib_subprocess_payload
28+
from cuda.pathfinder._utils import driver_info
2829
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS, quote_for_shell
2930

3031
STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS", "see_what_works")
@@ -157,3 +158,23 @@ def raise_child_process_failed():
157158
assert abs_path is not None
158159
info_summary_append(f"abs_path={quote_for_shell(abs_path)}")
159160
assert os.path.isfile(abs_path)
161+
162+
163+
def test_real_query_driver_cuda_version(info_summary_append):
    """Query the real driver and check the parsed fields against the encoded value.

    When the query fails, the failure is re-raised only if ``STRICTNESS`` is
    ``"all_must_work"``; otherwise it is recorded in the info summary and the
    test returns early.
    """
    # Start from empty caches so the query really runs in this test.
    driver_info._load_nvidia_dynamic_lib.cache_clear()
    driver_info.query_driver_cuda_version.cache_clear()
    try:
        version = driver_info.query_driver_cuda_version()
    except driver_info.QueryDriverCudaVersionError as exc:
        if STRICTNESS == "all_must_work":
            raise
        info_summary_append(f"driver version unavailable: {exc.__class__.__name__}: {exc}")
        return
    finally:
        # Clear again on every exit path so nothing cached here leaks into other tests.
        driver_info._load_nvidia_dynamic_lib.cache_clear()
        driver_info.query_driver_cuda_version.cache_clear()

    info_summary_append(f"driver_version={version.major}.{version.minor} (encoded={version.encoded})")
    assert version.encoded > 0
    assert version.major == version.encoded // 1000
    assert version.minor == (version.encoded % 1000) // 10
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import ctypes
5+
6+
import pytest
7+
8+
from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL
9+
from cuda.pathfinder._utils import driver_info
10+
11+
12+
@pytest.fixture(autouse=True)
def _clear_driver_cuda_version_query_cache():
    """Clear the ``query_driver_cuda_version`` cache before and after every test.

    Declared ``autouse`` so that a value cached by one test (often a
    monkeypatched one) cannot be observed by another.
    """
    driver_info.query_driver_cuda_version.cache_clear()
    yield
    driver_info.query_driver_cuda_version.cache_clear()
17+
18+
19+
class _FakeCuDriverGetVersion:
20+
def __init__(self, *, status: int, version: int):
21+
self.argtypes = None
22+
self.restype = None
23+
self._status = status
24+
self._version = version
25+
26+
def __call__(self, version_ptr) -> int:
27+
ctypes.cast(version_ptr, ctypes.POINTER(ctypes.c_int)).contents.value = self._version
28+
return self._status
29+
30+
31+
class _FakeDriverLib:
    """Stand-in for the loaded driver library; exposes only ``cuDriverGetVersion``."""

    def __init__(self, *, status: int, version: int):
        self.cuDriverGetVersion = _FakeCuDriverGetVersion(status=status, version=version)
34+
35+
36+
def _loaded_cuda(abs_path: str) -> LoadedDL:
    """Build a ``LoadedDL`` record for ``abs_path`` with fixed placeholder metadata."""
    return LoadedDL(
        abs_path=abs_path,
        was_already_loaded_from_elsewhere=False,
        _handle_uint=0xBEEF,
        found_via="system-search",
    )
43+
44+
45+
def test_query_driver_cuda_version_uses_windll_on_windows(monkeypatch):
    """With ``IS_WINDOWS`` forced on, the driver is opened via ``ctypes.WinDLL`` by absolute path."""
    fake_driver_lib = _FakeDriverLib(status=0, version=12080)
    loaded_paths: list[str] = []

    monkeypatch.setattr(driver_info, "IS_WINDOWS", True)
    monkeypatch.setattr(
        driver_info,
        "_load_nvidia_dynamic_lib",
        lambda _libname: _loaded_cuda(r"C:\Windows\System32\nvcuda.dll"),
    )

    def fake_windll(abs_path: str):
        # Record the path handed to the loader so it can be asserted below.
        loaded_paths.append(abs_path)
        return fake_driver_lib

    # ``raising=False`` lets the attribute be created where ``ctypes.WinDLL``
    # does not exist (non-Windows hosts).
    monkeypatch.setattr(driver_info.ctypes, "WinDLL", fake_windll, raising=False)

    assert driver_info._query_driver_cuda_version_int() == 12080
    assert loaded_paths == [r"C:\Windows\System32\nvcuda.dll"]
64+
65+
66+
def test_query_driver_cuda_version_returns_parsed_dataclass(monkeypatch):
    """An encoded value of 12080 is parsed into major 12 and minor 8."""
    monkeypatch.setattr(driver_info, "_query_driver_cuda_version_int", lambda: 12080)

    assert driver_info.query_driver_cuda_version() == driver_info.DriverCudaVersion(
        encoded=12080,
        major=12,
        minor=8,
    )
74+
75+
76+
def test_query_driver_cuda_version_wraps_internal_failures(monkeypatch):
    """A low-level failure surfaces as ``QueryDriverCudaVersionError`` chained to the root cause."""
    root_cause = RuntimeError("low-level query failed")

    def fail_query_driver_cuda_version_int() -> int:
        raise root_cause

    monkeypatch.setattr(driver_info, "_query_driver_cuda_version_int", fail_query_driver_cuda_version_int)

    with pytest.raises(
        driver_info.QueryDriverCudaVersionError,
        match="Failed to query the CUDA driver version",
    ) as exc_info:
        driver_info.query_driver_cuda_version()

    # The wrapper must preserve the exact original exception object as its cause.
    assert exc_info.value.__cause__ is root_cause
91+
92+
93+
def test_query_driver_cuda_version_int_raises_when_cuda_call_fails(monkeypatch):
    """A non-zero status from ``cuDriverGetVersion()`` raises ``RuntimeError`` naming the status."""
    fake_driver_lib = _FakeDriverLib(status=1, version=0)

    monkeypatch.setattr(driver_info, "IS_WINDOWS", False)
    monkeypatch.setattr(driver_info, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_cuda("/usr/lib/libcuda.so.1"))
    monkeypatch.setattr(driver_info.ctypes, "CDLL", lambda _abs_path: fake_driver_lib)

    with pytest.raises(RuntimeError, match=r"cuDriverGetVersion\(\) \(status=1\)"):
        driver_info._query_driver_cuda_version_int()

0 commit comments

Comments
 (0)