Skip to content

Commit b71a039

Browse files
cpcloud and cursoragent committed
feat(pathfinder): add CTK root canary probe for non-standard-path libs
Libraries like nvvm whose shared object lives in a subdirectory (/nvvm/lib64/) that is not on the system linker path cannot be found via bare dlopen on system CTK installs without CUDA_HOME. Add a "canary probe" search step: when direct system search fails, system-load a well-known CTK lib that IS on the linker path (cudart), derive the CTK installation root from its resolved path, and look for the target lib relative to that root via the existing anchor-point logic. The mechanism is generic -- any future lib with a non-standard path just needs its entry in _find_lib_dir_using_anchor_point. The canary probe is intentionally placed after CUDA_HOME in the search cascade to preserve backward compatibility: users who have CUDA_HOME set expect it to be authoritative, and existing code relying on that ordering should not silently change behavior. Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent ea45bbf commit b71a039

3 files changed

Lines changed: 371 additions & 4 deletions

File tree

cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,57 @@ def _find_dll_using_lib_dir(
152152
return None
153153

154154

155+
def _derive_ctk_root_linux(resolved_lib_path: str) -> str | None:
156+
"""Derive the CTK installation root from a resolved library path on Linux.
157+
158+
Standard system CTK layout: ``$CTK_ROOT/lib64/libfoo.so.XX``
159+
(some installs use ``lib`` instead of ``lib64``).
160+
161+
Returns None if the path doesn't match a recognized layout.
162+
"""
163+
lib_dir = os.path.dirname(resolved_lib_path)
164+
basename = os.path.basename(lib_dir)
165+
if basename in ("lib64", "lib"):
166+
return os.path.dirname(lib_dir)
167+
return None
168+
169+
170+
def _derive_ctk_root_windows(resolved_lib_path: str) -> str | None:
171+
"""Derive the CTK installation root from a resolved library path on Windows.
172+
173+
Handles two CTK layouts:
174+
- CTK 13: ``$CTK_ROOT/bin/x64/foo.dll``
175+
- CTK 12: ``$CTK_ROOT/bin/foo.dll``
176+
177+
Returns None if the path doesn't match a recognized layout.
178+
179+
Uses ``ntpath`` explicitly so the function is testable on any platform.
180+
"""
181+
import ntpath
182+
183+
lib_dir = ntpath.dirname(resolved_lib_path)
184+
basename = ntpath.basename(lib_dir).lower()
185+
if basename == "x64":
186+
parent = ntpath.dirname(lib_dir)
187+
if ntpath.basename(parent).lower() == "bin":
188+
return ntpath.dirname(parent)
189+
elif basename == "bin":
190+
return ntpath.dirname(lib_dir)
191+
return None
192+
193+
194+
def derive_ctk_root(resolved_lib_path: str) -> str | None:
    """Derive the CTK installation root from a resolved library path.

    Dispatches to the Windows or Linux helper depending on ``IS_WINDOWS``.
    The helper inspects the directories above the given library path to
    locate the CTK root.

    Returns None if the path doesn't match any recognized CTK directory
    layout.
    """
    derive_for_platform = _derive_ctk_root_windows if IS_WINDOWS else _derive_ctk_root_linux
    return derive_for_platform(resolved_lib_path)
204+
205+
155206
class _FindNvidiaDynamicLib:
156207
def __init__(self, libname: str):
157208
self.libname = libname
@@ -185,6 +236,16 @@ def try_with_conda_prefix(self) -> str | None:
185236
def try_with_cuda_home(self) -> str | None:
    """Find the library in the directory given by ``_find_lib_dir_using_cuda_home``."""
    cuda_home_lib_dir = _find_lib_dir_using_cuda_home(self.libname)
    return self._find_using_lib_dir(cuda_home_lib_dir)
187238

239+
def try_via_ctk_root(self, ctk_root: str) -> str | None:
    """Find the library under a derived CTK root directory.

    The directory is resolved by :func:`_find_lib_dir_using_anchor_point`,
    with ``ctk_root`` as the anchor point and ``lib64`` as the Linux library
    directory. That helper already knows about non-standard sub-paths
    (e.g. ``nvvm/lib64`` for nvvm).
    """
    lib_dir_under_root = _find_lib_dir_using_anchor_point(
        self.libname,
        anchor_point=ctk_root,
        linux_lib_dir="lib64",
    )
    return self._find_using_lib_dir(lib_dir_under_root)
248+
188249
def _find_using_lib_dir(self, lib_dir: str | None) -> str | None:
189250
if lib_dir is None:
190251
return None

cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
import struct
66
import sys
77

8-
from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import _FindNvidiaDynamicLib
8+
from cuda.pathfinder._dynamic_libs.find_nvidia_dynamic_lib import (
9+
_FindNvidiaDynamicLib,
10+
derive_ctk_root,
11+
)
912
from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL, load_dependencies
1013
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
1114

@@ -22,6 +25,36 @@
2225
load_with_system_search,
2326
)
2427

28+
# Libs that reside on the standard linker path in system CTK installs.
29+
# Used to discover the CTK root when a lib with a non-standard path
30+
# (e.g. nvvm under $CTK_ROOT/nvvm/lib64) can't be found directly.
31+
_CTK_ROOT_CANARY_LIBNAMES = ("cudart",)
32+
33+
34+
def _try_ctk_root_canary(finder: _FindNvidiaDynamicLib) -> str | None:
    """Locate the target lib relative to a CTK root found via a canary lib.

    Some libs (e.g. nvvm) don't reside on the standard linker path. For
    those, each name in ``_CTK_ROOT_CANARY_LIBNAMES`` is loaded via system
    search. The CTK installation root is then derived from the canary's
    resolved absolute path, and ``finder`` is asked for the target lib
    under that root.

    Loading the canary is a side-effect, but a harmless one: it stays
    loaded (handles are never closed) and will be reused by
    :func:`load_nvidia_dynamic_lib` if requested later.

    Returns the absolute path of the target lib, or None if no canary
    leads to it.
    """
    for name in _CTK_ROOT_CANARY_LIBNAMES:
        loaded_canary = load_with_system_search(name)
        canary_path = None if loaded_canary is None else loaded_canary.abs_path
        if canary_path is None:
            # Canary not found by system search, or its path is unknown.
            continue
        root = derive_ctk_root(canary_path)
        found = None if root is None else finder.try_via_ctk_root(root)
        if found is not None:
            return found
    return None
57+
2558

2659
def _load_lib_no_cache(libname: str) -> LoadedDL:
2760
finder = _FindNvidiaDynamicLib(libname)
@@ -50,11 +83,21 @@ def _load_lib_no_cache(libname: str) -> LoadedDL:
5083
loaded = load_with_system_search(libname)
5184
if loaded is not None:
5285
return loaded
86+
5387
abs_path = finder.try_with_cuda_home()
54-
if abs_path is None:
55-
finder.raise_not_found_error()
56-
else:
88+
if abs_path is not None:
5789
found_via = "CUDA_HOME"
90+
else:
91+
# Canary probe: if the direct system search and CUDA_HOME both
92+
# failed (e.g. nvvm isn't on the linker path and CUDA_HOME is
93+
# unset), try to discover the CTK root by system-loading a
94+
# well-known CTK lib that IS on the linker path, then look for
95+
# the target lib relative to that root.
96+
abs_path = _try_ctk_root_canary(finder)
97+
if abs_path is not None:
98+
found_via = "system-ctk-root"
99+
else:
100+
finder.raise_not_found_error()
58101

59102
return load_with_abs_path(libname, abs_path, found_via)
60103

@@ -123,6 +166,14 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
123166
124167
- If set, use ``CUDA_HOME`` or ``CUDA_PATH`` (in that order).
125168
169+
5. **CTK root canary probe**
170+
171+
- For libraries whose shared object doesn't reside on the standard
172+
linker path (e.g. ``libnvvm.so`` lives under ``$CTK_ROOT/nvvm/lib64``),
173+
attempt to discover the CTK installation root by system-loading a
174+
well-known CTK library (``cudart``) that *is* on the linker path, then
175+
derive the root from its resolved absolute path.
176+
126177
Notes:
127178
The search is performed **per library**. There is currently no mechanism to
128179
guarantee that multiple libraries are all resolved from the same location.

0 commit comments

Comments
 (0)