From fc7cd5250200ed1f57a194f39766f0ca81833e29 Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Thu, 28 Aug 2025 01:29:14 -0400 Subject: [PATCH 1/2] add conda specific functions to pathfinder and search conda before general system search --- .../_dynamic_libs/find_nvidia_dynamic_lib.py | 8 +- .../pathfinder/_dynamic_libs/load_dl_linux.py | 129 ++++++++++++++++++ .../_dynamic_libs/load_dl_windows.py | 58 ++++++++ .../_dynamic_libs/load_nvidia_dynamic_lib.py | 5 + 4 files changed, 196 insertions(+), 4 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py index 18708a2b3eb..c4239d62ed4 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/find_nvidia_dynamic_lib.py @@ -27,7 +27,7 @@ def _no_such_file_in_sub_dirs( attachments.append(f" {node}") -def _find_so_using_nvidia_lib_dirs( +def _find_so_using_nvidia_wheel_lib_dirs( libname: str, so_basename: str, error_messages: list[str], attachments: list[str] ) -> Optional[str]: rel_dirs = SITE_PACKAGES_LIBDIRS_LINUX.get(libname) @@ -61,7 +61,7 @@ def _find_dll_under_dir(dirpath: str, file_wild: str) -> Optional[str]: return None -def _find_dll_using_nvidia_bin_dirs( +def _find_dll_using_nvidia_wheel_bin_dirs( libname: str, lib_searched_for: str, error_messages: list[str], attachments: list[str] ) -> Optional[str]: rel_dirs = SITE_PACKAGES_LIBDIRS_WINDOWS.get(libname) @@ -157,7 +157,7 @@ def __init__(self, libname: str): if IS_WINDOWS: self.lib_searched_for = f"{libname}*.dll" if self.abs_path is None: - self.abs_path = _find_dll_using_nvidia_bin_dirs( + self.abs_path = _find_dll_using_nvidia_wheel_bin_dirs( libname, self.lib_searched_for, self.error_messages, @@ -166,7 +166,7 @@ def __init__(self, libname: str): else: self.lib_searched_for = f"lib{libname}.so" if self.abs_path is None: - self.abs_path = 
_find_so_using_nvidia_lib_dirs(
+                self.abs_path = _find_so_using_nvidia_wheel_lib_dirs(
                     libname,
                     self.lib_searched_for,
                     self.error_messages,
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
index ef7f078c9a1..b60450f69b4 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
@@ -173,6 +173,81 @@ def load_with_system_search(libname: str) -> Optional[LoadedDL]:
     return None
 
 
+def _conda_search_prefixes() -> list[str]:
+    """Return the conda prefixes to search for libraries, in priority order.
+
+    Inside a conda-build session (``CONDA_BUILD=1``) these are ``PREFIX`` and
+    ``BUILD_PREFIX``; otherwise just ``CONDA_PREFIX``. Unset or empty variables
+    are skipped, so an empty list means no conda environment was detected.
+    """
+    if os.getenv("CONDA_BUILD") == "1":
+        env_vars: tuple[str, ...] = ("PREFIX", "BUILD_PREFIX")
+    else:
+        env_vars = ("CONDA_PREFIX",)
+    return [prefix for env_var in env_vars if (prefix := os.getenv(env_var))]
+
+
+def _conda_lib_rel_dirs(libname: str) -> tuple[str, ...]:
+    """Return the prefix-relative directories to search for ``libname``, in priority order."""
+    # TODO KEITH: All the libs in the targets directory are symlinked into the lib directory, do we need to search it?
+    # TODO KEITH: Should we do platform detection here to avoid extra searches? Any considerations we need to do in a
+    # cross compilation build environment?
+    if libname == "nvvm":
+        return (
+            os.path.join("targets", "x86_64-linux", "nvvm", "lib64"),
+            os.path.join("targets", "sbsa-linux", "nvvm", "lib64"),
+            # NOTE(review): no "lib64" component here, unlike the targets entries above -- confirm.
+            "nvvm",
+        )
+    return (
+        os.path.join("targets", "x86_64-linux", "lib"),
+        os.path.join("targets", "sbsa-linux", "lib"),
+        "lib",
+    )
+
+
+def load_with_conda_search(libname: str) -> Optional[LoadedDL]:
+    """Try to load a library using conda search paths.
+
+    Every candidate soname is tried in each conda prefix, looking first in the
+    CUDA ``targets`` directories and then in the regular library directory.
+
+    Args:
+        libname: The name of the library to load
+
+    Returns:
+        A LoadedDL object if successful, None if no conda environment is detected
+        or the library cannot be loaded from it
+
+    Raises:
+        RuntimeError: If the library loads but ``abs_path_for_dynamic_library`` returns None
+    """
+    prefixes = _conda_search_prefixes()
+    if not prefixes:
+        return None
+
+    rel_dirs = _conda_lib_rel_dirs(libname)
+    for soname in get_candidate_sonames(libname):
+        for prefix in prefixes:
+            for rel_dir in rel_dirs:
+                lib_dir = os.path.join(prefix, rel_dir)
+                if not os.path.isdir(lib_dir):
+                    continue
+                # Build a fresh path for every attempt; rebinding ``soname`` here would make
+                # all later attempts retry the first absolute path.
+                try:
+                    handle = _load_lib(libname, os.path.join(lib_dir, soname))
+                except OSError:
+                    continue
+                # TODO KEITH: Do we need this abs_path_for_dynamic_library call?
+                # We're already resolving the absolute path based on the conda environment variables
+                abs_path = abs_path_for_dynamic_library(libname, handle)
+                if abs_path is None:
+                    raise RuntimeError(f"No expected symbol for {libname=!r}")
+                return LoadedDL(abs_path, False, handle._handle)
+    return None
+
+
 def _work_around_known_bugs(libname: str, found_path: str) -> None:
     if libname == "nvrtc":
         # Work around bug/oversight in
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
index 1a4f32cf213..8423c0161c1 100644
--- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
+++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
@@ -132,6 +132,49 @@ def load_with_system_search(libname: str) -> Optional[LoadedDL]:
     return None
 
 
+def load_with_conda_search(libname: str) -> Optional[LoadedDL]:
+    """Try to load a DLL using conda search paths.
+
+    Inside a conda-build session (``CONDA_BUILD=1``) the ``PREFIX`` and
+    ``BUILD_PREFIX`` environments are searched, in that order; otherwise the
+    active ``CONDA_PREFIX`` environment is searched.
+
+    Args:
+        libname: The name of the library to load
+
+    Returns:
+        A LoadedDL object if successful, None if no conda environment is detected
+        or the library cannot be loaded from it
+    """
+    if os.getenv("CONDA_BUILD") == "1":
+        env_vars: tuple[str, ...] = ("PREFIX", "BUILD_PREFIX")
+    else:
+        env_vars = ("CONDA_PREFIX",)
+    prefixes = [prefix for env_var in env_vars if (prefix := os.getenv(env_var))]
+    if not prefixes:
+        return None
+
+    if libname == "nvvm":
+        rel_dir = os.path.join("Library", "nvvm", "bin", "x64")
+    else:
+        rel_dir = os.path.join("Library", "bin", "x64")
+
+    for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()):
+        for prefix in prefixes:
+            dll_dir = os.path.join(prefix, rel_dir)
+            if not os.path.isdir(dll_dir):
+                continue
+            # Build a fresh path for every attempt; rebinding ``dll_name`` here would make
+            # all later attempts retry the first absolute path.
+            handle = kernel32.LoadLibraryExW(os.path.join(dll_dir, dll_name), None, 0)
+            if handle:
+                # TODO KEITH: Do we need this abs_path_for_dynamic_library call?
+                # We're already resolving the absolute path based on the conda environment variables
+                abs_path = abs_path_for_dynamic_library(libname, handle)
+                return LoadedDL(abs_path, False, ctypes_handle_to_unsigned_int(handle))
+    return None
+
+
 def load_with_abs_path(libname: str, found_path: str) -> LoadedDL:
     """Load a dynamic library from the given path.
 
diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py index 3160333aa92..6a6882addf7 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py @@ -13,12 +13,14 @@ from cuda.pathfinder._dynamic_libs.load_dl_windows import ( check_if_already_loaded_from_elsewhere, load_with_abs_path, + load_with_conda_search, load_with_system_search, ) else: from cuda.pathfinder._dynamic_libs.load_dl_linux import ( check_if_already_loaded_from_elsewhere, load_with_abs_path, + load_with_conda_search, load_with_system_search, ) @@ -41,6 +43,9 @@ def _load_lib_no_cache(libname: str) -> LoadedDL: return loaded if not have_abs_path: + loaded = load_with_conda_search(libname) + if loaded is not None: + return loaded loaded = load_with_system_search(libname) if loaded is not None: return loaded From ca6ae8e4a7535e5611919102c1bb17911569e0f6 Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Thu, 28 Aug 2025 01:40:45 -0400 Subject: [PATCH 2/2] Update docstring and add release note --- .../pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py | 7 ++++++- cuda_pathfinder/docs/source/release/1.X.Y-notes.rst | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py index 6a6882addf7..b5040123f1e 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py @@ -82,7 +82,12 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL: - Scan installed distributions (``site-packages``) to find libraries shipped in NVIDIA wheels. - 2. **OS default mechanisms / Conda environments** + 2. 
**Conda environment** + + - Detects if in a conda environment and searches for libraries in the expected locations within the conda + environment. + + 3. **OS default mechanisms** - Fall back to the native loader: diff --git a/cuda_pathfinder/docs/source/release/1.X.Y-notes.rst b/cuda_pathfinder/docs/source/release/1.X.Y-notes.rst index 769e6f546ee..4fe9adc1fd0 100644 --- a/cuda_pathfinder/docs/source/release/1.X.Y-notes.rst +++ b/cuda_pathfinder/docs/source/release/1.X.Y-notes.rst @@ -23,3 +23,8 @@ Highlights - Improves stability in general and supports nvmath specifically - Proactive change to improve library loading consistency - Drops boilerplate docstrings for private functions + +* Add conda search path prioritization for ``load_nvidia_dynamic_lib()`` (`PR #856 <https://github.com/NVIDIA/cuda-python/pull/856>`_) + + - Enables loading libraries from conda environments before system paths + - Handles version mismatches between the conda environment and the system CTK