Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def _no_such_file_in_sub_dirs(
attachments.append(f" {node}")


def _find_so_using_nvidia_lib_dirs(
def _find_so_using_nvidia_wheel_lib_dirs(
libname: str, so_basename: str, error_messages: list[str], attachments: list[str]
) -> Optional[str]:
rel_dirs = SITE_PACKAGES_LIBDIRS_LINUX.get(libname)
Expand Down Expand Up @@ -61,7 +61,7 @@ def _find_dll_under_dir(dirpath: str, file_wild: str) -> Optional[str]:
return None


def _find_dll_using_nvidia_bin_dirs(
def _find_dll_using_nvidia_wheel_bin_dirs(
libname: str, lib_searched_for: str, error_messages: list[str], attachments: list[str]
) -> Optional[str]:
rel_dirs = SITE_PACKAGES_LIBDIRS_WINDOWS.get(libname)
Expand Down Expand Up @@ -157,7 +157,7 @@ def __init__(self, libname: str):
if IS_WINDOWS:
self.lib_searched_for = f"{libname}*.dll"
if self.abs_path is None:
self.abs_path = _find_dll_using_nvidia_bin_dirs(
self.abs_path = _find_dll_using_nvidia_wheel_bin_dirs(
libname,
self.lib_searched_for,
self.error_messages,
Expand All @@ -166,7 +166,7 @@ def __init__(self, libname: str):
else:
self.lib_searched_for = f"lib{libname}.so"
if self.abs_path is None:
self.abs_path = _find_so_using_nvidia_lib_dirs(
self.abs_path = _find_so_using_nvidia_wheel_lib_dirs(
libname,
self.lib_searched_for,
self.error_messages,
Expand Down
129 changes: 129 additions & 0 deletions cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_linux.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,135 @@ def load_with_system_search(libname: str) -> Optional[LoadedDL]:
return None


def _conda_so_search_dirs(libname: str) -> list[str]:
    """Return the existing conda directories to probe for ``libname``, in priority order."""
    # The activation-time environment variables are used (rather than probing
    # sys.prefix for "conda-meta") because conda activation always sets
    # CONDA_PREFIX, the same approach covers conda-build, and the running
    # Python may live outside the activated environment.
    if os.getenv("CONDA_BUILD") == "1":
        # conda-build: PREFIX is searched before BUILD_PREFIX.
        prefixes = [os.getenv("PREFIX"), os.getenv("BUILD_PREFIX")]
    else:
        prefixes = [os.getenv("CONDA_PREFIX")]

    # TODO KEITH: All the libs in the targets directory are symlinked into the lib directory, do we need to search it?
    # TODO KEITH: Should we do platform detection here to avoid extra searches? Any considerations we need to do in a
    # cross compilation build environment?
    if libname == "nvvm":
        rel_dirs = (
            os.path.join("targets", "x86_64-linux", "nvvm", "lib64"),
            os.path.join("targets", "sbsa-linux", "nvvm", "lib64"),
            # NOTE(review): fallback is "<prefix>/nvvm" as originally written;
            # confirm whether "<prefix>/nvvm/lib64" was intended.
            "nvvm",
        )
    else:
        rel_dirs = (
            os.path.join("targets", "x86_64-linux", "lib"),
            os.path.join("targets", "sbsa-linux", "lib"),
            "lib",
        )

    # Per prefix: target-specific directories first, then the regular lib directory.
    candidate_dirs = (os.path.join(prefix, rel_dir) for prefix in prefixes if prefix for rel_dir in rel_dirs)
    return [dirpath for dirpath in candidate_dirs if os.path.isdir(dirpath)]


def _try_load_so_from_dir(libname: str, dirpath: str, soname: str) -> Optional[LoadedDL]:
    """Try to load ``dirpath/soname``; return None if the loader rejects it."""
    try:
        handle = _load_lib(libname, os.path.join(dirpath, soname))
    except OSError:
        return None
    # TODO KEITH: Do we need this abs_path_for_dynamic_library call?
    # We're already resolving the absolute path based on the conda environment variables
    abs_path = abs_path_for_dynamic_library(libname, handle)
    if abs_path is None:
        raise RuntimeError(f"No expected symbol for {libname=!r}")
    return LoadedDL(abs_path, False, handle._handle)


def load_with_conda_search(libname: str) -> Optional[LoadedDL]:
    """Try to load a library using conda search paths.

    When ``CONDA_BUILD`` is ``"1"`` the directories under ``PREFIX`` and then
    ``BUILD_PREFIX`` are searched; otherwise the directories under
    ``CONDA_PREFIX`` are searched. Each candidate soname is tried in every
    existing search directory before moving on to the next soname.

    Args:
        libname: The name of the library to load

    Returns:
        A LoadedDL object if successful, None if no conda environment is
        detected or the library cannot be loaded from it

    Raises:
        RuntimeError: If a library loads but its absolute path cannot be
            resolved via ``abs_path_for_dynamic_library``
    """
    search_dirs = _conda_so_search_dirs(libname)
    if not search_dirs:
        return None

    for soname in get_candidate_sonames(libname):
        for dirpath in search_dirs:
            # Build a fresh path per directory; never rebind ``soname``, or
            # later directories would be probed with an already-joined path.
            loaded = _try_load_so_from_dir(libname, dirpath, soname)
            if loaded is not None:
                return loaded
    return None


def _work_around_known_bugs(libname: str, found_path: str) -> None:
if libname == "nvrtc":
# Work around bug/oversight in
Expand Down
58 changes: 58 additions & 0 deletions cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_dl_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,64 @@ def load_with_system_search(libname: str) -> Optional[LoadedDL]:
return None


def _conda_dll_search_dirs(libname: str) -> list[str]:
    """Return the existing conda directories to probe for ``libname`` DLLs, in priority order."""
    if os.getenv("CONDA_BUILD") == "1":
        # conda-build: PREFIX is searched before BUILD_PREFIX.
        prefixes = [os.getenv("PREFIX"), os.getenv("BUILD_PREFIX")]
    else:
        prefixes = [os.getenv("CONDA_PREFIX")]

    if libname == "nvvm":
        rel_dir = os.path.join("Library", "nvvm", "bin", "x64")
    else:
        rel_dir = os.path.join("Library", "bin", "x64")

    candidate_dirs = (os.path.join(prefix, rel_dir) for prefix in prefixes if prefix)
    return [dirpath for dirpath in candidate_dirs if os.path.isdir(dirpath)]


def _try_load_dll_from_dir(libname: str, dirpath: str, dll_name: str) -> Optional[LoadedDL]:
    """Try to load ``dirpath\\dll_name``; return None if ``LoadLibraryExW`` fails."""
    handle = kernel32.LoadLibraryExW(os.path.join(dirpath, dll_name), None, 0)
    if not handle:
        return None
    # TODO KEITH: Do we need this abs_path_for_dynamic_library call?
    # We're already resolving the absolute path based on the conda environment variables
    abs_path = abs_path_for_dynamic_library(libname, handle)
    return LoadedDL(abs_path, False, ctypes_handle_to_unsigned_int(handle))


def load_with_conda_search(libname: str) -> Optional[LoadedDL]:
    """Try to load a DLL using conda search paths.

    When ``CONDA_BUILD`` is ``"1"`` the DLL directory under ``PREFIX`` and then
    ``BUILD_PREFIX`` is searched; otherwise the one under ``CONDA_PREFIX`` is
    searched. Each supported DLL name is tried in every existing search
    directory before moving on to the next name.

    Args:
        libname: The name of the library to load

    Returns:
        A LoadedDL object if successful, None if no conda environment is
        detected or the library cannot be loaded from it
    """
    search_dirs = _conda_dll_search_dirs(libname)
    if not search_dirs:
        return None

    for dll_name in SUPPORTED_WINDOWS_DLLS.get(libname, ()):
        for dirpath in search_dirs:
            # Build a fresh path per directory; never rebind ``dll_name``, or
            # later directories would be probed with an already-joined path.
            loaded = _try_load_dll_from_dir(libname, dirpath, dll_name)
            if loaded is not None:
                return loaded
    return None


def load_with_abs_path(libname: str, found_path: str) -> LoadedDL:
"""Load a dynamic library from the given path.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@
from cuda.pathfinder._dynamic_libs.load_dl_windows import (
check_if_already_loaded_from_elsewhere,
load_with_abs_path,
load_with_conda_search,
load_with_system_search,
)
else:
from cuda.pathfinder._dynamic_libs.load_dl_linux import (
check_if_already_loaded_from_elsewhere,
load_with_abs_path,
load_with_conda_search,
load_with_system_search,
)

Expand All @@ -41,6 +43,9 @@ def _load_lib_no_cache(libname: str) -> LoadedDL:
return loaded

if not have_abs_path:
loaded = load_with_conda_search(libname)
if loaded is not None:
return loaded
loaded = load_with_system_search(libname)
if loaded is not None:
return loaded
Expand Down Expand Up @@ -77,7 +82,12 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
- Scan installed distributions (``site-packages``) to find libraries
shipped in NVIDIA wheels.

2. **OS default mechanisms / Conda environments**
2. **Conda environment**

- Detect whether a conda environment (or a conda-build session) is active and search the expected library
locations inside it.

3. **OS default mechanisms**

- Fall back to the native loader:

Expand Down
5 changes: 5 additions & 0 deletions cuda_pathfinder/docs/source/release/1.X.Y-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,8 @@ Highlights
- Improves stability in general and supports nvmath specifically
- Proactive change to improve library loading consistency
- Drops boilerplate docstrings for private functions

* Add conda search path prioritization for ``load_nvidia_dynamic_lib()`` (`PR #856 <https://github.com/NVIDIA/cuda-python/pull/856>`_)
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* Add conda search path prioritization for ``load_nvidia_dynamic_lib()`` (`PR #856 <https://github.com/NVIDIA/cuda-python/pull/856>`_)
* Add conda search path prioritization for ``load_nvidia_dynamic_lib()`` (`PR #919 <https://github.com/NVIDIA/cuda-python/pull/919>`_)


- Enables loading libraries from conda environments before system paths
- Handles version mismatches between the conda environment and the system CTK
Loading