-
Notifications
You must be signed in to change notification settings - Fork 278
Add conda-specific search into load_nvidia_dynamic_lib()
#1003
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
5174ea2
8da27a7
37cb325
3e5aa1d
5b3c5eb
233ad08
2642d35
edff6ec
5ba032c
cc7d84d
894c6b5
bd5e2ae
ce2ca30
9e0b4a8
9cd31bb
5fd4801
7498392
2841734
c83fa0d
22999d1
3e7ca0e
917803e
f1d39d9
37414a5
1bde30e
5056e46
2c336c9
735ca85
3d6ea31
ab2a349
44ed3e6
45fb2cd
d19d3ec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -80,10 +80,7 @@ def _find_dll_using_nvidia_bin_dirs( | |
| return None | ||
|
|
||
|
|
||
| def _find_lib_dir_using_cuda_home(libname: str) -> Optional[str]: | ||
| cuda_home = get_cuda_home_or_path() | ||
| if cuda_home is None: | ||
| return None | ||
| def _find_lib_dir_using_anchor_point(libname: str, anchor_point: str, linux_lib_dir: str) -> Optional[str]: | ||
| subdirs_list: tuple[tuple[str, ...], ...] | ||
| if IS_WINDOWS: | ||
| if libname == "nvvm": # noqa: SIM108 | ||
|
|
@@ -100,17 +97,30 @@ def _find_lib_dir_using_cuda_home(libname: str) -> Optional[str]: | |
| if libname == "nvvm": # noqa: SIM108 | ||
| subdirs_list = (("nvvm", "lib64"),) | ||
| else: | ||
| subdirs_list = ( | ||
| ("lib64",), # CTK | ||
| ("lib",), # Conda | ||
| ) | ||
| subdirs_list = ((linux_lib_dir,),) | ||
| for sub_dirs in subdirs_list: | ||
| dirname: str # work around bug in mypy | ||
| for dirname in find_sub_dirs((cuda_home,), sub_dirs): | ||
| for dirname in find_sub_dirs((anchor_point,), sub_dirs): | ||
| return dirname | ||
| return None | ||
|
|
||
|
|
||
| def _find_lib_dir_using_cuda_home(libname: str) -> Optional[str]: | ||
| cuda_home = get_cuda_home_or_path() | ||
| if cuda_home is None: | ||
| return None | ||
| return _find_lib_dir_using_anchor_point(libname, anchor_point=cuda_home, linux_lib_dir="lib64") | ||
|
|
||
|
|
||
| def _find_lib_dir_using_conda_prefix(libname: str) -> Optional[str]: | ||
| conda_prefix = os.getenv("CONDA_PREFIX") | ||
| if not conda_prefix: | ||
| return None | ||
| return _find_lib_dir_using_anchor_point( | ||
| libname, anchor_point=os.path.join(conda_prefix, "Library") if IS_WINDOWS else conda_prefix, linux_lib_dir="lib" | ||
| ) | ||
|
kkraus14 marked this conversation as resolved.
|
||
|
|
||
|
|
||
| def _find_so_using_lib_dir( | ||
| lib_dir: str, so_basename: str, error_messages: list[str], attachments: list[str] | ||
| ) -> Optional[str]: | ||
|
|
@@ -146,44 +156,56 @@ def __init__(self, libname: str): | |
| self.libname = libname | ||
| self.error_messages: list[str] = [] | ||
| self.attachments: list[str] = [] | ||
| self.abs_path = None | ||
| self.abs_path: Optional[str] = None | ||
|
|
||
| self._try_site_packages() | ||
| self._try_with_conda_prefix() | ||
|
leofang marked this conversation as resolved.
Outdated
|
||
|
|
||
| def _try_site_packages(self) -> None: | ||
| if IS_WINDOWS: | ||
| self.lib_searched_for = f"{libname}*.dll" | ||
| self.lib_searched_for = f"{self.libname}*.dll" | ||
| if self.abs_path is None: | ||
|
rwgk marked this conversation as resolved.
Outdated
|
||
| self.abs_path = _find_dll_using_nvidia_bin_dirs( | ||
| libname, | ||
| self.libname, | ||
| self.lib_searched_for, | ||
| self.error_messages, | ||
| self.attachments, | ||
| ) | ||
| else: | ||
| self.lib_searched_for = f"lib{libname}.so" | ||
| self.lib_searched_for = f"lib{self.libname}.so" | ||
| if self.abs_path is None: | ||
|
rwgk marked this conversation as resolved.
Outdated
|
||
| self.abs_path = _find_so_using_nvidia_lib_dirs( | ||
| libname, | ||
| self.libname, | ||
| self.lib_searched_for, | ||
| self.error_messages, | ||
| self.attachments, | ||
| ) | ||
|
|
||
| def retry_with_cuda_home_priority_last(self) -> None: | ||
| def _try_with_conda_prefix(self) -> None: | ||
| conda_lib_dir = _find_lib_dir_using_conda_prefix(self.libname) | ||
| if conda_lib_dir is not None: | ||
| self._find_using_lib_dir(conda_lib_dir) | ||
|
|
||
| def try_with_cuda_home(self) -> None: | ||
| cuda_home_lib_dir = _find_lib_dir_using_cuda_home(self.libname) | ||
| if cuda_home_lib_dir is not None: | ||
| if IS_WINDOWS: | ||
| self.abs_path = _find_dll_using_lib_dir( | ||
| cuda_home_lib_dir, | ||
| self.libname, | ||
| self.error_messages, | ||
| self.attachments, | ||
| ) | ||
| else: | ||
| self.abs_path = _find_so_using_lib_dir( | ||
| cuda_home_lib_dir, | ||
| self.lib_searched_for, | ||
| self.error_messages, | ||
| self.attachments, | ||
| ) | ||
| self._find_using_lib_dir(cuda_home_lib_dir) | ||
|
|
||
| def _find_using_lib_dir(self, lib_dir: str) -> None: | ||
| if IS_WINDOWS: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know this is pre-existing, but it seems like this is begging for some sort of interface that you can isolate into modules and then import at the top level based on platform, similar to how to the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried this twice before but backed off both times after seeing where it's going, and determining it's a net loss in terms of code organization and readability. In contrast, it was clearly a net win for the rest of the code underneath |
||
| self.abs_path = _find_dll_using_lib_dir( | ||
| lib_dir, | ||
| self.libname, | ||
| self.error_messages, | ||
| self.attachments, | ||
| ) | ||
| else: | ||
| self.abs_path = _find_so_using_lib_dir( | ||
| lib_dir, | ||
| self.lib_searched_for, | ||
| self.error_messages, | ||
| self.attachments, | ||
| ) | ||
|
|
||
| def raise_if_abs_path_is_None(self) -> str: # noqa: N802 | ||
| if self.abs_path: | ||
|
|
||
|
leofang marked this conversation as resolved.
|
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -44,7 +44,7 @@ def _load_lib_no_cache(libname: str) -> LoadedDL: | |||||
| loaded = load_with_system_search(libname) | ||||||
| if loaded is not None: | ||||||
| return loaded | ||||||
| found.retry_with_cuda_home_priority_last() | ||||||
| found.try_with_cuda_home() | ||||||
| found.raise_if_abs_path_is_None() | ||||||
|
|
||||||
| assert found.abs_path is not None # for mypy | ||||||
|
|
@@ -77,22 +77,20 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL: | |||||
| - Scan installed distributions (``site-packages``) to find libraries | ||||||
| shipped in NVIDIA wheels. | ||||||
|
|
||||||
| 2. **OS default mechanisms / Conda environments** | ||||||
| 2. **Conda environment** | ||||||
|
|
||||||
| - Conda installations are discovered via ``CONDA_PREFIX``, which is | ||||||
| predefined in activated conda environments (see | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
I don't think the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changed to "defined automatically": commit bd5e2ae This is for people not-so-familiar with Conda. That one extra word is to reassure them that they don't have to think about defining the variable themselves. |
||||||
| https://docs.conda.io/projects/conda-build/en/stable/user-guide/environment-variables.html). | ||||||
|
|
||||||
| 3. **OS default mechanisms** | ||||||
|
|
||||||
| - Fall back to the native loader: | ||||||
|
|
||||||
| - Linux: ``dlopen()`` | ||||||
|
|
||||||
| - Windows: ``LoadLibraryW()`` | ||||||
|
|
||||||
| - Conda installations are commonly discovered via: | ||||||
|
|
||||||
| - Linux: ``$ORIGIN/../lib`` in the ``RPATH`` of the ``python`` binary | ||||||
| (note: this can take precedence over ``LD_LIBRARY_PATH`` and | ||||||
| ``/etc/ld.so.conf.d/``). | ||||||
|
|
||||||
| - Windows: ``%CONDA_PREFIX%\\Library\\bin`` on the system ``PATH``. | ||||||
|
|
||||||
| - CUDA Toolkit (CTK) system installs with system config updates are often | ||||||
| discovered via: | ||||||
|
|
||||||
|
|
@@ -101,7 +99,7 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL: | |||||
| - Windows: ``C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\vX.Y\\bin`` | ||||||
| on the system ``PATH``. | ||||||
|
|
||||||
| 3. **Environment variables** | ||||||
| 4. **Environment variables** | ||||||
|
|
||||||
| - If set, use ``CUDA_HOME`` or ``CUDA_PATH`` (in that order). | ||||||
|
|
||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| __version__ = "1.2.3" | ||
| __version__ = "1.2.4a0" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,7 +2,10 @@ | |
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| import functools | ||
| import json | ||
| import os | ||
| import shlex | ||
| from subprocess import list2cmdline # nosec B404 | ||
| from unittest.mock import patch | ||
|
|
||
| import pytest | ||
|
|
@@ -90,6 +93,12 @@ def _get_libnames_for_test_load_nvidia_dynamic_lib(): | |
| return tuple(result) | ||
|
|
||
|
|
||
| def _quote_for_shell_copypaste(s): | ||
|
leofang marked this conversation as resolved.
Outdated
|
||
| if os.name == "nt": | ||
| return list2cmdline([s]) | ||
| return shlex.quote(s) | ||
|
leofang marked this conversation as resolved.
Outdated
|
||
|
|
||
|
|
||
| @pytest.mark.parametrize("libname", _get_libnames_for_test_load_nvidia_dynamic_lib()) | ||
| def test_load_nvidia_dynamic_lib(info_summary_append, libname): | ||
| # We intentionally run each dynamic library operation in a child process | ||
|
|
@@ -98,9 +107,18 @@ def test_load_nvidia_dynamic_lib(info_summary_append, libname): | |
| # interfere across test cases and lead to nondeterministic or platform-specific failures. | ||
| timeout = 120 if supported_nvidia_libs.IS_WINDOWS else 30 | ||
| result = spawned_process_runner.run_in_spawned_child_process(child_process_func, args=(libname,), timeout=timeout) | ||
| if result.returncode == 0: | ||
| info_summary_append(f"abs_path={result.stdout.rstrip()}") | ||
| elif STRICTNESS == "see_what_works" or "DynamicLibNotFoundError: Failure finding " in result.stderr: | ||
|
|
||
| def raise_child_process_failed(): | ||
| raise RuntimeError(build_child_process_failed_for_libname_message(libname, result)) | ||
|
|
||
| if result.returncode != 0: | ||
| raise_child_process_failed() | ||
| assert not result.stderr | ||
| if result.stdout.startswith("CHILD_LOAD_NVIDIA_DYNAMIC_LIB_HELPER_DYNAMIC_LIB_NOT_FOUND_ERROR:"): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there some reason not to use stderr here, since that is its intended use (communicating errors)?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That was a deliberate choice I made while working on commit edff6ec. See the bug-fix commit right before (2642d35) for why I worked on it. The requirements on my mind:
So I introduced a tag that is practically certain to be unique: I added |
||
| if STRICTNESS == "all_must_work": | ||
| raise_child_process_failed() | ||
| info_summary_append(f"Not found: {libname=!r}") | ||
| else: | ||
| raise RuntimeError(build_child_process_failed_for_libname_message(libname, result)) | ||
| abs_path = json.loads(result.stdout.rstrip()) | ||
|
leofang marked this conversation as resolved.
|
||
| info_summary_append(f"abs_path={_quote_for_shell_copypaste(abs_path)}") | ||
| assert os.path.isfile(abs_path) # double-check the abs_path | ||
Uh oh!
There was an error while loading. Please reload this page.