Skip to content
Merged
11 changes: 9 additions & 2 deletions build_tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import sys
import platform
from pathlib import Path
from importlib.metadata import version as get_version
from importlib.metadata import PackageNotFoundError, distribution, version as get_version
from subprocess import CalledProcessError
from typing import List, Optional, Tuple, Union

Expand Down Expand Up @@ -292,10 +292,17 @@ def cuda_version() -> Tuple[int, ...]:
version_str = get_version("nvidia-cuda-runtime-cu12")
version_tuple = tuple(int(part) for part in version_str.split(".") if part.isdigit())
return version_tuple
except importlib.metadata.PackageNotFoundError:
except PackageNotFoundError:
raise RuntimeError("Could neither find NVCC executable nor CUDA runtime Python package.")


def cusolvermp_pypi_package_name(cuda_major: Optional[int] = None) -> str:
    """Return the name of the PyPI package that ships the cuSolverMp runtime.

    Parameters
    ----------
    cuda_major : Optional[int]
        CUDA major version to build the package name for. When ``None``,
        the first component of ``cuda_version()`` is used instead.

    Returns
    -------
    str
        Package name of the form ``nvidia-cusolvermp-cu<major>``.
    """
    # Compare against None explicitly so that any integer passed by the
    # caller is used as given; detection only happens when nothing is passed.
    major = cuda_major if cuda_major is not None else cuda_version()[0]
    return f"nvidia-cusolvermp-cu{major}"


def get_frameworks() -> List[str]:
"""DL frameworks to build support for"""
_frameworks: List[str] = []
Expand Down
13 changes: 12 additions & 1 deletion build_tools/wheel_utils/Dockerfile.aarch
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,23 @@ RUN dnf clean all
# Install glog with its development package, and the NCCL packages
# (libnccl, libnccl-devel, libnccl-static).
RUN dnf -y install glog.aarch64 glog-devel.aarch64
RUN dnf -y install libnccl libnccl-devel libnccl-static

# Install the cuSolverMp packages for the selected CUDA major version so the
# TE CMake build can use the system libraries.
RUN dnf -y install \
libcusolvermp0-cuda-${CUDA_MAJOR} libcusolvermp0-devel-cuda-${CUDA_MAJOR} && \
dnf clean all
# Expose the versioned cuSolverMp include and lib directories under the fixed
# prefix /opt/nvidia/cusolvermp via symlinks, then register the library
# directory with the dynamic linker cache (ldconfig).
RUN mkdir -p /opt/nvidia/cusolvermp && \
ln -s /usr/include/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/include && \
ln -s /usr/lib64/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/lib && \
echo "/usr/lib64/libcusolvermp/${CUDA_MAJOR}" > /etc/ld.so.conf.d/999_nvidia_cusolvermp.conf && \
ldconfig

# Put the CUDA toolkit binaries on PATH and the CUDA / cuSolverMp library
# directories on LD_LIBRARY_PATH.
ENV PATH="/usr/local/cuda/bin:${PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/opt/nvidia/cusolvermp/lib:${LD_LIBRARY_PATH}"
# Several variable names that all point at the same CUDA install root.
ENV CUDA_HOME=/usr/local/cuda
ENV CUDA_ROOT=/usr/local/cuda
ENV CUDA_PATH=/usr/local/cuda
ENV CUDADIR=/usr/local/cuda
# Prefix that holds the include/ and lib/ symlinks created above.
ENV CUSOLVERMP_HOME=/opt/nvidia/cusolvermp
# NOTE(review): presumably switches the TE build into release mode; confirm against setup.py.
ENV NVTE_RELEASE_BUILD=1

# Default command: run the wheel build script for the manylinux_2_28_aarch64
# platform, forwarding the BUILD_* switches and CUDA_MAJOR as arguments.
CMD ["/bin/bash", "-c", "bash /TransformerEngine/build_tools/wheel_utils/build_wheels.sh manylinux_2_28_aarch64 $BUILD_METAPACKAGE $BUILD_COMMON $BUILD_PYTORCH $BUILD_JAX $CUDA_MAJOR"]
13 changes: 12 additions & 1 deletion build_tools/wheel_utils/Dockerfile.x86
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,23 @@ RUN dnf clean all
# Install glog with its development package, and the NCCL packages
# (libnccl, libnccl-devel, libnccl-static).
RUN dnf -y install glog.x86_64 glog-devel.x86_64
RUN dnf -y install libnccl libnccl-devel libnccl-static

# Install the cuSolverMp packages for the selected CUDA major version so the
# TE CMake build can use the system libraries.
RUN dnf -y install \
libcusolvermp0-cuda-${CUDA_MAJOR} libcusolvermp0-devel-cuda-${CUDA_MAJOR} && \
dnf clean all
# Expose the versioned cuSolverMp include and lib directories under the fixed
# prefix /opt/nvidia/cusolvermp via symlinks, then register the library
# directory with the dynamic linker cache (ldconfig).
RUN mkdir -p /opt/nvidia/cusolvermp && \
ln -s /usr/include/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/include && \
ln -s /usr/lib64/libcusolvermp/${CUDA_MAJOR} /opt/nvidia/cusolvermp/lib && \
echo "/usr/lib64/libcusolvermp/${CUDA_MAJOR}" > /etc/ld.so.conf.d/999_nvidia_cusolvermp.conf && \
ldconfig

# Put the CUDA toolkit binaries on PATH and the CUDA / cuSolverMp library
# directories on LD_LIBRARY_PATH.
ENV PATH="/usr/local/cuda/bin:${PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64:/opt/nvidia/cusolvermp/lib:${LD_LIBRARY_PATH}"
# Several variable names that all point at the same CUDA install root.
ENV CUDA_HOME=/usr/local/cuda
ENV CUDA_ROOT=/usr/local/cuda
ENV CUDA_PATH=/usr/local/cuda
ENV CUDADIR=/usr/local/cuda
# Prefix that holds the include/ and lib/ symlinks created above.
ENV CUSOLVERMP_HOME=/opt/nvidia/cusolvermp
# NOTE(review): presumably switches the TE build into release mode; confirm against setup.py.
ENV NVTE_RELEASE_BUILD=1

# Default command: run the wheel build script for the manylinux_2_28_x86_64
# platform, forwarding the BUILD_* switches and CUDA_MAJOR as arguments.
CMD ["/bin/bash", "-c", "bash /TransformerEngine/build_tools/wheel_utils/build_wheels.sh manylinux_2_28_x86_64 $BUILD_METAPACKAGE $BUILD_COMMON $BUILD_PYTORCH $BUILD_JAX $CUDA_MAJOR"]
4 changes: 4 additions & 0 deletions build_tools/wheel_utils/build_wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ git submodule update --init --recursive
# Install deps
/opt/python/cp310-cp310/bin/pip install cmake pybind11[global] ninja setuptools wheel

# Enable optional build features. cuSolverMp is provided by the build image
# (see Dockerfile.x86 / Dockerfile.aarch), which also sets CUSOLVERMP_HOME.
export NVTE_WITH_CUSOLVERMP=1
Comment thread
ksivaman marked this conversation as resolved.

if $BUILD_METAPACKAGE ; then
cd /TransformerEngine
NVTE_BUILD_METAPACKAGE=1 /opt/python/cp310-cp310/bin/python setup.py bdist_wheel 2>&1 | tee /wheelhouse/logs/metapackage.txt
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from build_tools.utils import (
cuda_archs,
cuda_version,
cusolvermp_pypi_package_name,
get_frameworks,
remove_dups,
min_python_version_str,
Expand Down Expand Up @@ -109,6 +110,7 @@ def setup_requirements() -> Tuple[List[str], List[str]]:
"pydantic",
"importlib-metadata>=1.0",
"packaging",
cusolvermp_pypi_package_name(),
Comment thread
cyanguwa marked this conversation as resolved.
]
test_reqs: List[str] = ["pytest>=8.2.1"]

Expand Down
28 changes: 28 additions & 0 deletions transformer_engine/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,29 @@ def _nvidia_cudart_include_dir() -> str:
return str(include_dir) if include_dir.exists() else ""


@functools.lru_cache(maxsize=None)
def _is_cusolvermp_installed_in_system() -> bool:
    """Tell whether the system library cache lists a cuSolverMp library.

    The check runs ``ldconfig -p`` and looks for ``cusolvermp``
    (case-insensitively) in its output. The result is memoized, so the
    subprocess is launched at most once per process.

    Returns
    -------
    bool
        ``True`` if any line printed by ``ldconfig -p`` mentions cuSolverMp.
        ``False`` on non-Linux platforms, when ``ldconfig`` cannot be run or
        exits with a non-zero status, or when no line matches.
    """
    # The probe is only attempted on Linux.
    if platform.system() != "Linux":
        return False

    try:
        # check=False: a failing ldconfig is reported through returncode
        # below rather than by raising CalledProcessError.
        listing = subprocess.run(
            ["ldconfig", "-p"], capture_output=True, text=True, check=False
        )
    except (OSError, subprocess.SubprocessError):
        # ldconfig is missing or could not be executed: treat as "not installed".
        return False

    if listing.returncode != 0:
        return False

    for entry in listing.stdout.splitlines():
        if "cusolvermp" in entry.lower():
            return True
    return False


@functools.lru_cache(maxsize=None)
def _load_cuda_library_from_python(lib_name: str, strict: bool = False):
"""
Expand Down Expand Up @@ -369,6 +392,11 @@ def _load_core_library():
_, _CUDNN_LIB_CTYPES = _load_cuda_library("cudnn")
system_nvrtc, _NVRTC_LIB_CTYPES = _load_cuda_library("nvrtc")
system_curand, _CURAND_LIB_CTYPES = _load_cuda_library("curand")
_CUSOLVERMP_LIB_CTYPES = None
if not _is_cusolvermp_installed_in_system() and any(
_is_package_installed(p) for p in ("nvidia-cusolvermp-cu12", "nvidia-cusolvermp-cu13")
):
_, _CUSOLVERMP_LIB_CTYPES = _load_cuda_library_from_python("cusolverMp", strict=False)

# This additional step is necessary to be able to install TE wheels
# and import TE (without any guards) in an environment where the cuda
Expand Down
Loading