@@ -14,6 +14,130 @@ if (RSC_BUILD_EXTENSIONS)
# Packages required to build the compiled extensions. The Python component
# list ends with ${SKBUILD_SABI_COMPONENT}, left unquoted so that an empty
# value contributes no extra component.
find_package(
  Python
  REQUIRED
  COMPONENTS Interpreter Development.Module ${SKBUILD_SABI_COMPONENT}
)
find_package(nanobind CONFIG REQUIRED)
find_package(CUDAToolkit REQUIRED)
# Clear the candidate lists before discovery; later steps append to them and
# the first entry of each list is the one that gets used.
unset(RSC_RMM_HINTS)
unset(RSC_RAPIDS_CMAKE_PREFIXES)
unset(RSC_CCCL_HINTS)
unset(RSC_RAPIDS_LOGGER_HINTS)
unset(RSC_NVTX3_HINTS)
# _rsc_collect_rapids_python_prefix(<prefix>)
#
# Probe <prefix>/lib/python*/site-packages for the CMake package directories
# shipped inside the librmm, rapids_logger and nvidia/cu* Python packages and
# append whatever exists to RSC_RMM_HINTS, RSC_RAPIDS_CMAKE_PREFIXES,
# RSC_CCCL_HINTS, RSC_RAPIDS_LOGGER_HINTS and RSC_NVTX3_HINTS.
#
#   _rsc_prefix  environment root to probe; an empty string is ignored.
#
# Being a macro, the appends (and the _rsc_*_dirs temporaries) land directly
# in the caller's scope. file(GLOB) is used both to expand the python* / cu*
# wildcards and to drop candidates that are not present on disk.
macro(_rsc_collect_rapids_python_prefix _rsc_prefix)
  if(NOT "${_rsc_prefix}" STREQUAL "")
    file(GLOB _rsc_rmm_dirs
      "${_rsc_prefix}/lib/python*/site-packages/librmm/lib64/cmake/rmm"
    )
    file(GLOB _rsc_rapids_prefixes
      "${_rsc_prefix}/lib/python*/site-packages/librmm/lib64"
      "${_rsc_prefix}/lib/python*/site-packages/librmm/lib64/rapids"
      "${_rsc_prefix}/lib/python*/site-packages/rapids_logger/lib64"
      "${_rsc_prefix}/lib/python*/site-packages/nvidia/cu*/lib"
    )
    file(GLOB _rsc_cccl_dirs
      "${_rsc_prefix}/lib/python*/site-packages/librmm/lib64/rapids/cmake/cccl"
      "${_rsc_prefix}/lib/python*/site-packages/nvidia/cu*/lib/cmake/cccl"
    )
    file(GLOB _rsc_rapids_logger_dirs
      "${_rsc_prefix}/lib/python*/site-packages/rapids_logger/lib64/cmake/rapids_logger"
    )
    file(GLOB _rsc_nvtx3_dirs
      "${_rsc_prefix}/lib/python*/site-packages/librmm/lib64/cmake/nvtx3"
    )
    list(APPEND RSC_RMM_HINTS ${_rsc_rmm_dirs})
    list(APPEND RSC_RAPIDS_CMAKE_PREFIXES ${_rsc_rapids_prefixes})
    list(APPEND RSC_CCCL_HINTS ${_rsc_cccl_dirs})
    list(APPEND RSC_RAPIDS_LOGGER_HINTS ${_rsc_rapids_logger_dirs})
    list(APPEND RSC_NVTX3_HINTS ${_rsc_nvtx3_dirs})
  endif()
endmacro()
# Ask the build interpreter where the librmm package is installed and derive
# the directory expected to hold rmm-config.cmake. The script prints an empty
# line when librmm cannot be found; stderr is suppressed.
execute_process(
  COMMAND "${Python_EXECUTABLE}" -c "import importlib.util, pathlib; spec = importlib.util.find_spec('librmm'); print(pathlib.Path(spec.origin).parent / 'lib64' / 'cmake' / 'rmm' if spec else '')"
  RESULT_VARIABLE _rsc_python_rmm_status
  OUTPUT_VARIABLE RSC_PYTHON_RMM_DIR
  OUTPUT_STRIP_TRAILING_WHITESPACE
  ERROR_QUIET
)
# Only trust the reported directory when the interpreter exited cleanly and
# the package config file is really there; otherwise leave the hint empty.
set(_rsc_python_rmm_hint "")
if(_rsc_python_rmm_status EQUAL 0
   AND RSC_PYTHON_RMM_DIR
   AND EXISTS "${RSC_PYTHON_RMM_DIR}/rmm-config.cmake")
  set(_rsc_python_rmm_hint "${RSC_PYTHON_RMM_DIR}")
endif()
# Resolve an explicitly recorded librmm directory.
# The RSC_LIBRMM_DIR environment variable is used when it points at a tree
# containing lib64/cmake/rmm/rmm-config.cmake; otherwise the contents of
# build/.librmm_dir are used when that file exists.
# Wheel builds write build/.librmm_dir from CIBW_BEFORE_BUILD.
# publish.yml symlinks runtime libs so auditwheel excludes them.
set(_rsc_librmm_marker "")
if(DEFINED ENV{RSC_LIBRMM_DIR}
   AND EXISTS "$ENV{RSC_LIBRMM_DIR}/lib64/cmake/rmm/rmm-config.cmake")
  set(_rsc_librmm_marker "$ENV{RSC_LIBRMM_DIR}")
elseif(EXISTS "${CMAKE_SOURCE_DIR}/build/.librmm_dir")
  file(READ "${CMAKE_SOURCE_DIR}/build/.librmm_dir" _rsc_librmm_marker)
  # Drop the trailing newline (and any other surrounding whitespace).
  string(STRIP "${_rsc_librmm_marker}" _rsc_librmm_marker)
endif()

# When the recorded directory really contains an rmm package config, collect
# its package directories first so they take precedence over the environment
# probes that append to the same lists afterwards. The patterns contain no
# wildcards: file(GLOB) only filters out paths that do not exist.
if(NOT "${_rsc_librmm_marker}" STREQUAL ""
   AND EXISTS "${_rsc_librmm_marker}/lib64/cmake/rmm/rmm-config.cmake")
  file(GLOB _rsc_marker_rmm_dirs "${_rsc_librmm_marker}/lib64/cmake/rmm")
  file(GLOB _rsc_marker_rapids_prefixes
    "${_rsc_librmm_marker}/lib64"
    "${_rsc_librmm_marker}/lib64/rapids"
    "${_rsc_librmm_marker}/../rapids_logger/lib64"
  )
  file(GLOB _rsc_marker_cccl_dirs
    "${_rsc_librmm_marker}/lib64/rapids/cmake/cccl"
  )
  file(GLOB _rsc_marker_rapids_logger_dirs
    "${_rsc_librmm_marker}/../rapids_logger/lib64/cmake/rapids_logger"
  )
  file(GLOB _rsc_marker_nvtx3_dirs "${_rsc_librmm_marker}/lib64/cmake/nvtx3")
  list(APPEND RSC_RMM_HINTS ${_rsc_marker_rmm_dirs})
  list(APPEND RSC_RAPIDS_CMAKE_PREFIXES ${_rsc_marker_rapids_prefixes})
  list(APPEND RSC_CCCL_HINTS ${_rsc_marker_cccl_dirs})
  list(APPEND RSC_RAPIDS_LOGGER_HINTS ${_rsc_marker_rapids_logger_dirs})
  list(APPEND RSC_NVTX3_HINTS ${_rsc_marker_nvtx3_dirs})
endif()
# Probe the known environment roots in priority order: the Python root
# directories first, then the active conda / virtualenv prefixes. Unset
# values expand to empty strings, which the collector ignores.
foreach(_rsc_env_root IN ITEMS
    "${Python_ROOT_DIR}"
    "${Python3_ROOT_DIR}"
    "$ENV{CONDA_PREFIX}"
    "$ENV{VIRTUAL_ENV}")
  _rsc_collect_rapids_python_prefix("${_rsc_env_root}")
endforeach()

# Finally treat the parent of every PATH entry (e.g. <env>/bin -> <env>) as a
# candidate prefix. file(TO_CMAKE_PATH) splits the native search path into a
# CMake list using the platform's own separator.
file(TO_CMAKE_PATH "$ENV{PATH}" _rsc_path_entries)
foreach(_rsc_path_entry IN LISTS _rsc_path_entries)
  # An empty entry would resolve to the filesystem root; skip it.
  if(NOT "${_rsc_path_entry}" STREQUAL "")
    get_filename_component(_rsc_path_prefix "${_rsc_path_entry}/.." ABSOLUTE)
    _rsc_collect_rapids_python_prefix("${_rsc_path_prefix}")
  endif()
endforeach()
# Use the interpreter-reported librmm location only as a last resort, i.e.
# when nothing else has contributed an rmm hint and the probe produced one.
if(NOT RSC_RMM_HINTS AND NOT "${_rsc_python_rmm_hint}" STREQUAL "")
  list(APPEND RSC_RMM_HINTS "${_rsc_python_rmm_hint}")
endif()
# Expose the discovered RAPIDS prefixes to find_package and pin the package
# directories of rmm's dependencies to the first hint found for each.
# NOTE(review): FORCE overwrites any existing cache entry, including a value
# passed on the command line with -D; confirm that auto-discovery is meant to
# win over user-supplied CCCL_DIR / rapids_logger_DIR / nvtx3_DIR.
if(RSC_RAPIDS_CMAKE_PREFIXES)
  list(APPEND CMAKE_PREFIX_PATH ${RSC_RAPIDS_CMAKE_PREFIXES})
  if(RSC_CCCL_HINTS)
    list(GET RSC_CCCL_HINTS 0 _rsc_cccl_dir)
    set(CCCL_DIR "${_rsc_cccl_dir}"
        CACHE PATH "Path to CCCL package config" FORCE)
  endif()
  if(RSC_RAPIDS_LOGGER_HINTS)
    list(GET RSC_RAPIDS_LOGGER_HINTS 0 _rsc_rapids_logger_dir)
    set(rapids_logger_DIR "${_rsc_rapids_logger_dir}"
        CACHE PATH "Path to rapids_logger package config" FORCE)
  endif()
  if(RSC_NVTX3_HINTS)
    list(GET RSC_NVTX3_HINTS 0 _rsc_nvtx3_dir)
    set(nvtx3_DIR "${_rsc_nvtx3_dir}"
        CACHE PATH "Path to nvtx3 package config" FORCE)
  endif()
endif()
# Pin rmm_DIR to the first discovered hint, if any, then locate rmm. Without a
# hint, find_package falls back to its normal search (CMAKE_PREFIX_PATH etc.).
# NOTE(review): FORCE also replaces an rmm_DIR supplied by the user with -D.
if(RSC_RMM_HINTS)
  list(GET RSC_RMM_HINTS 0 _rsc_rmm_dir)
  set(rmm_DIR "${_rsc_rmm_dir}" CACHE PATH "Path to rmm package config" FORCE)
endif()
find_package(rmm CONFIG REQUIRED)
120+
# CCCL 3.3.0 guards its use of cudaDevAttrHostNumaMemoryPoolsSupported too
# loosely, so source builds against CUDA 12.6-12.8 are rejected up front.
# Only a known CCCL_VERSION strictly greater than 3.3.0 counts as fixed; an
# unknown version, or any version up to and including 3.3.0, is treated as
# affected. NOTE(review): confirm that releases older than 3.3.0 really need
# the guard.
if(DEFINED CCCL_VERSION AND CCCL_VERSION VERSION_GREATER 3.3.0)
  set(_rsc_cccl_buggy_numa_guard FALSE)
else()
  set(_rsc_cccl_buggy_numa_guard TRUE)
endif()

# RSC_SKIP_CUDA_VERSION_CHECK lets users bypass the check entirely.
if(_rsc_cccl_buggy_numa_guard AND NOT RSC_SKIP_CUDA_VERSION_CHECK)
  if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.6
     AND CUDAToolkit_VERSION VERSION_LESS 12.9)
    message(FATAL_ERROR
      "Cannot build rapids_singlecell from source with CUDA ${CUDAToolkit_VERSION} against "
      "CCCL ${CCCL_VERSION} (RAPIDS 26.04): it references cudaDevAttrHostNumaMemoryPoolsSupported, "
      "which the CUDA 12.6-12.8 toolkit does not define (NVIDIA added it in 12.9). "
      "Use CUDA >= 12.9 (or <= 12.5), upgrade to RAPIDS >= 26.06 (CCCL > 3.3.0 fixes the guard), "
      "or install the prebuilt wheel (pip install rapids-singlecell-cu12). "
      "If your toolkit does define this enum, override with -DRSC_SKIP_CUDA_VERSION_CHECK=ON.")
  endif()
endif()
139+
# Configure-time summary for the extension build.
message(STATUS "Using RMM for CUDA extension scratch allocations")
message(STATUS "Building for CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
else()
# Extensions disabled: nothing above ran, only report why.
message(STATUS "RSC_BUILD_EXTENSIONS=OFF -> skipping compiled extensions for docs")
@@ -62,6 +186,57 @@ function(add_nb_cuda_module target src)
62186 endif ()
63187endfunction ()
64188
# add_rmm_cuda_module(<target> <src>)
#
# RMM-backed nanobind CUDA module: normal module plus shared scratch allocator.
# Wheels use sibling RAPIDS packages; editable imports still preload fallbacks.
#
#   target  module target name, forwarded to add_nb_cuda_module
#   src     module source file, forwarded to add_nb_cuda_module
#
# When RSC_BUILD_EXTENSIONS is on, the target additionally compiles
# rmm_scratch.cu, links rmm::rmm privately, and gets BUILD_RPATH /
# INSTALL_RPATH entries for the librmm and rapids_logger library directories.
function(add_rmm_cuda_module target src)
  add_nb_cuda_module(${target} ${src})
  if(NOT RSC_BUILD_EXTENSIONS)
    return()
  endif()

  target_sources(${target} PRIVATE
    src/rapids_singlecell/_cuda/rmm_scratch.cu)
  target_link_libraries(${target} PRIVATE rmm::rmm)

  # Build-tree rpath: prefer the library directories inside the active conda
  # environment's site-packages when they exist.
  set(_rsc_rmm_build_rpath)
  set(_rsc_rmm_have_build_librmm FALSE)
  set(_rsc_rmm_have_build_rapids_logger FALSE)
  if(DEFINED ENV{CONDA_PREFIX})
    set(_rsc_rmm_env_site
      "$ENV{CONDA_PREFIX}/lib/python${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}/site-packages")
    if(EXISTS "${_rsc_rmm_env_site}/librmm/lib64")
      list(APPEND _rsc_rmm_build_rpath "${_rsc_rmm_env_site}/librmm/lib64")
      set(_rsc_rmm_have_build_librmm TRUE)
    endif()
    if(EXISTS "${_rsc_rmm_env_site}/rapids_logger/lib64")
      list(APPEND _rsc_rmm_build_rpath
        "${_rsc_rmm_env_site}/rapids_logger/lib64")
      set(_rsc_rmm_have_build_rapids_logger TRUE)
    endif()
  endif()

  # Otherwise derive the library directory from the package config location:
  # two levels above <libdir>/cmake/<pkg>, with symlinks resolved.
  if(NOT _rsc_rmm_have_build_librmm AND rmm_DIR)
    get_filename_component(_rsc_rmm_build_librmm_dir
      "${rmm_DIR}/../.." REALPATH)
    list(APPEND _rsc_rmm_build_rpath "${_rsc_rmm_build_librmm_dir}")
  endif()
  if(NOT _rsc_rmm_have_build_rapids_logger AND rapids_logger_DIR)
    get_filename_component(_rsc_rmm_build_rapids_logger_dir
      "${rapids_logger_DIR}/../.." REALPATH)
    list(APPEND _rsc_rmm_build_rpath "${_rsc_rmm_build_rapids_logger_dir}")
  endif()

  # Installed rpath: locate the sibling librmm / rapids_logger packages
  # relative to the module itself. The escaped $ keeps $ORIGIN literal so the
  # dynamic loader, not CMake, expands it.
  set(_rsc_rmm_install_rpath
    "\$ORIGIN/../../librmm/lib64"
    "\$ORIGIN/../../rapids_logger/lib64"
  )

  # NOTE(review): this also records the build machine's CUDA library
  # directory in the installed rpath; confirm that is intended for wheels.
  if(CUDAToolkit_LIBRARY_DIR)
    list(APPEND _rsc_rmm_build_rpath "${CUDAToolkit_LIBRARY_DIR}")
    list(APPEND _rsc_rmm_install_rpath "${CUDAToolkit_LIBRARY_DIR}")
  endif()

  set_target_properties(${target} PROPERTIES
    BUILD_RPATH "${_rsc_rmm_build_rpath}"
    INSTALL_RPATH "${_rsc_rmm_install_rpath}"
  )
endfunction()
239+
65240if (RSC_BUILD_EXTENSIONS)
66241 # CUDA modules
67242 add_nb_cuda_module (_mean_var_cuda src/rapids_singlecell/_cuda/mean_var/mean_var.cu )
@@ -91,7 +266,9 @@ if (RSC_BUILD_EXTENSIONS)
add_nb_cuda_module(_pseudobulk_cuda src/rapids_singlecell/_cuda/pseudobulk/pseudobulk.cu)
add_nb_cuda_module(_hvg_cuda src/rapids_singlecell/_cuda/hvg/hvg.cu)
add_nb_cuda_module(_kde_cuda src/rapids_singlecell/_cuda/kde/kde.cu)
# The Wilcoxon modules are declared once, through add_rmm_cuda_module, which
# adds rmm_scratch.cu and links rmm::rmm on top of the regular module setup.
add_rmm_cuda_module(_wilcoxon_cuda src/rapids_singlecell/_cuda/wilcoxon/wilcoxon.cu)
add_rmm_cuda_module(_wilcoxon_sparse_cuda src/rapids_singlecell/_cuda/wilcoxon/wilcoxon_sparse.cu)
add_nb_cuda_module(_rank_stats_cuda src/rapids_singlecell/_cuda/rank_genes/rank_stats.cu)
# Harmony CUDA modules
add_nb_cuda_module(_harmony_scatter_cuda src/rapids_singlecell/_cuda/harmony/scatter/scatter.cu)
add_nb_cuda_module(_harmony_outer_cuda src/rapids_singlecell/_cuda/harmony/outer/outer.cu)
0 commit comments