From 8a5ec10b1fb7f9e7850036136d5d88c13c93b508 Mon Sep 17 00:00:00 2001 From: tarang-jain Date: Wed, 20 May 2026 08:22:02 -0700 Subject: [PATCH 1/4] replace _ref --- c/src/core/c_api.cpp | 6 ++-- .../src/common/cuda_huge_page_resource.hpp | 2 +- cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h | 6 ++-- cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu | 4 +-- cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu | 4 +-- cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h | 6 ++-- cpp/bench/ann/src/cuvs/cuvs_vamana.cu | 4 +-- cpp/cmake/patches/faiss-1.14-cuvs-26.06.diff | 2 +- .../cuvs_internal/neighbors/naive_knn.cuh | 4 +-- cpp/src/cluster/detail/kmeans_balanced.cuh | 30 ++++++++-------- cpp/src/cluster/kmeans_balanced.cuh | 4 +-- .../kmeans_balanced_build_clusters_impl.cuh | 2 +- cpp/src/distance/detail/masked_nn.cuh | 4 +-- cpp/src/neighbors/composite/index.cu | 2 +- cpp/src/neighbors/detail/ann_utils.cuh | 24 ++++++------- cpp/src/neighbors/detail/cagra/add_nodes.cuh | 2 +- .../neighbors/detail/cagra/cagra_build.cuh | 10 +++--- .../neighbors/detail/cagra/cagra_merge.cuh | 2 +- cpp/src/neighbors/detail/cagra/graph_core.cuh | 10 +++--- .../neighbors/detail/cagra/search_plan.cuh | 8 ++--- cpp/src/neighbors/detail/cagra/utils.hpp | 6 ++-- .../neighbors/detail/vamana/greedy_search.cuh | 2 +- .../neighbors/detail/vamana/vamana_build.cuh | 30 ++++++++-------- cpp/src/neighbors/detail/vpq_dataset.cuh | 4 +-- cpp/src/neighbors/ivf_common.cu | 2 +- cpp/src/neighbors/ivf_common.cuh | 4 +-- cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh | 16 ++++----- .../neighbors/ivf_flat/ivf_flat_search.cuh | 35 +++++++++---------- cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh | 32 ++++++++--------- cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh | 10 +++--- cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh | 10 +++--- cpp/src/neighbors/scann/detail/scann_avq.cuh | 17 ++++----- .../neighbors/scann/detail/scann_build.cuh | 2 +- .../preprocessing/quantize/detail/binary.cuh | 8 ++--- cpp/src/preprocessing/quantize/detail/pq.cuh | 2 +- .../ann_cagra/test_batch_load_iterator.cu | 10 +++--- cpp/tests/neighbors/naive_knn.cuh | 4 +-- docs/source/api_basics.rst | 2 +- examples/cpp/src/cagra_example.cu | 2 +- examples/cpp/src/cagra_hnsw_ace_example.cu | 2 +- examples/cpp/src/cagra_persistent_example.cu | 2 +- examples/cpp/src/hnsw_ace_example.cu | 2 +- examples/cpp/src/ivf_flat_example.cu | 2 +- examples/cpp/src/ivf_pq_example.cu | 2 +- examples/cpp/src/scann_example.cu | 2 +- examples/cpp/src/vamana_example.cu | 2 +- 46 files changed, 170 insertions(+), 178 deletions(-) diff --git a/c/src/core/c_api.cpp b/c/src/core/c_api.cpp index f4e3664482..3847875aa4 100644 --- a/c/src/core/c_api.cpp +++ b/c/src/core/c_api.cpp @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "../core/exceptions.hpp" @@ -132,7 +132,7 @@ extern "C" cuvsError_t cuvsRMMAlloc(cuvsResources_t res, void** ptr, size_t byte { return cuvs::core::translate_exceptions([=] { auto res_ptr = reinterpret_cast(res); - auto mr = rmm::mr::get_current_device_resource_ref(); + auto mr = rmm::mr::get_current_device_resource(); *ptr = mr.allocate(raft::resource::get_cuda_stream(*res_ptr), bytes); }); } @@ -141,7 +141,7 @@ extern "C" cuvsError_t cuvsRMMFree(cuvsResources_t res, void* ptr, size_t bytes) { return cuvs::core::translate_exceptions([=] { auto res_ptr = reinterpret_cast(res); - auto mr = rmm::mr::get_current_device_resource_ref(); + auto mr = rmm::mr::get_current_device_resource(); mr.deallocate(raft::resource::get_cuda_stream(*res_ptr), ptr, bytes); }); } diff --git a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp index e5555132bb..aae71605a5 100644 --- a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp +++ b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h index 1a276e8cc8..f818a1e208 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h +++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -73,7 +73,7 @@ class shared_raft_resources { try : large_mr_() { orig_resource_ = rmm::mr::set_current_device_resource(rmm::mr::failure_callback_resource_adaptor<>{ - rmm::mr::pool_memory_resource{rmm::mr::get_current_device_resource_ref(), + rmm::mr::pool_memory_resource{rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull}, rmm_oom_callback, nullptr}); @@ -98,7 +98,7 @@ class shared_raft_resources { ~shared_raft_resources() noexcept { rmm::mr::set_current_device_resource(orig_resource_); } - auto get_large_memory_resource() noexcept -> rmm::device_async_resource_ref { return large_mr_; } + auto get_large_memory_resource() noexcept -> rmm::device_async_resource { return large_mr_; } private: cuda::mr::any_resource orig_resource_; diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu b/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu index 1521333c5e..e67adf9005 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -9,7 +9,7 @@ #include #include -#include +#include namespace cuvs::bench { diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu index 26028b6d98..9645e5df63 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -9,7 +9,7 @@ #include #include -#include +#include namespace cuvs::bench { diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h index 87111e4761..d872fd6aab 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include @@ -190,12 +190,12 @@ class cuvs_cagra : public algo, public algo_gpu { std::shared_ptr filter_; std::vector>> sub_indices_; - inline rmm::device_async_resource_ref get_mr(AllocatorType mem_type) + inline rmm::device_async_resource get_mr(AllocatorType mem_type) { switch (mem_type) { case (AllocatorType::kHostPinned): return mr_pinned_; case (AllocatorType::kHostHugePage): return mr_huge_page_; - default: return rmm::mr::get_current_device_resource_ref(); + default: return rmm::mr::get_current_device_resource(); } } }; diff --git a/cpp/bench/ann/src/cuvs/cuvs_vamana.cu b/cpp/bench/ann/src/cuvs/cuvs_vamana.cu index 185095d5b4..14eb64b55a 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_vamana.cu +++ b/cpp/bench/ann/src/cuvs/cuvs_vamana.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -8,7 +8,7 @@ #include #include -#include +#include namespace cuvs::bench { diff --git a/cpp/cmake/patches/faiss-1.14-cuvs-26.06.diff b/cpp/cmake/patches/faiss-1.14-cuvs-26.06.diff index 802930da76..4db328e278 100644 --- a/cpp/cmake/patches/faiss-1.14-cuvs-26.06.diff +++ b/cpp/cmake/patches/faiss-1.14-cuvs-26.06.diff @@ -61,7 +61,7 @@ index 548618262..3be071550 100644 - rmm::mr::device_memory_resource* current_mr = - rmm::mr::get_per_device_resource( + auto current_mr = -+ rmm::mr::get_per_device_resource_ref( ++ rmm::mr::get_per_device_resource( rmm::cuda_device_id{adjReq.device}); - p = current_mr->allocate(adjReq.stream, adjReq.size); + p = current_mr.allocate(adjReq.stream, adjReq.size); diff --git a/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh b/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh index 7bc37193a0..9c893587dc 100644 --- a/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh +++ b/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace cuvs::neighbors { @@ -88,7 +88,7 @@ void naive_knn(raft::resources const& handle, uint32_t k, cuvs::distance::DistanceType type) { - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref(); + rmm::device_async_resource mr = rmm::mr::get_current_device_resource(); auto stream = raft::resource::get_cuda_stream(handle); dim3 block_dim(16, 32, 1); diff --git a/cpp/src/cluster/detail/kmeans_balanced.cuh b/cpp/src/cluster/detail/kmeans_balanced.cuh index 26ce110158..cd5c6f1aa0 100644 --- a/cpp/src/cluster/detail/kmeans_balanced.cuh +++ b/cpp/src/cluster/detail/kmeans_balanced.cuh @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include @@ -83,7 +83,7 @@ inline std::enable_if_t> predict_core( const MathT* dataset_norm, IdxT n_rows, LabelT* labels, - rmm::device_async_resource_ref mr) + rmm::device_async_resource mr) { auto stream = raft::resource::get_cuda_stream(handle); switch (params.metric) { @@ -253,7 +253,7 @@ void calc_centers_and_sizes(const raft::resources& handle, const LabelT* labels, bool reset_counters, MappingOpT mapping_op, - rmm::device_async_resource_ref mr) + rmm::device_async_resource mr) { auto stream = raft::resource::get_cuda_stream(handle); @@ -316,12 +316,12 @@ void compute_norm(const raft::resources& handle, IdxT n_rows, MappingOpT mapping_op, FinOpT norm_fin_op, - std::optional mr = std::nullopt) + std::optional mr = std::nullopt) { raft::common::nvtx::range fun_scope("compute_norm"); auto stream = raft::resource::get_cuda_stream(handle); rmm::device_uvector mapped_dataset( - 0, stream, mr.value_or(raft::resource::get_workspace_resource_ref(handle))); + 0, stream, mr.value_or(raft::resource::get_workspace_resource(handle))); const MathT* dataset_ptr = nullptr; @@ -377,13 +377,13 @@ void predict(const raft::resources& handle, IdxT n_rows, LabelT* labels, MappingOpT mapping_op, - std::optional mr = std::nullopt, - const MathT* dataset_norm = nullptr) + std::optional mr = std::nullopt, + const MathT* dataset_norm = nullptr) { auto stream = raft::resource::get_cuda_stream(handle); raft::common::nvtx::range fun_scope( "predict(%zu, %u)", static_cast(n_rows), n_clusters); - auto mem_res = mr.value_or(raft::resource::get_workspace_resource_ref(handle)); + auto mem_res = mr.value_or(raft::resource::get_workspace_resource(handle)); auto [max_minibatch_size, _mem_per_row] = calc_minibatch_size( handle, n_clusters, n_rows, dim, params.metric, std::is_same_v); rmm::device_uvector cur_dataset( @@ -551,7 +551,7 @@ auto adjust_centers(MathT* centers, MathT threshold, MappingOpT mapping_op, rmm::cuda_stream_view stream, - rmm::device_async_resource_ref device_memory) -> bool + rmm::device_async_resource device_memory) -> bool { raft::common::nvtx::range fun_scope( "adjust_centers(%zu, %u)", static_cast(n_rows), n_clusters); @@ -649,7 +649,7 @@ void balancing_em_iters(const raft::resources& handle, uint32_t balancing_pullback, MathT balancing_threshold, MappingOpT mapping_op, - rmm::device_async_resource_ref device_memory) + rmm::device_async_resource device_memory) { auto stream = raft::resource::get_cuda_stream(handle); uint32_t balancing_counter = balancing_pullback; @@ -732,7 +732,7 @@ void build_clusters(const raft::resources& handle, LabelT* cluster_labels, CounterT* cluster_sizes, MappingOpT mapping_op, - rmm::device_async_resource_ref device_memory, + rmm::device_async_resource device_memory, const MathT* dataset_norm = nullptr) { auto stream = raft::resource::get_cuda_stream(handle); @@ -874,8 +874,8 @@ auto build_fine_clusters(const raft::resources& handle, IdxT fine_clusters_nums_max, MathT* cluster_centers, MappingOpT mapping_op, - rmm::device_async_resource_ref managed_memory, - rmm::device_async_resource_ref device_memory) -> IdxT + rmm::device_async_resource managed_memory, + rmm::device_async_resource device_memory) -> IdxT { auto stream = raft::resource::get_cuda_stream(handle); rmm::device_uvector mc_trainset_ids_buf(mesocluster_size_max, stream, managed_memory); @@ -995,8 +995,8 @@ void build_hierarchical(const raft::resources& handle, // TODO: Remove the explicit managed memory- we shouldn't be creating this on the user's behalf. rmm::mr::managed_memory_resource managed_memory; - rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource_ref(handle); - auto [max_minibatch_size, mem_per_row] = calc_minibatch_size( + rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(handle); + auto [max_minibatch_size, mem_per_row] = calc_minibatch_size( handle, n_clusters, n_rows, dim, params.metric, std::is_same_v); // Precompute the L2 norm of the dataset if relevant and not yet computed. diff --git a/cpp/src/cluster/kmeans_balanced.cuh b/cpp/src/cluster/kmeans_balanced.cuh index f3f52c2d8f..0c0df03397 100644 --- a/cpp/src/cluster/kmeans_balanced.cuh +++ b/cpp/src/cluster/kmeans_balanced.cuh @@ -154,7 +154,7 @@ void predict(const raft::resources& handle, X.extent(0), labels.data_handle(), mapping_op, - raft::resource::get_workspace_resource_ref(handle)); + raft::resource::get_workspace_resource(handle)); } namespace helpers { @@ -305,7 +305,7 @@ void calc_centers_and_sizes(const raft::resources& handle, labels.data_handle(), reset_counters, mapping_op, - raft::resource::get_workspace_resource_ref(handle)); + raft::resource::get_workspace_resource(handle)); } } // namespace helpers diff --git a/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh b/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh index 2bce856c6c..e1ace274e2 100644 --- a/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh +++ b/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh @@ -68,7 +68,7 @@ void build_clusters(const raft::resources& handle, labels.data_handle(), cluster_sizes.data_handle(), mapping_op, - raft::resource::get_workspace_resource_ref(handle), + raft::resource::get_workspace_resource(handle), X_norm.has_value() ? X_norm.value().data_handle() : nullptr); } diff --git a/cpp/src/distance/detail/masked_nn.cuh b/cpp/src/distance/detail/masked_nn.cuh index a3a187e8c4..4c2ac0a993 100644 --- a/cpp/src/distance/detail/masked_nn.cuh +++ b/cpp/src/distance/detail/masked_nn.cuh @@ -246,8 +246,8 @@ void masked_l2_nn_impl(raft::resources const& handle, static_assert(P::Mblk == 64, "masked_l2_nn_impl only supports a policy with 64 rows per block."); // Get stream and workspace memory resource - rmm::device_async_resource_ref ws_mr = raft::resource::get_workspace_resource_ref(handle); - auto stream = raft::resource::get_cuda_stream(handle); + rmm::device_async_resource ws_mr = raft::resource::get_workspace_resource(handle); + auto stream = raft::resource::get_cuda_stream(handle); // Acquire temporary buffers and initialize to zero: // 1) Adjacency matrix bitfield diff --git a/cpp/src/neighbors/composite/index.cu b/cpp/src/neighbors/composite/index.cu index 7c02d0e43e..57bd0816eb 100644 --- a/cpp/src/neighbors/composite/index.cu +++ b/cpp/src/neighbors/composite/index.cu @@ -40,7 +40,7 @@ void composite_index::search( size_t buffer_size = num_queries * K * num_indices; auto main_stream = raft::resource::get_cuda_stream(handle); - auto tmp_res = raft::resource::get_workspace_resource_ref(handle); + auto tmp_res = raft::resource::get_workspace_resource(handle); rmm::device_uvector neighbors_buffer(buffer_size, main_stream, tmp_res); rmm::device_uvector distances_buffer(buffer_size, main_stream, tmp_res); diff --git a/cpp/src/neighbors/detail/ann_utils.cuh b/cpp/src/neighbors/detail/ann_utils.cuh index bdb2c310c6..10aee6dee0 100644 --- a/cpp/src/neighbors/detail/ann_utils.cuh +++ b/cpp/src/neighbors/detail/ann_utils.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include @@ -593,7 +593,7 @@ struct batch_load_iterator { MdspanT input_view, size_type batch_size, rmm::cuda_stream_view copy_stream, - rmm::device_async_resource_ref mr, + rmm::device_async_resource mr, bool prefetch, bool initialize, bool host_writeback) @@ -853,7 +853,7 @@ struct batch_load_iterator { MdspanT input_view, size_type batch_size, rmm::cuda_stream_view copy_stream, - rmm::device_async_resource_ref mr, + rmm::device_async_resource mr, bool prefetch = false, bool initialize = true, bool host_writeback = false) @@ -864,7 +864,7 @@ struct batch_load_iterator { { } - /** Convenience overload that uses `get_workspace_resource_ref(res)` as the memory resource. */ + /** Convenience overload that uses `get_workspace_resource(res)` as the memory resource. */ batch_load_iterator(raft::resources const& res, MdspanT input_view, size_type batch_size, @@ -876,7 +876,7 @@ struct batch_load_iterator { input_view, batch_size, copy_stream, - raft::resource::get_workspace_resource_ref(res), + raft::resource::get_workspace_resource(res), prefetch, initialize, host_writeback) @@ -1018,7 +1018,7 @@ class batch_load_iterator_dyn { IdxT row_width, size_type batch_size, rmm::cuda_stream_view copy_stream, - rmm::device_async_resource_ref mr, + rmm::device_async_resource mr, bool prefetch = false, bool initialize = true, bool host_writeback = false) @@ -1036,7 +1036,7 @@ class batch_load_iterator_dyn { { } - /** Convenience overload that uses `get_workspace_resource_ref(res)` as the memory resource. */ + /** Convenience overload that uses `get_workspace_resource(res)` as the memory resource. */ batch_load_iterator_dyn(raft::resources const& res, T* ptr, IdxT n_rows, @@ -1052,7 +1052,7 @@ class batch_load_iterator_dyn { row_width, batch_size, copy_stream, - raft::resource::get_workspace_resource_ref(res), + raft::resource::get_workspace_resource(res), prefetch, initialize, host_writeback) @@ -1149,7 +1149,7 @@ class batch_load_iterator_dyn { IdxT row_width, size_type batch_size, rmm::cuda_stream_view copy_stream, - rmm::device_async_resource_ref mr, + rmm::device_async_resource mr, bool prefetch, bool initialize, bool host_writeback) -> std::variant @@ -1212,7 +1212,7 @@ auto make_batch_load_iterator(raft::resources const& res, detail::type_identity_t row_width, size_t batch_size, rmm::cuda_stream_view copy_stream, - rmm::device_async_resource_ref mr, + rmm::device_async_resource mr, bool prefetch = false, bool initialize = true, bool host_writeback = false) -> batch_load_iterator_dyn @@ -1229,7 +1229,7 @@ auto make_batch_load_iterator(raft::resources const& res, host_writeback); } -/** Convenience overload that uses `get_workspace_resource_ref(res)` as the memory resource. */ +/** Convenience overload that uses `get_workspace_resource(res)` as the memory resource. */ template auto make_batch_load_iterator(raft::resources const& res, T const* ptr, @@ -1247,7 +1247,7 @@ auto make_batch_load_iterator(raft::resources const& res, row_width, batch_size, copy_stream, - raft::resource::get_workspace_resource_ref(res), + raft::resource::get_workspace_resource(res), prefetch, initialize, host_writeback); diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index 5d0a6654e9..169a667776 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -70,7 +70,7 @@ void add_node_core( params.itopk_size = std::max(base_degree * 2lu, 256lu); // Memory space for rank-based neighbor list - auto mr = raft::resource::get_workspace_resource_ref(handle); + auto mr = raft::resource::get_workspace_resource(handle); auto neighbor_indices = raft::make_device_mdarray( handle, mr, raft::make_extents(max_search_batch_size, base_degree)); diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index 96ff8344d3..f5221724ea 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -31,7 +31,7 @@ // TODO: This shouldn't be calling spatial/knn APIs #include "../ann_utils.cuh" -#include +#include #include #include @@ -1700,9 +1700,9 @@ void build_knn_graph( } // If the workspace is smaller than desired, put the I/O buffers into the large workspace. - rmm::device_async_resource_ref workspace_mr = - use_large_workspace ? raft::resource::get_large_workspace_resource_ref(res) - : raft::resource::get_workspace_resource_ref(res); + rmm::device_async_resource workspace_mr = use_large_workspace + ? raft::resource::get_large_workspace_resource(res) + : raft::resource::get_workspace_resource(res); RAFT_LOG_DEBUG( "IVF-PQ search node_degree: %d, top_k: %d, gpu_top_k: %d, max_batch_size:: %d, n_probes: %u", @@ -2123,7 +2123,7 @@ auto iterative_build_graph( static_cast(dev_query_view.extent(1)), max_chunk_size, raft::resource::get_cuda_stream(res), - raft::resource::get_workspace_resource_ref(res)); + raft::resource::get_workspace_resource(res)); for (const auto& batch : query_batch) { auto batch_dev_query_view = raft::make_device_matrix_view( batch.data(), batch.size(), dev_query_view.extent(1)); diff --git a/cpp/src/neighbors/detail/cagra/cagra_merge.cuh b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh index 1dd4cbe075..fcce6f691c 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_merge.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/src/neighbors/detail/cagra/graph_core.cuh b/cpp/src/neighbors/detail/cagra/graph_core.cuh index 0e2ad6d769..f478cd3fa2 100644 --- a/cpp/src/neighbors/detail/cagra/graph_core.cuh +++ b/cpp/src/neighbors/detail/cagra/graph_core.cuh @@ -851,7 +851,7 @@ void merge_graph_gpu( namespace bli = cuvs::spatial::knn::detail::utils; auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res); - auto workspace_mr = raft::resource::get_workspace_resource_ref(res); + auto workspace_mr = raft::resource::get_workspace_resource(res); bli::batch_load_iterator< raft::mdspan, raft::row_major, AccessorOutputGraph>> @@ -1001,7 +1001,7 @@ void sort_knn_graph( const uint64_t input_graph_degree = knn_graph.extent(1); IdxT* const input_graph_ptr = knn_graph.data_handle(); - auto large_tmp_mr = raft::resource::get_large_workspace_resource_ref(res); + auto large_tmp_mr = raft::resource::get_large_workspace_resource(res); auto d_input_graph = raft::make_device_mdarray( res, large_tmp_mr, raft::make_extents(graph_size, input_graph_degree)); @@ -1625,7 +1625,7 @@ void prune_graph_gpu( namespace bli = cuvs::spatial::knn::detail::utils; auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res); - auto workspace_mr = raft::resource::get_workspace_resource_ref(res); + auto workspace_mr = raft::resource::get_workspace_resource(res); // Single-batch read-only iterator for the input graph (graph_size rows fit in one batch). bli::batch_load_iterator< @@ -1722,9 +1722,9 @@ void optimize( raft::resources res{res_const}; // large temporary memory for large arrays, e.g. everything >= O(graph_size) - auto large_tmp_mr = raft::resource::get_large_workspace_resource_ref(res); + auto large_tmp_mr = raft::resource::get_large_workspace_resource(res); // temporary memory for small arrays, e.g. everything <= O(batchsize * graph_degree) - auto default_ws_mr = raft::resource::get_workspace_resource_ref(res); + auto default_ws_mr = raft::resource::get_workspace_resource(res); // create a stream pool if not already present if (!res.has_resource_factory(raft::resource::resource_type::CUDA_STREAM_POOL) || diff --git a/cpp/src/neighbors/detail/cagra/search_plan.cuh b/cpp/src/neighbors/detail/cagra/search_plan.cuh index 8a9d79e177..f7a5d15a6d 100644 --- a/cpp/src/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/src/neighbors/detail/cagra/search_plan.cuh @@ -38,8 +38,8 @@ namespace cuvs::neighbors::cagra::detail { template struct lightweight_uvector { private: - using raft_res_type = const raft::resources*; - using rmm_res_type = std::tuple; + using raft_res_type = const raft::resources*; + using rmm_res_type = std::tuple; static constexpr size_t kAlign = 256; std::variant res_; @@ -58,7 +58,7 @@ struct lightweight_uvector { if (new_size == size_) { return; } if (std::holds_alternative(res_)) { auto& h = std::get(res_); - res_ = rmm_res_type{raft::resource::get_workspace_resource_ref(*h), + res_ = rmm_res_type{raft::resource::get_workspace_resource(*h), raft::resource::get_cuda_stream(*h)}; } auto& [r, s] = std::get(res_); @@ -80,7 +80,7 @@ struct lightweight_uvector { if (new_size == size_) { return; } if (std::holds_alternative(res_)) { auto& h = std::get(res_); - res_ = rmm_res_type{raft::resource::get_workspace_resource_ref(*h), stream}; + res_ = rmm_res_type{raft::resource::get_workspace_resource(*h), stream}; } else { std::get(std::get(res_)) = stream; } diff --git a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp index 58bf68bb43..16e84a5d95 100644 --- a/cpp/src/neighbors/detail/cagra/utils.hpp +++ b/cpp/src/neighbors/detail/cagra/utils.hpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include @@ -190,7 +190,7 @@ class device_matrix_view_from_host { // live on stack and not returned to a user. // The user may opt to set this resource to managed memory to allow large allocations. device_mem_.emplace(raft::make_device_mdarray( - res, raft::resource::get_large_workspace_resource_ref(res), host_view.extents())); + res, raft::resource::get_large_workspace_resource(res), host_view.extents())); raft::copy(res, device_mem_->view(), host_view); device_ptr = device_mem_->data_handle(); } @@ -276,7 +276,7 @@ void copy_with_padding( raft::resources const& res, raft::device_matrix& dst, raft::mdspan, raft::row_major, data_accessor> src, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref()) + rmm::device_async_resource mr = rmm::mr::get_current_device_resource()) { size_t padded_dim = raft::round_up_safe(src.extent(1) * sizeof(T), 16) / sizeof(T); diff --git a/cpp/src/neighbors/detail/vamana/greedy_search.cuh b/cpp/src/neighbors/detail/vamana/greedy_search.cuh index 4e71c1189c..4bdbd4695f 100644 --- a/cpp/src/neighbors/detail/vamana/greedy_search.cuh +++ b/cpp/src/neighbors/detail/vamana/greedy_search.cuh @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/src/neighbors/detail/vamana/vamana_build.cuh b/cpp/src/neighbors/detail/vamana/vamana_build.cuh index 336d81215b..ecfa809190 100644 --- a/cpp/src/neighbors/detail/vamana/vamana_build.cuh +++ b/cpp/src/neighbors/detail/vamana/vamana_build.cuh @@ -143,7 +143,7 @@ void batched_insert_vamana( auto query_ids = raft::make_device_vector(res, max_batchsize); auto query_list_ptr = raft::make_device_mdarray>( res, - raft::resource::get_large_workspace_resource_ref(res), + raft::resource::get_large_workspace_resource(res), raft::make_extents(max_batchsize + 1)); QueryCandidates* query_list = static_cast*>(query_list_ptr.data_handle()); @@ -151,11 +151,11 @@ void batched_insert_vamana( // Results of each batch of inserts during build - Memory is used by query_list structure auto visited_ids = raft::make_device_mdarray(res, - raft::resource::get_large_workspace_resource_ref(res), + raft::resource::get_large_workspace_resource(res), raft::make_extents(max_batchsize, visited_size)); auto visited_dists = raft::make_device_mdarray(res, - raft::resource::get_large_workspace_resource_ref(res), + raft::resource::get_large_workspace_resource(res), raft::make_extents(max_batchsize, visited_size)); // Assign memory to query_list structures and initialize @@ -167,14 +167,14 @@ void batched_insert_vamana( 1); auto topk_pq_mem = raft::make_device_mdarray>(res, - raft::resource::get_large_workspace_resource_ref(res), + raft::resource::get_large_workspace_resource(res), raft::make_extents(max_batchsize, visited_size)); int align_padding = raft::alignTo(dim, 16) - dim; auto s_coords_mem = raft::make_device_mdarray( res, - raft::resource::get_large_workspace_resource_ref(res), + raft::resource::get_large_workspace_resource(res), raft::make_extents(min(maxBlocks, max(max_batchsize, reverse_batch)), dim + align_padding)); @@ -318,7 +318,7 @@ void batched_insert_vamana( // compute prefix sums of query_list sizes - TODO parallelize prefix sums // auto d_total_edges = raft::make_device_mdarray( - // res, raft::resource::get_workspace_resource_ref(res), raft::make_extents(1)); + // res, raft::resource::get_workspace_resource(res), raft::make_extents(1)); rmm::device_scalar d_total_edges(stream); prefix_sums_sizes<<<1, 1, 0, stream>>>(query_list, step_size, d_total_edges.data()); RAFT_CUDA_TRY(cudaPeekAtLastError()); @@ -329,16 +329,16 @@ void batched_insert_vamana( auto edge_dist_pair = raft::make_device_mdarray>( res, - raft::resource::get_large_workspace_resource_ref(res), + raft::resource::get_large_workspace_resource(res), raft::make_extents(total_edges)); auto edge_dest = raft::make_device_mdarray(res, - raft::resource::get_large_workspace_resource_ref(res), + raft::resource::get_large_workspace_resource(res), raft::make_extents(total_edges)); auto edge_src = raft::make_device_mdarray(res, - raft::resource::get_large_workspace_resource_ref(res), + raft::resource::get_large_workspace_resource(res), raft::make_extents(total_edges)); // Create reverse edge list @@ -367,7 +367,7 @@ void batched_insert_vamana( auto temp_sort_storage = raft::make_device_mdarray( res, - raft::resource::get_large_workspace_resource_ref(res), + raft::resource::get_large_workspace_resource(res), raft::make_extents(temp_storage_bytes / sizeof(IdxT))); // Sort to group reverse edges by destination @@ -406,7 +406,7 @@ void batched_insert_vamana( auto temp_sort_storage = raft::make_device_mdarray( res, - raft::resource::get_large_workspace_resource_ref(res), + raft::resource::get_large_workspace_resource(res), raft::make_extents(temp_storage_bytes / sizeof(IdxT))); // Sort to group reverse edges by destination @@ -451,16 +451,16 @@ void batched_insert_vamana( // Allocate reverse QueryCandidate list based on number of unique destinations auto reverse_list_ptr = raft::make_device_mdarray>( res, - raft::resource::get_large_workspace_resource_ref(res), + raft::resource::get_large_workspace_resource(res), raft::make_extents(reverse_batch)); auto rev_ids = raft::make_device_mdarray(res, - raft::resource::get_large_workspace_resource_ref(res), + raft::resource::get_large_workspace_resource(res), raft::make_extents(reverse_batch, visited_size)); auto rev_dists = raft::make_device_mdarray(res, - raft::resource::get_large_workspace_resource_ref(res), + raft::resource::get_large_workspace_resource(res), raft::make_extents(reverse_batch, visited_size)); QueryCandidates* reverse_list = @@ -663,7 +663,7 @@ index build( static_cast(dim), static_cast(max_batch_size), raft::resource::get_cuda_stream(res), - raft::resource::get_workspace_resource_ref(res)); + raft::resource::get_workspace_resource(res)); for (const auto& batch : _vamana_batches) { // perform rotation auto dataset_rotated = raft::make_device_matrix(res, batch.size(), dim); diff --git a/cpp/src/neighbors/detail/vpq_dataset.cuh b/cpp/src/neighbors/detail/vpq_dataset.cuh index ec4a684274..c1136db645 100644 --- a/cpp/src/neighbors/detail/vpq_dataset.cuh +++ b/cpp/src/neighbors/detail/vpq_dataset.cuh @@ -513,7 +513,7 @@ void process_and_fill_codes( static_cast(dim), static_cast(max_batch_size), stream, - rmm::mr::get_current_device_resource_ref()); + rmm::mr::get_current_device_resource()); for (const auto& batch : _vpq_batches_codes) { auto batch_view = raft::make_device_matrix_view(batch.data(), ix_t(batch.size()), dim); auto batch_labels_view = raft::make_device_vector_view(nullptr, 0); @@ -910,7 +910,7 @@ void process_and_fill_codes_subspaces( static_cast(dim), static_cast(max_batch_size), copy_stream, - raft::resource::get_workspace_resource_ref(res), + raft::resource::get_workspace_resource(res), enable_prefetch_stream); vec_batches.prefetch_next_batch(); for (const auto& batch : vec_batches) { diff --git a/cpp/src/neighbors/ivf_common.cu b/cpp/src/neighbors/ivf_common.cu index b87a14f7c3..a7a1a2823a 100644 --- a/cpp/src/neighbors/ivf_common.cu +++ b/cpp/src/neighbors/ivf_common.cu @@ -79,7 +79,7 @@ void sort_cluster_sizes_descending(uint32_t* input, uint32_t* output, uint32_t n_lists, rmm::cuda_stream_view stream, - rmm::device_async_resource_ref tmp_res) + rmm::device_async_resource tmp_res) { int begin_bit = 0; int end_bit = sizeof(uint32_t) * 8; diff --git a/cpp/src/neighbors/ivf_common.cuh b/cpp/src/neighbors/ivf_common.cuh index e466a13fd7..b8fb54ca60 100644 --- a/cpp/src/neighbors/ivf_common.cuh +++ b/cpp/src/neighbors/ivf_common.cuh @@ -20,7 +20,7 @@ void sort_cluster_sizes_descending(uint32_t* input, uint32_t* output, uint32_t n_lists, rmm::cuda_stream_view stream, - rmm::device_async_resource_ref tmp_res); + rmm::device_async_resource tmp_res); /** * Default value returned by `search` when the `n_probes` is too small and top-k is too large. @@ -256,7 +256,7 @@ template void recompute_internal_state(const raft::resources& res, Index& index) { auto stream = raft::resource::get_cuda_stream(res); - auto tmp_res = raft::resource::get_workspace_resource_ref(res); + auto tmp_res = raft::resource::get_workspace_resource(res); rmm::device_uvector sorted_sizes(index.n_lists(), stream, tmp_res); // Actualize the list pointers diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh index fffe5134ae..4165bd1087 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh @@ -180,10 +180,8 @@ void extend(raft::resources const& handle, RAFT_EXPECTS(new_indices != nullptr || index->size() == 0, "You must pass data indices when the index is non-empty."); - auto new_labels = - raft::make_device_mdarray(handle, - raft::resource::get_large_workspace_resource_ref(handle), - raft::make_extents(n_rows)); + auto new_labels = raft::make_device_mdarray( + handle, raft::resource::get_large_workspace_resource(handle), raft::make_extents(n_rows)); cuvs::cluster::kmeans::balanced_params kmeans_params; kmeans_params.metric = index->metric(); auto orig_centroids_view = @@ -210,7 +208,7 @@ void extend(raft::resources const& handle, IdxT{index->dim()}, max_batch_size, copy_stream, - raft::resource::get_workspace_resource_ref(handle), + raft::resource::get_workspace_resource(handle), enable_prefetch); vec_batches.prefetch_next_batch(); @@ -229,7 +227,7 @@ void extend(raft::resources const& handle, auto* list_sizes_ptr = index->list_sizes().data_handle(); auto old_list_sizes_dev = raft::make_device_mdarray( - handle, raft::resource::get_workspace_resource_ref(handle), raft::make_extents(n_lists)); + handle, raft::resource::get_workspace_resource(handle), raft::make_extents(n_lists)); raft::copy(handle, old_list_sizes_dev.view(), raft::make_device_vector_view(list_sizes_ptr, n_lists)); @@ -305,7 +303,7 @@ void extend(raft::resources const& handle, IdxT{1}, max_batch_size, stream, - raft::resource::get_workspace_resource_ref(handle)); + raft::resource::get_workspace_resource(handle)); vec_batches.reset(); vec_batches.prefetch_next_batch(); auto idx_batch = vec_indices.begin(); @@ -420,7 +418,7 @@ inline auto build(raft::resources const& handle, 1, n_rows / std::max(params.kmeans_trainset_fraction * n_rows, index.n_lists())); auto n_rows_train = n_rows / trainset_ratio; rmm::device_uvector trainset( - n_rows_train * index.dim(), stream, raft::resource::get_large_workspace_resource_ref(handle)); + n_rows_train * index.dim(), stream, raft::resource::get_large_workspace_resource(handle)); // TODO: a proper sampling raft::copy_matrix(trainset.data(), index.dim(), @@ -479,7 +477,7 @@ inline void fill_refinement_index(raft::resources const& handle, "ivf_flat::fill_refinement_index(%zu, %u)", size_t(n_queries)); rmm::device_uvector new_labels( - n_queries * n_candidates, stream, raft::resource::get_workspace_resource_ref(handle)); + n_queries * n_candidates, stream, raft::resource::get_workspace_resource(handle)); auto new_labels_view = raft::make_device_vector_view(new_labels.data(), n_queries * n_candidates); raft::linalg::map_offset( diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh index 960d48c818..6831c77d8c 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh @@ -24,7 +24,7 @@ #include // raft::linalg::norm #include -#include +#include #include @@ -51,7 +51,7 @@ void search_impl(raft::resources const& handle, bool select_min, IdxT* neighbors, AccT* distances, - rmm::device_async_resource_ref search_mr, + rmm::device_async_resource search_mr, IvfSampleFilterT sample_filter) { auto stream = raft::resource::get_cuda_stream(handle); @@ -355,22 +355,21 @@ inline void search_with_filtering(raft::resources const& handle, for (uint32_t offset_q = 0; offset_q < n_queries; offset_q += max_queries) { uint32_t queries_batch = raft::min(max_queries, n_queries - offset_q); - search_impl( - handle, - index, - effective_metric, - params, - queries + offset_q * index.dim(), - queries_batch, - offset_q, - k, - n_probes, - max_samples, - cuvs::distance::is_min_close(effective_metric), - neighbors + offset_q * k, - distances + offset_q * k, - raft::resource::get_workspace_resource_ref(handle), - sample_filter); + search_impl(handle, + index, + effective_metric, + params, + queries + offset_q * index.dim(), + queries_batch, + offset_q, + k, + n_probes, + max_samples, + cuvs::distance::is_min_close(effective_metric), + neighbors + offset_q * k, + distances + offset_q * k, + raft::resource::get_workspace_resource(handle), + sample_filter); } } diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh index c562ca9e00..a668fa3a4b 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh @@ -91,7 +91,7 @@ void select_residuals(raft::resources const& handle, const float* center, // [dim] const T* dataset, // [.., dim] const IdxT* row_ids, // [n_rows] - rmm::device_async_resource_ref device_memory + rmm::device_async_resource device_memory ) { @@ -147,7 +147,7 @@ void flat_compute_residuals( raft::device_matrix_view centers, // [n_lists, dim_ext] const T* dataset, // [n_rows, dim] std::variant labels, // [n_rows] - rmm::device_async_resource_ref device_memory, + rmm::device_async_resource device_memory, cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) { auto stream = raft::resource::get_cuda_stream(handle); @@ -334,7 +334,7 @@ void train_per_subset(raft::resources const& handle, uint32_t max_train_points_per_pq_code) { auto stream = raft::resource::get_cuda_stream(handle); - auto device_memory = raft::resource::get_workspace_resource_ref(handle); + auto device_memory = raft::resource::get_workspace_resource(handle); rmm::device_uvector pq_centers_tmp(impl->pq_centers().size(), stream, device_memory); // Subsampling the train set for codebook generation based on max_train_points_per_pq_code. @@ -416,7 +416,7 @@ void train_per_cluster(raft::resources const& handle, uint32_t max_train_points_per_pq_code) { auto stream = raft::resource::get_cuda_stream(handle); - auto device_memory = raft::resource::get_workspace_resource_ref(handle); + auto device_memory = raft::resource::get_workspace_resource(handle); // NB: Managed memory is used for small arrays accessed from both device and host. There's no // performance reasoning behind this, just avoiding the boilerplate of explicit copies. rmm::mr::managed_memory_resource managed_memory; @@ -596,7 +596,7 @@ void reconstruct_list_data(raft::resources const& res, auto tmp = raft::make_device_mdarray(res, - raft::resource::get_workspace_resource_ref(res), + raft::resource::get_workspace_resource(res), raft::make_extents(n_rows, index.rot_dim())); constexpr uint32_t kBlockSize = 256; @@ -623,7 +623,7 @@ void reconstruct_list_data(raft::resources const& res, float* out_float_ptr = nullptr; rmm::device_uvector out_float_buf( - 0, raft::resource::get_cuda_stream(res), raft::resource::get_workspace_resource_ref(res)); + 0, raft::resource::get_cuda_stream(res), raft::resource::get_workspace_resource(res)); if constexpr (std::is_same_v) { out_float_ptr = out_vectors.data_handle(); } else { @@ -706,7 +706,7 @@ void encode_list_data(raft::resources const& res, auto n_rows = new_vectors.extent(0); if (n_rows == 0) { return; } - auto mr = raft::resource::get_workspace_resource_ref(res); + auto mr = raft::resource::get_workspace_resource(res); auto new_vectors_residual = raft::make_device_mdarray( res, mr, raft::make_extents(n_rows, index->rot_dim())); @@ -805,7 +805,7 @@ void process_and_fill_codes(raft::resources const& handle, std::variant src_offset_or_indices, const uint32_t* new_labels, IdxT n_rows, - rmm::device_async_resource_ref mr) + rmm::device_async_resource mr) { auto new_vectors_residual = raft::make_device_mdarray(handle, mr, raft::make_extents(n_rows, index.rot_dim())); @@ -997,9 +997,8 @@ void extend(raft::resources const& handle, std::is_same_v, "Unsupported data type"); - rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource_ref(handle); - rmm::device_async_resource_ref large_memory = - raft::resource::get_large_workspace_resource_ref(handle); + rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(handle); + rmm::device_async_resource large_memory = raft::resource::get_large_workspace_resource(handle); // Try to allocate an index with the same parameters and the projected new size // (which can be slightly larger than index->size() + n_rows, due to padding for interleaved). @@ -1028,8 +1027,8 @@ void extend(raft::resources const& handle, // `large_workspace_resource`, which does not have the explicit allocation limit. The user may opt // to populate the `large_workspace_resource` memory resource with managed memory for easier // scaling. - rmm::device_async_resource_ref labels_mr = device_memory; - rmm::device_async_resource_ref batches_mr = device_memory; + rmm::device_async_resource labels_mr = device_memory; + rmm::device_async_resource batches_mr = device_memory; if (n_rows * (index->dim() * sizeof(T) + index->pq_dim() + sizeof(IdxT) + sizeof(uint32_t)) > free_mem) { labels_mr = large_memory; @@ -1269,14 +1268,13 @@ auto build(raft::resources const& handle, size_t(n_rows) / std::max(params.kmeans_trainset_fraction * n_rows, impl->n_lists())); size_t n_rows_train = n_rows / trainset_ratio; - rmm::device_async_resource_ref device_memory = - raft::resource::get_workspace_resource_ref(handle); + rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(handle); // If the trainset is small enough to comfortably fit into device memory, put it there. // Otherwise, use the managed memory. constexpr size_t kTolerableRatio = 4; - rmm::device_async_resource_ref big_memory_resource = - raft::resource::get_large_workspace_resource_ref(handle); + rmm::device_async_resource big_memory_resource = + raft::resource::get_large_workspace_resource(handle); if (sizeof(float) * n_rows_train * impl->dim() * kTolerableRatio < raft::resource::get_workspace_free_bytes(handle)) { big_memory_resource = device_memory; diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh index 08fcd1f09a..7a5fd198f9 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh @@ -69,7 +69,7 @@ void select_clusters(raft::resources const& handle, cuvs::distance::DistanceType metric, const T* queries, // [n_queries, dim] const float* cluster_centers, // [n_lists, dim_ext] - rmm::device_async_resource_ref mr) + rmm::device_async_resource mr) { raft::common::nvtx::range fun_scope( "ivf_pq::search::select_clusters(n_probes = %u, n_queries = %u, n_lists = %u, dim = %u)", @@ -179,7 +179,7 @@ void select_clusters(raft::resources const& handle, cuvs::distance::DistanceType metric, const T* queries, // [n_queries, dim] const int8_t* cluster_centers, // [n_lists, dim_ext] - rmm::device_async_resource_ref mr) + rmm::device_async_resource mr) { raft::common::nvtx::range fun_scope( "ivf_pq::search::select_clusters(n_probes = %u, n_queries = %u, n_lists = %u, dim = %u)", @@ -267,7 +267,7 @@ void select_clusters(raft::resources const& handle, cuvs::distance::DistanceType metric, const T* queries, // [n_queries, dim] const half* cluster_centers, // [n_lists, dim_ext] - rmm::device_async_resource_ref mr) + rmm::device_async_resource mr) { raft::common::nvtx::range fun_scope( "ivf_pq::search::select_clusters(n_probes = %u, n_queries = %u, n_lists = %u, dim = %u)", @@ -440,7 +440,7 @@ void ivfpq_search_worker(raft::resources const& handle, topK, index.dim()); auto stream = raft::resource::get_cuda_stream(handle); - auto mr = raft::resource::get_workspace_resource_ref(handle); + auto mr = raft::resource::get_workspace_resource(handle); bool manage_local_topk = is_local_topk_feasible(topK, n_probes, n_queries); auto topk_len = manage_local_topk ? n_probes * topK : max_samples; @@ -927,7 +927,7 @@ inline void search(raft::resources const& handle, max_samples = ms; } - auto mr = raft::resource::get_workspace_resource_ref(handle); + auto mr = raft::resource::get_workspace_resource(handle); // Maximum number of query vectors to search at the same time. // Number of queries in the outer loop, which includes query transform and coarse search. diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh index e0032a4ac5..07045b772b 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh @@ -50,8 +50,8 @@ void transform_batch(raft::resources const& res, raft::device_vector_view output_labels, raft::device_matrix_view output_dataset) { - IdxT n_rows = dataset.extent(0); - rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource_ref(res); + IdxT n_rows = dataset.extent(0); + rmm::device_async_resource mr = raft::resource::get_workspace_resource(res); // Compute the labels for each vector cuvs::cluster::kmeans::balanced_params kmeans_params; @@ -115,7 +115,7 @@ void transform(raft::resources const& res, raft::common::nvtx::range fun_scope( "ivf_pq::transform(n_rows = %u, dim = %u)", n_rows, dataset.extent(1)); - rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource_ref(res); + rmm::device_async_resource mr = raft::resource::get_workspace_resource(res); // The cluster centers in the index are stored padded, which is not acceptable by // the kmeans_balanced::predict. Thus, we need the restructuring raft::copy. @@ -137,8 +137,8 @@ void transform(raft::resources const& res, } } - constexpr size_t max_batch_size = 65536; - rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource_ref(res); + constexpr size_t max_batch_size = 65536; + rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(res); auto vec_batches = utils::make_batch_load_iterator(res, dataset.data_handle(), diff --git a/cpp/src/neighbors/scann/detail/scann_avq.cuh b/cpp/src/neighbors/scann/detail/scann_avq.cuh index 0e138ce1a3..e4a2042991 100644 --- a/cpp/src/neighbors/scann/detail/scann_avq.cuh +++ b/cpp/src/neighbors/scann/detail/scann_avq.cuh @@ -57,9 +57,8 @@ void compute_cluster_offsets(raft::resources const& dev_resources, raft::device_vector_view cluster_sizes, int64_t& max_cluster_size) { - cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); - rmm::device_async_resource_ref device_memory = - raft::resource::get_workspace_resource_ref(dev_resources); + cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); + rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(dev_resources); // Histrogram to compute cluster sizes int num_levels = cluster_sizes.extent(0) + 1; @@ -136,9 +135,8 @@ void sum_reduce_vector(raft::resources const& dev_resources, raft::device_vector_view v, raft::device_scalar_view s) { - cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); - rmm::device_async_resource_ref device_memory = - raft::resource::get_workspace_resource_ref(dev_resources); + cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); + rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(dev_resources); size_t temp_storage_bytes = 0; @@ -163,10 +161,9 @@ void cholesky_solver(raft::resources const& dev_resources, raft::device_vector_view b, raft::device_vector_view x) { - cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); - cusolverDnHandle_t cusolverH = raft::resource::get_cusolver_dn_handle(dev_resources); - rmm::device_async_resource_ref device_memory = - raft::resource::get_workspace_resource_ref(dev_resources); + cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); + cusolverDnHandle_t cusolverH = raft::resource::get_cusolver_dn_handle(dev_resources); + rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(dev_resources); // RAFT_CUSOLVER_TRY(cusolverDnSetStream(cusolverH, stream)); diff --git a/cpp/src/neighbors/scann/detail/scann_build.cuh b/cpp/src/neighbors/scann/detail/scann_build.cuh index c01e50bc83..41d0e2cc95 100644 --- a/cpp/src/neighbors/scann/detail/scann_build.cuh +++ b/cpp/src/neighbors/scann/detail/scann_build.cuh @@ -87,7 +87,7 @@ index build( raft::device_vector_view labels_view = idx.labels(); // setup batching for kmeans prediction + quantization - auto device_memory = raft::resource::get_workspace_resource_ref(res); + auto device_memory = raft::resource::get_workspace_resource(res); constexpr size_t kReasonableMaxBatchSize = 65536; size_t max_batch_size = std::min(dataset.extent(0), kReasonableMaxBatchSize); diff --git a/cpp/src/preprocessing/quantize/detail/binary.cuh b/cpp/src/preprocessing/quantize/detail/binary.cuh index 6695b8b126..fe8288a7cb 100644 --- a/cpp/src/preprocessing/quantize/detail/binary.cuh +++ b/cpp/src/preprocessing/quantize/detail/binary.cuh @@ -146,7 +146,7 @@ void mean_f16_in_f32(raft::resources const& res, const size_t dataset_size, cudaStream_t cuda_stream) { - auto mr = raft::resource::get_workspace_resource_ref(res); + auto mr = raft::resource::get_workspace_resource(res); auto f32_result_vec = raft::make_device_mdarray(res, mr, raft::make_extents(dataset_dim)); raft::matrix::fill(res, f32_result_vec.view(), float(0)); @@ -212,7 +212,7 @@ auto train(raft::resources const& res, static_cast(dataset_dim)); raft::random::RngState rng(29837lu); - auto mr = raft::resource::get_workspace_resource_ref(res); + auto mr = raft::resource::get_workspace_resource(res); auto sampled_dataset_chunk = raft::make_device_mdarray( res, mr, raft::make_extents(num_samples, max_dim_chunk)); auto transposed_sampled_dataset_chunk = raft::make_device_mdarray( @@ -331,7 +331,7 @@ auto train(raft::resources const& res, raft::make_host_vector_view(host_threshold_vec.data(), (int64_t)dataset_dim)); } else { - auto mr = raft::resource::get_workspace_resource_ref(res); + auto mr = raft::resource::get_workspace_resource(res); auto casted_vec = raft::make_device_mdarray( res, mr, raft::make_extents(dataset_dim)); raft::copy(res, @@ -425,7 +425,7 @@ void transform(raft::resources const& res, raft::make_device_vector_view(quantizer.threshold.data_handle(), (int64_t)dataset_dim)); } else { - auto mr = raft::resource::get_workspace_resource_ref(res); + auto mr = raft::resource::get_workspace_resource(res); auto casted_vec = raft::make_device_mdarray( res, mr, raft::make_extents(dataset_dim)); raft::linalg::map(res, diff --git a/cpp/src/preprocessing/quantize/detail/pq.cuh b/cpp/src/preprocessing/quantize/detail/pq.cuh index 5d77e2dd44..2f417e6281 100644 --- a/cpp/src/preprocessing/quantize/detail/pq.cuh +++ b/cpp/src/preprocessing/quantize/detail/pq.cuh @@ -119,7 +119,7 @@ auto train_pq_subspaces( auto trainset_ptr = !vq_centers.empty() ? pq_trainset.data_handle() : dataset.data_handle(); auto sub_labels = raft::make_device_vector(res, 0); auto pq_cluster_sizes = raft::make_device_vector(res, 0); - auto device_memory = raft::resource::get_workspace_resource_ref(res); + auto device_memory = raft::resource::get_workspace_resource(res); if (is_balanced_kmeans(params)) { sub_labels = raft::make_device_mdarray( res, device_memory, raft::make_extents(n_rows_train)); diff --git a/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu b/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu index c0df2b8826..0f78805b5e 100644 --- a/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu +++ b/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu @@ -85,7 +85,7 @@ class BatchLoadIteratorTest : public ::testing::Test { int64_t total_processed = 0; auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res); - auto workspace_mr = raft::resource::get_workspace_resource_ref(res); + auto workspace_mr = raft::resource::get_workspace_resource(res); { bli::batch_load_iterator iter(res, @@ -154,7 +154,7 @@ TEST_F(BatchLoadIteratorTest, EmptyViewFromHost) auto host_view = host_empty.view(); auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res); - auto workspace_mr = raft::resource::get_workspace_resource_ref(res); + auto workspace_mr = raft::resource::get_workspace_resource(res); bli::batch_load_iterator> iter( res, host_view, /*batch_size=*/128, copy_stream, workspace_mr, enable_prefetch); @@ -167,7 +167,7 @@ TEST_F(BatchLoadIteratorTest, EmptyViewFromDevice) auto device_view = device_empty.view(); auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res); - auto workspace_mr = raft::resource::get_workspace_resource_ref(res); + auto workspace_mr = raft::resource::get_workspace_resource(res); bli::batch_load_iterator> iter( res, device_view, /*batch_size=*/128, copy_stream, workspace_mr, enable_prefetch); @@ -290,7 +290,7 @@ TEST_F(BatchLoadIteratorTest, MakeBatchLoadIteratorHostPtr) n_cols, batch_size_rows, copy_stream, - raft::resource::get_workspace_resource_ref(res), + raft::resource::get_workspace_resource(res), enable_prefetch); EXPECT_TRUE(iter.does_copy()); @@ -329,7 +329,7 @@ TEST_F(BatchLoadIteratorTest, MakeBatchLoadIteratorDevicePtr) n_cols, batch_size_rows, copy_stream, - raft::resource::get_workspace_resource_ref(res), + raft::resource::get_workspace_resource(res), enable_prefetch); EXPECT_FALSE(iter.does_copy()); diff --git a/cpp/tests/neighbors/naive_knn.cuh b/cpp/tests/neighbors/naive_knn.cuh index d21e982902..3ce7a51b06 100644 --- a/cpp/tests/neighbors/naive_knn.cuh +++ b/cpp/tests/neighbors/naive_knn.cuh @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace cuvs::neighbors { @@ -104,7 +104,7 @@ void naive_knn(raft::resources const& handle, uint32_t k, cuvs::distance::DistanceType type) { - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref(); + rmm::device_async_resource mr = rmm::mr::get_current_device_resource(); auto stream = raft::resource::get_cuda_stream(handle); dim3 block_dim(16, 32, 1); diff --git a/docs/source/api_basics.rst b/docs/source/api_basics.rst index 5ffb1da630..8c0aadb09a 100644 --- a/docs/source/api_basics.rst +++ b/docs/source/api_basics.rst @@ -24,7 +24,7 @@ Here's an example of configuring RMM to use a pool allocator in C++ (derived fro auto initial_size = rmm::percent_of_free_device_memory(50); rmm::mr::pool_memory_resource pool_mr{cuda_mr, initial_size}; rmm::mr::set_current_device_resource(pool_mr); - auto mr = rmm::mr::get_current_device_resource_ref(); + auto mr = rmm::mr::get_current_device_resource(); Python ^^^^^^ diff --git a/examples/cpp/src/cagra_example.cu b/examples/cpp/src/cagra_example.cu index 856030c520..85be6e4035 100644 --- a/examples/cpp/src/cagra_example.cu +++ b/examples/cpp/src/cagra_example.cu @@ -53,7 +53,7 @@ int main() raft::device_resources dev_resources; // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); diff --git a/examples/cpp/src/cagra_hnsw_ace_example.cu b/examples/cpp/src/cagra_hnsw_ace_example.cu index d1bde25ad6..79f08a7098 100644 --- a/examples/cpp/src/cagra_hnsw_ace_example.cu +++ b/examples/cpp/src/cagra_hnsw_ace_example.cu @@ -156,7 +156,7 @@ int main() raft::device_resources dev_resources; // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); diff --git a/examples/cpp/src/cagra_persistent_example.cu b/examples/cpp/src/cagra_persistent_example.cu index ded3a287b2..a413858a9c 100644 --- a/examples/cpp/src/cagra_persistent_example.cu +++ b/examples/cpp/src/cagra_persistent_example.cu @@ -260,7 +260,7 @@ int main() // This is important because we run the async loop with a very large number of jobs, // which would otherwise swamp a normal pool memory resource. // (the non-persistent implementation would hang forever). - rmm::mr::arena_memory_resource mr(rmm::mr::get_current_device_resource_ref(), mem_size); + rmm::mr::arena_memory_resource mr(rmm::mr::get_current_device_resource(), mem_size); rmm::mr::set_current_device_resource(mr); std::cout << "GPU Arena memory resource size: " << mem_size / (1024ll * 1024ll) << " MiB" << std::endl; diff --git a/examples/cpp/src/hnsw_ace_example.cu b/examples/cpp/src/hnsw_ace_example.cu index 22354193e5..6ede6b50ea 100644 --- a/examples/cpp/src/hnsw_ace_example.cu +++ b/examples/cpp/src/hnsw_ace_example.cu @@ -138,7 +138,7 @@ int main() raft::device_resources dev_resources; // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); diff --git a/examples/cpp/src/ivf_flat_example.cu b/examples/cpp/src/ivf_flat_example.cu index 163e3b4719..ed9b04e6f1 100644 --- a/examples/cpp/src/ivf_flat_example.cu +++ b/examples/cpp/src/ivf_flat_example.cu @@ -112,7 +112,7 @@ int main() raft::device_resources dev_resources; // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); diff --git a/examples/cpp/src/ivf_pq_example.cu b/examples/cpp/src/ivf_pq_example.cu index f27507489e..f1b4e8af7c 100644 --- a/examples/cpp/src/ivf_pq_example.cu +++ b/examples/cpp/src/ivf_pq_example.cu @@ -78,7 +78,7 @@ int main() raft::device_resources dev_resources; // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); diff --git a/examples/cpp/src/scann_example.cu b/examples/cpp/src/scann_example.cu index 70aff11b44..a857fd9ff4 100644 --- a/examples/cpp/src/scann_example.cu +++ b/examples/cpp/src/scann_example.cu @@ -54,7 +54,7 @@ int main(int argc, char* argv[]) // Set pool memory resource with 1 GiB initial pool size. All allocations use // the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); diff --git a/examples/cpp/src/vamana_example.cu b/examples/cpp/src/vamana_example.cu index b43eb20b1d..c23a49e735 100644 --- a/examples/cpp/src/vamana_example.cu +++ b/examples/cpp/src/vamana_example.cu @@ -86,7 +86,7 @@ int main(int argc, char* argv[]) // Set pool memory resource with 1 GiB initial pool size. All allocations use // the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); From ce1cdc1ddb4da4fde2fdd1e47bf07f062d89a5eb Mon Sep 17 00:00:00 2001 From: tarang-jain Date: Wed, 20 May 2026 08:38:30 -0700 Subject: [PATCH 2/4] revert references to resource_ref --- c/src/core/c_api.cpp | 2 +- .../src/common/cuda_huge_page_resource.hpp | 2 +- cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h | 4 +-- cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu | 4 +-- cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu | 4 +-- cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h | 4 +-- cpp/bench/ann/src/cuvs/cuvs_vamana.cu | 4 +-- .../cuvs_internal/neighbors/naive_knn.cuh | 4 +-- cpp/src/cluster/detail/kmeans_balanced.cuh | 26 +++++++++---------- cpp/src/distance/detail/masked_nn.cuh | 4 +-- cpp/src/neighbors/detail/ann_utils.cuh | 12 ++++----- .../neighbors/detail/cagra/cagra_build.cuh | 8 +++--- .../neighbors/detail/cagra/cagra_merge.cuh | 2 +- .../neighbors/detail/cagra/search_plan.cuh | 4 +-- cpp/src/neighbors/detail/cagra/utils.hpp | 4 +-- .../neighbors/detail/vamana/greedy_search.cuh | 2 +- cpp/src/neighbors/ivf_common.cu | 2 +- cpp/src/neighbors/ivf_common.cuh | 2 +- .../neighbors/ivf_flat/ivf_flat_search.cuh | 4 +-- cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh | 19 +++++++------- cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh | 6 ++--- cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh | 10 +++---- cpp/src/neighbors/scann/detail/scann_avq.cuh | 17 +++++++----- cpp/tests/neighbors/naive_knn.cuh | 4 +-- 24 files changed, 79 insertions(+), 75 deletions(-) diff --git a/c/src/core/c_api.cpp b/c/src/core/c_api.cpp index 3847875aa4..9e7aefe93c 100644 --- a/c/src/core/c_api.cpp +++ b/c/src/core/c_api.cpp @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "../core/exceptions.hpp" diff --git a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp index aae71605a5..e5555132bb 100644 --- a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp +++ b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h index f818a1e208..99a0ae6f76 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h +++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -98,7 +98,7 @@ class shared_raft_resources { ~shared_raft_resources() noexcept { rmm::mr::set_current_device_resource(orig_resource_); } - auto get_large_memory_resource() noexcept -> rmm::device_async_resource { return large_mr_; } + auto get_large_memory_resource() noexcept -> rmm::device_async_resource_ref { return large_mr_; } private: cuda::mr::any_resource orig_resource_; diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu b/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu index e67adf9005..1521333c5e 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -9,7 +9,7 @@ #include #include -#include +#include namespace cuvs::bench { diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu index 9645e5df63..26028b6d98 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -9,7 +9,7 @@ #include #include -#include +#include namespace cuvs::bench { diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h index d872fd6aab..022d8a0934 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include @@ -190,7 +190,7 @@ class cuvs_cagra : public algo, public algo_gpu { std::shared_ptr filter_; std::vector>> sub_indices_; - inline rmm::device_async_resource get_mr(AllocatorType mem_type) + inline rmm::device_async_resource_ref get_mr(AllocatorType mem_type) { switch (mem_type) { case (AllocatorType::kHostPinned): return mr_pinned_; diff --git a/cpp/bench/ann/src/cuvs/cuvs_vamana.cu b/cpp/bench/ann/src/cuvs/cuvs_vamana.cu index 14eb64b55a..185095d5b4 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_vamana.cu +++ b/cpp/bench/ann/src/cuvs/cuvs_vamana.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -8,7 +8,7 @@ #include #include -#include +#include namespace cuvs::bench { diff --git a/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh b/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh index 9c893587dc..c9f39b82bb 100644 --- a/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh +++ b/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace cuvs::neighbors { @@ -88,7 +88,7 @@ void naive_knn(raft::resources const& handle, uint32_t k, cuvs::distance::DistanceType type) { - rmm::device_async_resource mr = rmm::mr::get_current_device_resource(); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource(); auto stream = raft::resource::get_cuda_stream(handle); dim3 block_dim(16, 32, 1); diff --git a/cpp/src/cluster/detail/kmeans_balanced.cuh b/cpp/src/cluster/detail/kmeans_balanced.cuh index cd5c6f1aa0..7003587e7a 100644 --- a/cpp/src/cluster/detail/kmeans_balanced.cuh +++ b/cpp/src/cluster/detail/kmeans_balanced.cuh @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include @@ -83,7 +83,7 @@ inline std::enable_if_t> predict_core( const MathT* dataset_norm, IdxT n_rows, LabelT* labels, - rmm::device_async_resource mr) + rmm::device_async_resource_ref mr) { auto stream = raft::resource::get_cuda_stream(handle); switch (params.metric) { @@ -253,7 +253,7 @@ void calc_centers_and_sizes(const raft::resources& handle, const LabelT* labels, bool reset_counters, MappingOpT mapping_op, - rmm::device_async_resource mr) + rmm::device_async_resource_ref mr) { auto stream = raft::resource::get_cuda_stream(handle); @@ -316,7 +316,7 @@ void compute_norm(const raft::resources& handle, IdxT n_rows, MappingOpT mapping_op, FinOpT norm_fin_op, - std::optional mr = std::nullopt) + std::optional mr = std::nullopt) { raft::common::nvtx::range fun_scope("compute_norm"); auto stream = raft::resource::get_cuda_stream(handle); @@ -377,8 +377,8 @@ void predict(const raft::resources& handle, IdxT n_rows, LabelT* labels, MappingOpT mapping_op, - std::optional mr = std::nullopt, - const MathT* dataset_norm = nullptr) + std::optional mr = std::nullopt, + const MathT* dataset_norm = nullptr) { auto stream = raft::resource::get_cuda_stream(handle); raft::common::nvtx::range fun_scope( @@ -551,7 +551,7 @@ auto adjust_centers(MathT* centers, MathT threshold, MappingOpT mapping_op, rmm::cuda_stream_view stream, - rmm::device_async_resource device_memory) -> bool + rmm::device_async_resource_ref device_memory) -> bool { raft::common::nvtx::range fun_scope( "adjust_centers(%zu, %u)", static_cast(n_rows), n_clusters); @@ -649,7 +649,7 @@ void balancing_em_iters(const raft::resources& handle, uint32_t balancing_pullback, MathT balancing_threshold, MappingOpT mapping_op, - rmm::device_async_resource device_memory) + rmm::device_async_resource_ref device_memory) { auto stream = raft::resource::get_cuda_stream(handle); uint32_t balancing_counter = balancing_pullback; @@ -732,7 +732,7 @@ void build_clusters(const raft::resources& handle, LabelT* cluster_labels, CounterT* cluster_sizes, MappingOpT mapping_op, - rmm::device_async_resource device_memory, + rmm::device_async_resource_ref device_memory, const MathT* dataset_norm = nullptr) { auto stream = raft::resource::get_cuda_stream(handle); @@ -874,8 +874,8 @@ auto build_fine_clusters(const raft::resources& handle, IdxT fine_clusters_nums_max, MathT* cluster_centers, MappingOpT mapping_op, - rmm::device_async_resource managed_memory, - rmm::device_async_resource device_memory) -> IdxT + rmm::device_async_resource_ref managed_memory, + rmm::device_async_resource_ref device_memory) -> IdxT { auto stream = raft::resource::get_cuda_stream(handle); rmm::device_uvector mc_trainset_ids_buf(mesocluster_size_max, stream, managed_memory); @@ -995,8 +995,8 @@ void build_hierarchical(const raft::resources& handle, // TODO: Remove the explicit managed memory- we shouldn't be creating this on the user's behalf. rmm::mr::managed_memory_resource managed_memory; - rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(handle); - auto [max_minibatch_size, mem_per_row] = calc_minibatch_size( + rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle); + auto [max_minibatch_size, mem_per_row] = calc_minibatch_size( handle, n_clusters, n_rows, dim, params.metric, std::is_same_v); // Precompute the L2 norm of the dataset if relevant and not yet computed. diff --git a/cpp/src/distance/detail/masked_nn.cuh b/cpp/src/distance/detail/masked_nn.cuh index 4c2ac0a993..5381b433ec 100644 --- a/cpp/src/distance/detail/masked_nn.cuh +++ b/cpp/src/distance/detail/masked_nn.cuh @@ -246,8 +246,8 @@ void masked_l2_nn_impl(raft::resources const& handle, static_assert(P::Mblk == 64, "masked_l2_nn_impl only supports a policy with 64 rows per block."); // Get stream and workspace memory resource - rmm::device_async_resource ws_mr = raft::resource::get_workspace_resource(handle); - auto stream = raft::resource::get_cuda_stream(handle); + rmm::device_async_resource_ref ws_mr = raft::resource::get_workspace_resource(handle); + auto stream = raft::resource::get_cuda_stream(handle); // Acquire temporary buffers and initialize to zero: // 1) Adjacency matrix bitfield diff --git a/cpp/src/neighbors/detail/ann_utils.cuh b/cpp/src/neighbors/detail/ann_utils.cuh index 10aee6dee0..dda5dd8bc2 100644 --- a/cpp/src/neighbors/detail/ann_utils.cuh +++ b/cpp/src/neighbors/detail/ann_utils.cuh @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include @@ -593,7 +593,7 @@ struct batch_load_iterator { MdspanT input_view, size_type batch_size, rmm::cuda_stream_view copy_stream, - rmm::device_async_resource mr, + rmm::device_async_resource_ref mr, bool prefetch, bool initialize, bool host_writeback) @@ -853,7 +853,7 @@ struct batch_load_iterator { MdspanT input_view, size_type batch_size, rmm::cuda_stream_view copy_stream, - rmm::device_async_resource mr, + rmm::device_async_resource_ref mr, bool prefetch = false, bool initialize = true, bool host_writeback = false) @@ -1018,7 +1018,7 @@ class batch_load_iterator_dyn { IdxT row_width, size_type batch_size, rmm::cuda_stream_view copy_stream, - rmm::device_async_resource mr, + rmm::device_async_resource_ref mr, bool prefetch = false, bool initialize = true, bool host_writeback = false) @@ -1149,7 +1149,7 @@ class batch_load_iterator_dyn { IdxT row_width, size_type batch_size, rmm::cuda_stream_view copy_stream, - rmm::device_async_resource mr, + rmm::device_async_resource_ref mr, bool prefetch, bool initialize, bool host_writeback) -> std::variant @@ -1212,7 +1212,7 @@ auto make_batch_load_iterator(raft::resources const& res, detail::type_identity_t row_width, size_t batch_size, rmm::cuda_stream_view copy_stream, - rmm::device_async_resource mr, + rmm::device_async_resource_ref mr, bool prefetch = false, bool initialize = true, bool host_writeback = false) -> batch_load_iterator_dyn diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index f5221724ea..5ee9a0a59c 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -31,7 +31,7 @@ // TODO: This shouldn't be calling spatial/knn APIs #include "../ann_utils.cuh" -#include +#include #include #include @@ -1700,9 +1700,9 @@ void build_knn_graph( } // If the workspace is smaller than desired, put the I/O buffers into the large workspace. - rmm::device_async_resource workspace_mr = use_large_workspace - ? raft::resource::get_large_workspace_resource(res) - : raft::resource::get_workspace_resource(res); + rmm::device_async_resource_ref workspace_mr = + use_large_workspace ? raft::resource::get_large_workspace_resource(res) + : raft::resource::get_workspace_resource(res); RAFT_LOG_DEBUG( "IVF-PQ search node_degree: %d, top_k: %d, gpu_top_k: %d, max_batch_size:: %d, n_probes: %u", diff --git a/cpp/src/neighbors/detail/cagra/cagra_merge.cuh b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh index fcce6f691c..1dd4cbe075 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_merge.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/src/neighbors/detail/cagra/search_plan.cuh b/cpp/src/neighbors/detail/cagra/search_plan.cuh index f7a5d15a6d..03b2962e4c 100644 --- a/cpp/src/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/src/neighbors/detail/cagra/search_plan.cuh @@ -38,8 +38,8 @@ namespace cuvs::neighbors::cagra::detail { template struct lightweight_uvector { private: - using raft_res_type = const raft::resources*; - using rmm_res_type = std::tuple; + using raft_res_type = const raft::resources*; + using rmm_res_type = std::tuple; static constexpr size_t kAlign = 256; std::variant res_; diff --git a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp index 16e84a5d95..edb325ca51 100644 --- a/cpp/src/neighbors/detail/cagra/utils.hpp +++ b/cpp/src/neighbors/detail/cagra/utils.hpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include @@ -276,7 +276,7 @@ void copy_with_padding( raft::resources const& res, raft::device_matrix& dst, raft::mdspan, raft::row_major, data_accessor> src, - rmm::device_async_resource mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { size_t padded_dim = raft::round_up_safe(src.extent(1) * sizeof(T), 16) / sizeof(T); diff --git a/cpp/src/neighbors/detail/vamana/greedy_search.cuh b/cpp/src/neighbors/detail/vamana/greedy_search.cuh index 4bdbd4695f..4e71c1189c 100644 --- a/cpp/src/neighbors/detail/vamana/greedy_search.cuh +++ b/cpp/src/neighbors/detail/vamana/greedy_search.cuh @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/cpp/src/neighbors/ivf_common.cu b/cpp/src/neighbors/ivf_common.cu index a7a1a2823a..b87a14f7c3 100644 --- a/cpp/src/neighbors/ivf_common.cu +++ b/cpp/src/neighbors/ivf_common.cu @@ -79,7 +79,7 @@ void sort_cluster_sizes_descending(uint32_t* input, uint32_t* output, uint32_t n_lists, rmm::cuda_stream_view stream, - rmm::device_async_resource tmp_res) + rmm::device_async_resource_ref tmp_res) { int begin_bit = 0; int end_bit = sizeof(uint32_t) * 8; diff --git a/cpp/src/neighbors/ivf_common.cuh b/cpp/src/neighbors/ivf_common.cuh index b8fb54ca60..303098176c 100644 --- a/cpp/src/neighbors/ivf_common.cuh +++ b/cpp/src/neighbors/ivf_common.cuh @@ -20,7 +20,7 @@ void sort_cluster_sizes_descending(uint32_t* input, uint32_t* output, uint32_t n_lists, rmm::cuda_stream_view stream, - rmm::device_async_resource tmp_res); + rmm::device_async_resource_ref tmp_res); /** * Default value returned by `search` when the `n_probes` is too small and top-k is too large. diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh index 6831c77d8c..f42ffdc837 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh @@ -24,7 +24,7 @@ #include // raft::linalg::norm #include -#include +#include #include @@ -51,7 +51,7 @@ void search_impl(raft::resources const& handle, bool select_min, IdxT* neighbors, AccT* distances, - rmm::device_async_resource search_mr, + rmm::device_async_resource_ref search_mr, IvfSampleFilterT sample_filter) { auto stream = raft::resource::get_cuda_stream(handle); diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh index a668fa3a4b..f0a878f2ec 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh @@ -91,7 +91,7 @@ void select_residuals(raft::resources const& handle, const float* center, // [dim] const T* dataset, // [.., dim] const IdxT* row_ids, // [n_rows] - rmm::device_async_resource device_memory + rmm::device_async_resource_ref device_memory ) { @@ -147,7 +147,7 @@ void flat_compute_residuals( raft::device_matrix_view centers, // [n_lists, dim_ext] const T* dataset, // [n_rows, dim] std::variant labels, // [n_rows] - rmm::device_async_resource device_memory, + rmm::device_async_resource_ref device_memory, cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) { auto stream = raft::resource::get_cuda_stream(handle); @@ -805,7 +805,7 @@ void process_and_fill_codes(raft::resources const& handle, std::variant src_offset_or_indices, const uint32_t* new_labels, IdxT n_rows, - rmm::device_async_resource mr) + rmm::device_async_resource_ref mr) { auto new_vectors_residual = raft::make_device_mdarray(handle, mr, raft::make_extents(n_rows, index.rot_dim())); @@ -997,8 +997,9 @@ void extend(raft::resources const& handle, std::is_same_v, "Unsupported data type"); - rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(handle); - rmm::device_async_resource large_memory = raft::resource::get_large_workspace_resource(handle); + rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle); + rmm::device_async_resource_ref large_memory = + raft::resource::get_large_workspace_resource(handle); // Try to allocate an index with the same parameters and the projected new size // (which can be slightly larger than index->size() + n_rows, due to padding for interleaved). @@ -1027,8 +1028,8 @@ void extend(raft::resources const& handle, // `large_workspace_resource`, which does not have the explicit allocation limit. The user may opt // to populate the `large_workspace_resource` memory resource with managed memory for easier // scaling. - rmm::device_async_resource labels_mr = device_memory; - rmm::device_async_resource batches_mr = device_memory; + rmm::device_async_resource_ref labels_mr = device_memory; + rmm::device_async_resource_ref batches_mr = device_memory; if (n_rows * (index->dim() * sizeof(T) + index->pq_dim() + sizeof(IdxT) + sizeof(uint32_t)) > free_mem) { labels_mr = large_memory; @@ -1268,12 +1269,12 @@ auto build(raft::resources const& handle, size_t(n_rows) / std::max(params.kmeans_trainset_fraction * n_rows, impl->n_lists())); size_t n_rows_train = n_rows / trainset_ratio; - rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(handle); + rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle); // If the trainset is small enough to comfortably fit into device memory, put it there. // Otherwise, use the managed memory. constexpr size_t kTolerableRatio = 4; - rmm::device_async_resource big_memory_resource = + rmm::device_async_resource_ref big_memory_resource = raft::resource::get_large_workspace_resource(handle); if (sizeof(float) * n_rows_train * impl->dim() * kTolerableRatio < raft::resource::get_workspace_free_bytes(handle)) { diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh index 7a5fd198f9..5799c4bbb4 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh @@ -69,7 +69,7 @@ void select_clusters(raft::resources const& handle, cuvs::distance::DistanceType metric, const T* queries, // [n_queries, dim] const float* cluster_centers, // [n_lists, dim_ext] - rmm::device_async_resource mr) + rmm::device_async_resource_ref mr) { raft::common::nvtx::range fun_scope( "ivf_pq::search::select_clusters(n_probes = %u, n_queries = %u, n_lists = %u, dim = %u)", @@ -179,7 +179,7 @@ void select_clusters(raft::resources const& handle, cuvs::distance::DistanceType metric, const T* queries, // [n_queries, dim] const int8_t* cluster_centers, // [n_lists, dim_ext] - rmm::device_async_resource mr) + rmm::device_async_resource_ref mr) { raft::common::nvtx::range fun_scope( "ivf_pq::search::select_clusters(n_probes = %u, n_queries = %u, n_lists = %u, dim = %u)", @@ -267,7 +267,7 @@ void select_clusters(raft::resources const& handle, cuvs::distance::DistanceType metric, const T* queries, // [n_queries, dim] const half* cluster_centers, // [n_lists, dim_ext] - rmm::device_async_resource mr) + rmm::device_async_resource_ref mr) { raft::common::nvtx::range fun_scope( "ivf_pq::search::select_clusters(n_probes = %u, n_queries = %u, n_lists = %u, dim = %u)", diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh index 07045b772b..9a8b5d7166 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh @@ -50,8 +50,8 @@ void transform_batch(raft::resources const& res, raft::device_vector_view output_labels, raft::device_matrix_view output_dataset) { - IdxT n_rows = dataset.extent(0); - rmm::device_async_resource mr = raft::resource::get_workspace_resource(res); + IdxT n_rows = dataset.extent(0); + rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource(res); // Compute the labels for each vector cuvs::cluster::kmeans::balanced_params kmeans_params; @@ -115,7 +115,7 @@ void transform(raft::resources const& res, raft::common::nvtx::range fun_scope( "ivf_pq::transform(n_rows = %u, dim = %u)", n_rows, dataset.extent(1)); - rmm::device_async_resource mr = raft::resource::get_workspace_resource(res); + rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource(res); // The cluster centers in the index are stored padded, which is not acceptable by // the kmeans_balanced::predict. Thus, we need the restructuring raft::copy. @@ -137,8 +137,8 @@ void transform(raft::resources const& res, } } - constexpr size_t max_batch_size = 65536; - rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(res); + constexpr size_t max_batch_size = 65536; + rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(res); auto vec_batches = utils::make_batch_load_iterator(res, dataset.data_handle(), diff --git a/cpp/src/neighbors/scann/detail/scann_avq.cuh b/cpp/src/neighbors/scann/detail/scann_avq.cuh index e4a2042991..6c3bb045e4 100644 --- a/cpp/src/neighbors/scann/detail/scann_avq.cuh +++ b/cpp/src/neighbors/scann/detail/scann_avq.cuh @@ -57,8 +57,9 @@ void compute_cluster_offsets(raft::resources const& dev_resources, raft::device_vector_view cluster_sizes, int64_t& max_cluster_size) { - cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); - rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(dev_resources); + cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); + rmm::device_async_resource_ref device_memory = + raft::resource::get_workspace_resource(dev_resources); // Histrogram to compute cluster sizes int num_levels = cluster_sizes.extent(0) + 1; @@ -135,8 +136,9 @@ void sum_reduce_vector(raft::resources const& dev_resources, raft::device_vector_view v, raft::device_scalar_view s) { - cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); - rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(dev_resources); + cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); + rmm::device_async_resource_ref device_memory = + raft::resource::get_workspace_resource(dev_resources); size_t temp_storage_bytes = 0; @@ -161,9 +163,10 @@ void cholesky_solver(raft::resources const& dev_resources, raft::device_vector_view b, raft::device_vector_view x) { - cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); - cusolverDnHandle_t cusolverH = raft::resource::get_cusolver_dn_handle(dev_resources); - rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(dev_resources); + cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); + cusolverDnHandle_t cusolverH = raft::resource::get_cusolver_dn_handle(dev_resources); + rmm::device_async_resource_ref device_memory = + raft::resource::get_workspace_resource(dev_resources); // RAFT_CUSOLVER_TRY(cusolverDnSetStream(cusolverH, stream)); diff --git a/cpp/tests/neighbors/naive_knn.cuh b/cpp/tests/neighbors/naive_knn.cuh index 3ce7a51b06..b8b4c791a7 100644 --- a/cpp/tests/neighbors/naive_knn.cuh +++ b/cpp/tests/neighbors/naive_knn.cuh @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace cuvs::neighbors { @@ -104,7 +104,7 @@ void naive_knn(raft::resources const& handle, uint32_t k, cuvs::distance::DistanceType type) { - rmm::device_async_resource mr = rmm::mr::get_current_device_resource(); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource(); auto stream = raft::resource::get_cuda_stream(handle); dim3 block_dim(16, 32, 1); From ed94de8bf0340272ea8fb68a1bd2120056fa8bc2 Mon Sep 17 00:00:00 2001 From: tarang-jain Date: Wed, 20 May 2026 08:43:31 -0700 Subject: [PATCH 3/4] fix compilation errors --- c/src/core/c_api.cpp | 4 ++-- cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h | 2 +- cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h | 2 +- cpp/internal/cuvs_internal/neighbors/naive_knn.cuh | 2 +- cpp/src/neighbors/detail/cagra/utils.hpp | 2 +- cpp/src/neighbors/detail/vpq_dataset.cuh | 2 +- cpp/tests/neighbors/naive_knn.cuh | 2 +- docs/source/api_basics.rst | 2 +- examples/cpp/src/cagra_example.cu | 2 +- examples/cpp/src/cagra_hnsw_ace_example.cu | 2 +- examples/cpp/src/cagra_persistent_example.cu | 2 +- examples/cpp/src/hnsw_ace_example.cu | 2 +- examples/cpp/src/ivf_flat_example.cu | 2 +- examples/cpp/src/ivf_pq_example.cu | 2 +- examples/cpp/src/scann_example.cu | 2 +- examples/cpp/src/vamana_example.cu | 2 +- 16 files changed, 17 insertions(+), 17 deletions(-) diff --git a/c/src/core/c_api.cpp b/c/src/core/c_api.cpp index 9e7aefe93c..f4e3664482 100644 --- a/c/src/core/c_api.cpp +++ b/c/src/core/c_api.cpp @@ -132,7 +132,7 @@ extern "C" cuvsError_t cuvsRMMAlloc(cuvsResources_t res, void** ptr, size_t byte { return cuvs::core::translate_exceptions([=] { auto res_ptr = reinterpret_cast(res); - auto mr = rmm::mr::get_current_device_resource(); + auto mr = rmm::mr::get_current_device_resource_ref(); *ptr = mr.allocate(raft::resource::get_cuda_stream(*res_ptr), bytes); }); } @@ -141,7 +141,7 @@ extern "C" cuvsError_t cuvsRMMFree(cuvsResources_t res, void* ptr, size_t bytes) { return cuvs::core::translate_exceptions([=] { auto res_ptr = reinterpret_cast(res); - auto mr = rmm::mr::get_current_device_resource(); + auto mr = rmm::mr::get_current_device_resource_ref(); mr.deallocate(raft::resource::get_cuda_stream(*res_ptr), ptr, bytes); }); } diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h index 99a0ae6f76..1a276e8cc8 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h +++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h @@ -73,7 +73,7 @@ class shared_raft_resources { try : large_mr_() { orig_resource_ = rmm::mr::set_current_device_resource(rmm::mr::failure_callback_resource_adaptor<>{ - rmm::mr::pool_memory_resource{rmm::mr::get_current_device_resource(), + rmm::mr::pool_memory_resource{rmm::mr::get_current_device_resource_ref(), 1024 * 1024 * 1024ull}, rmm_oom_callback, nullptr}); diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h index 022d8a0934..87111e4761 100644 --- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h +++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h @@ -195,7 +195,7 @@ class cuvs_cagra : public algo, public algo_gpu { switch (mem_type) { case (AllocatorType::kHostPinned): return mr_pinned_; case (AllocatorType::kHostHugePage): return mr_huge_page_; - default: return rmm::mr::get_current_device_resource(); + default: return rmm::mr::get_current_device_resource_ref(); } } }; diff --git a/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh b/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh index c9f39b82bb..7bc37193a0 100644 --- a/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh +++ b/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh @@ -88,7 +88,7 @@ void naive_knn(raft::resources const& handle, uint32_t k, cuvs::distance::DistanceType type) { - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource(); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref(); auto stream = raft::resource::get_cuda_stream(handle); dim3 block_dim(16, 32, 1); diff --git a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp index edb325ca51..07417a3f09 100644 --- a/cpp/src/neighbors/detail/cagra/utils.hpp +++ b/cpp/src/neighbors/detail/cagra/utils.hpp @@ -276,7 +276,7 @@ void copy_with_padding( raft::resources const& res, raft::device_matrix& dst, raft::mdspan, raft::row_major, data_accessor> src, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref()) { size_t padded_dim = raft::round_up_safe(src.extent(1) * sizeof(T), 16) / sizeof(T); diff --git a/cpp/src/neighbors/detail/vpq_dataset.cuh b/cpp/src/neighbors/detail/vpq_dataset.cuh index c1136db645..49b5e12455 100644 --- a/cpp/src/neighbors/detail/vpq_dataset.cuh +++ b/cpp/src/neighbors/detail/vpq_dataset.cuh @@ -513,7 +513,7 @@ void process_and_fill_codes( static_cast(dim), static_cast(max_batch_size), stream, - rmm::mr::get_current_device_resource()); + rmm::mr::get_current_device_resource_ref()); for (const auto& batch : _vpq_batches_codes) { auto batch_view = raft::make_device_matrix_view(batch.data(), ix_t(batch.size()), dim); auto batch_labels_view = raft::make_device_vector_view(nullptr, 0); diff --git a/cpp/tests/neighbors/naive_knn.cuh b/cpp/tests/neighbors/naive_knn.cuh index b8b4c791a7..d21e982902 100644 --- a/cpp/tests/neighbors/naive_knn.cuh +++ b/cpp/tests/neighbors/naive_knn.cuh @@ -104,7 +104,7 @@ void naive_knn(raft::resources const& handle, uint32_t k, cuvs::distance::DistanceType type) { - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource(); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref(); auto stream = raft::resource::get_cuda_stream(handle); dim3 block_dim(16, 32, 1); diff --git a/docs/source/api_basics.rst b/docs/source/api_basics.rst index 8c0aadb09a..5ffb1da630 100644 --- a/docs/source/api_basics.rst +++ b/docs/source/api_basics.rst @@ -24,7 +24,7 @@ Here's an example of configuring RMM to use a pool allocator in C++ (derived fro auto initial_size = rmm::percent_of_free_device_memory(50); rmm::mr::pool_memory_resource pool_mr{cuda_mr, initial_size}; rmm::mr::set_current_device_resource(pool_mr); - auto mr = rmm::mr::get_current_device_resource(); + auto mr = rmm::mr::get_current_device_resource_ref(); Python ^^^^^^ diff --git a/examples/cpp/src/cagra_example.cu b/examples/cpp/src/cagra_example.cu index 85be6e4035..856030c520 100644 --- a/examples/cpp/src/cagra_example.cu +++ b/examples/cpp/src/cagra_example.cu @@ -53,7 +53,7 @@ int main() raft::device_resources dev_resources; // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); diff --git a/examples/cpp/src/cagra_hnsw_ace_example.cu b/examples/cpp/src/cagra_hnsw_ace_example.cu index 79f08a7098..d1bde25ad6 100644 --- a/examples/cpp/src/cagra_hnsw_ace_example.cu +++ b/examples/cpp/src/cagra_hnsw_ace_example.cu @@ -156,7 +156,7 @@ int main() raft::device_resources dev_resources; // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); diff --git a/examples/cpp/src/cagra_persistent_example.cu b/examples/cpp/src/cagra_persistent_example.cu index a413858a9c..ded3a287b2 100644 --- a/examples/cpp/src/cagra_persistent_example.cu +++ b/examples/cpp/src/cagra_persistent_example.cu @@ -260,7 +260,7 @@ int main() // This is important because we run the async loop with a very large number of jobs, // which would otherwise swamp a normal pool memory resource. // (the non-persistent implementation would hang forever). - rmm::mr::arena_memory_resource mr(rmm::mr::get_current_device_resource(), mem_size); + rmm::mr::arena_memory_resource mr(rmm::mr::get_current_device_resource_ref(), mem_size); rmm::mr::set_current_device_resource(mr); std::cout << "GPU Arena memory resource size: " << mem_size / (1024ll * 1024ll) << " MiB" << std::endl; diff --git a/examples/cpp/src/hnsw_ace_example.cu b/examples/cpp/src/hnsw_ace_example.cu index 6ede6b50ea..22354193e5 100644 --- a/examples/cpp/src/hnsw_ace_example.cu +++ b/examples/cpp/src/hnsw_ace_example.cu @@ -138,7 +138,7 @@ int main() raft::device_resources dev_resources; // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); diff --git a/examples/cpp/src/ivf_flat_example.cu b/examples/cpp/src/ivf_flat_example.cu index ed9b04e6f1..163e3b4719 100644 --- a/examples/cpp/src/ivf_flat_example.cu +++ b/examples/cpp/src/ivf_flat_example.cu @@ -112,7 +112,7 @@ int main() raft::device_resources dev_resources; // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); diff --git a/examples/cpp/src/ivf_pq_example.cu b/examples/cpp/src/ivf_pq_example.cu index f1b4e8af7c..f27507489e 100644 --- a/examples/cpp/src/ivf_pq_example.cu +++ b/examples/cpp/src/ivf_pq_example.cu @@ -78,7 +78,7 @@ int main() raft::device_resources dev_resources; // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); diff --git a/examples/cpp/src/scann_example.cu b/examples/cpp/src/scann_example.cu index a857fd9ff4..70aff11b44 100644 --- a/examples/cpp/src/scann_example.cu +++ b/examples/cpp/src/scann_example.cu @@ -54,7 +54,7 @@ int main(int argc, char* argv[]) // Set pool memory resource with 1 GiB initial pool size. All allocations use // the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); diff --git a/examples/cpp/src/vamana_example.cu b/examples/cpp/src/vamana_example.cu index c23a49e735..b43eb20b1d 100644 --- a/examples/cpp/src/vamana_example.cu +++ b/examples/cpp/src/vamana_example.cu @@ -86,7 +86,7 @@ int main(int argc, char* argv[]) // Set pool memory resource with 1 GiB initial pool size. All allocations use // the same pool. - rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(), + rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(), 1024 * 1024 * 1024ull); rmm::mr::set_current_device_resource(pool_mr); From 668746ca53a4501846b1dbced80538d644af7f9f Mon Sep 17 00:00:00 2001 From: tarang-jain Date: Wed, 20 May 2026 08:50:35 -0700 Subject: [PATCH 4/4] revert workspace_resource_ref --- cpp/src/cluster/detail/kmeans_balanced.cuh | 6 ++-- cpp/src/cluster/kmeans_balanced.cuh | 4 +-- .../kmeans_balanced_build_clusters_impl.cuh | 2 +- cpp/src/distance/detail/masked_nn.cuh | 2 +- cpp/src/neighbors/composite/index.cu | 2 +- cpp/src/neighbors/detail/ann_utils.cuh | 12 +++---- cpp/src/neighbors/detail/cagra/add_nodes.cuh | 2 +- .../neighbors/detail/cagra/cagra_build.cuh | 6 ++-- cpp/src/neighbors/detail/cagra/graph_core.cuh | 10 +++--- .../neighbors/detail/cagra/search_plan.cuh | 4 +-- cpp/src/neighbors/detail/cagra/utils.hpp | 2 +- .../neighbors/detail/vamana/vamana_build.cuh | 30 +++++++++--------- cpp/src/neighbors/detail/vpq_dataset.cuh | 2 +- cpp/src/neighbors/ivf_common.cuh | 2 +- cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh | 16 +++++----- .../neighbors/ivf_flat/ivf_flat_search.cuh | 31 ++++++++++--------- cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh | 19 ++++++------ cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh | 4 +-- cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh | 6 ++-- cpp/src/neighbors/scann/detail/scann_avq.cuh | 6 ++-- .../neighbors/scann/detail/scann_build.cuh | 2 +- .../preprocessing/quantize/detail/binary.cuh | 8 ++--- cpp/src/preprocessing/quantize/detail/pq.cuh | 2 +- .../ann_cagra/test_batch_load_iterator.cu | 10 +++--- 24 files changed, 97 insertions(+), 93 deletions(-) diff --git a/cpp/src/cluster/detail/kmeans_balanced.cuh b/cpp/src/cluster/detail/kmeans_balanced.cuh index 7003587e7a..26ce110158 100644 --- a/cpp/src/cluster/detail/kmeans_balanced.cuh +++ b/cpp/src/cluster/detail/kmeans_balanced.cuh @@ -321,7 +321,7 @@ void compute_norm(const raft::resources& handle, raft::common::nvtx::range fun_scope("compute_norm"); auto stream = raft::resource::get_cuda_stream(handle); rmm::device_uvector mapped_dataset( - 0, stream, mr.value_or(raft::resource::get_workspace_resource(handle))); + 0, stream, mr.value_or(raft::resource::get_workspace_resource_ref(handle))); const MathT* dataset_ptr = nullptr; @@ -383,7 +383,7 @@ void predict(const raft::resources& handle, auto stream = raft::resource::get_cuda_stream(handle); raft::common::nvtx::range fun_scope( "predict(%zu, %u)", static_cast(n_rows), n_clusters); - auto mem_res = mr.value_or(raft::resource::get_workspace_resource(handle)); + auto mem_res = mr.value_or(raft::resource::get_workspace_resource_ref(handle)); auto [max_minibatch_size, _mem_per_row] = calc_minibatch_size( handle, n_clusters, n_rows, dim, params.metric, std::is_same_v); rmm::device_uvector cur_dataset( @@ -995,7 +995,7 @@ void build_hierarchical(const raft::resources& handle, // TODO: Remove the explicit managed memory- we shouldn't be creating this on the user's behalf. rmm::mr::managed_memory_resource managed_memory; - rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle); + rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource_ref(handle); auto [max_minibatch_size, mem_per_row] = calc_minibatch_size( handle, n_clusters, n_rows, dim, params.metric, std::is_same_v); diff --git a/cpp/src/cluster/kmeans_balanced.cuh b/cpp/src/cluster/kmeans_balanced.cuh index 0c0df03397..f3f52c2d8f 100644 --- a/cpp/src/cluster/kmeans_balanced.cuh +++ b/cpp/src/cluster/kmeans_balanced.cuh @@ -154,7 +154,7 @@ void predict(const raft::resources& handle, X.extent(0), labels.data_handle(), mapping_op, - raft::resource::get_workspace_resource(handle)); + raft::resource::get_workspace_resource_ref(handle)); } namespace helpers { @@ -305,7 +305,7 @@ void calc_centers_and_sizes(const raft::resources& handle, labels.data_handle(), reset_counters, mapping_op, - raft::resource::get_workspace_resource(handle)); + raft::resource::get_workspace_resource_ref(handle)); } } // namespace helpers diff --git a/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh b/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh index e1ace274e2..2bce856c6c 100644 --- a/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh +++ b/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh @@ -68,7 +68,7 @@ void build_clusters(const raft::resources& handle, labels.data_handle(), cluster_sizes.data_handle(), mapping_op, - raft::resource::get_workspace_resource(handle), + raft::resource::get_workspace_resource_ref(handle), X_norm.has_value() ? X_norm.value().data_handle() : nullptr); } diff --git a/cpp/src/distance/detail/masked_nn.cuh b/cpp/src/distance/detail/masked_nn.cuh index 5381b433ec..a3a187e8c4 100644 --- a/cpp/src/distance/detail/masked_nn.cuh +++ b/cpp/src/distance/detail/masked_nn.cuh @@ -246,7 +246,7 @@ void masked_l2_nn_impl(raft::resources const& handle, static_assert(P::Mblk == 64, "masked_l2_nn_impl only supports a policy with 64 rows per block."); // Get stream and workspace memory resource - rmm::device_async_resource_ref ws_mr = raft::resource::get_workspace_resource(handle); + rmm::device_async_resource_ref ws_mr = raft::resource::get_workspace_resource_ref(handle); auto stream = raft::resource::get_cuda_stream(handle); // Acquire temporary buffers and initialize to zero: diff --git a/cpp/src/neighbors/composite/index.cu b/cpp/src/neighbors/composite/index.cu index 57bd0816eb..7c02d0e43e 100644 --- a/cpp/src/neighbors/composite/index.cu +++ b/cpp/src/neighbors/composite/index.cu @@ -40,7 +40,7 @@ void composite_index::search( size_t buffer_size = num_queries * K * num_indices; auto main_stream = raft::resource::get_cuda_stream(handle); - auto tmp_res = raft::resource::get_workspace_resource(handle); + auto tmp_res = raft::resource::get_workspace_resource_ref(handle); rmm::device_uvector neighbors_buffer(buffer_size, main_stream, tmp_res); rmm::device_uvector distances_buffer(buffer_size, main_stream, tmp_res); diff --git a/cpp/src/neighbors/detail/ann_utils.cuh b/cpp/src/neighbors/detail/ann_utils.cuh index dda5dd8bc2..bdb2c310c6 100644 --- a/cpp/src/neighbors/detail/ann_utils.cuh +++ b/cpp/src/neighbors/detail/ann_utils.cuh @@ -864,7 +864,7 @@ struct batch_load_iterator { { } - /** Convenience overload that uses `get_workspace_resource(res)` as the memory resource. */ + /** Convenience overload that uses `get_workspace_resource_ref(res)` as the memory resource. */ batch_load_iterator(raft::resources const& res, MdspanT input_view, size_type batch_size, @@ -876,7 +876,7 @@ struct batch_load_iterator { input_view, batch_size, copy_stream, - raft::resource::get_workspace_resource(res), + raft::resource::get_workspace_resource_ref(res), prefetch, initialize, host_writeback) @@ -1036,7 +1036,7 @@ class batch_load_iterator_dyn { { } - /** Convenience overload that uses `get_workspace_resource(res)` as the memory resource. */ + /** Convenience overload that uses `get_workspace_resource_ref(res)` as the memory resource. */ batch_load_iterator_dyn(raft::resources const& res, T* ptr, IdxT n_rows, @@ -1052,7 +1052,7 @@ class batch_load_iterator_dyn { row_width, batch_size, copy_stream, - raft::resource::get_workspace_resource(res), + raft::resource::get_workspace_resource_ref(res), prefetch, initialize, host_writeback) @@ -1229,7 +1229,7 @@ auto make_batch_load_iterator(raft::resources const& res, host_writeback); } -/** Convenience overload that uses `get_workspace_resource(res)` as the memory resource. */ +/** Convenience overload that uses `get_workspace_resource_ref(res)` as the memory resource. */ template auto make_batch_load_iterator(raft::resources const& res, T const* ptr, @@ -1247,7 +1247,7 @@ auto make_batch_load_iterator(raft::resources const& res, row_width, batch_size, copy_stream, - raft::resource::get_workspace_resource(res), + raft::resource::get_workspace_resource_ref(res), prefetch, initialize, host_writeback); diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh index 169a667776..5d0a6654e9 100644 --- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh +++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh @@ -70,7 +70,7 @@ void add_node_core( params.itopk_size = std::max(base_degree * 2lu, 256lu); // Memory space for rank-based neighbor list - auto mr = raft::resource::get_workspace_resource(handle); + auto mr = raft::resource::get_workspace_resource_ref(handle); auto neighbor_indices = raft::make_device_mdarray( handle, mr, raft::make_extents(max_search_batch_size, base_degree)); diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index 5ee9a0a59c..96ff8344d3 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -1701,8 +1701,8 @@ void build_knn_graph( // If the workspace is smaller than desired, put the I/O buffers into the large workspace. rmm::device_async_resource_ref workspace_mr = - use_large_workspace ? raft::resource::get_large_workspace_resource(res) - : raft::resource::get_workspace_resource(res); + use_large_workspace ? raft::resource::get_large_workspace_resource_ref(res) + : raft::resource::get_workspace_resource_ref(res); RAFT_LOG_DEBUG( "IVF-PQ search node_degree: %d, top_k: %d, gpu_top_k: %d, max_batch_size:: %d, n_probes: %u", @@ -2123,7 +2123,7 @@ auto iterative_build_graph( static_cast(dev_query_view.extent(1)), max_chunk_size, raft::resource::get_cuda_stream(res), - raft::resource::get_workspace_resource(res)); + raft::resource::get_workspace_resource_ref(res)); for (const auto& batch : query_batch) { auto batch_dev_query_view = raft::make_device_matrix_view( batch.data(), batch.size(), dev_query_view.extent(1)); diff --git a/cpp/src/neighbors/detail/cagra/graph_core.cuh b/cpp/src/neighbors/detail/cagra/graph_core.cuh index f478cd3fa2..0e2ad6d769 100644 --- a/cpp/src/neighbors/detail/cagra/graph_core.cuh +++ b/cpp/src/neighbors/detail/cagra/graph_core.cuh @@ -851,7 +851,7 @@ void merge_graph_gpu( namespace bli = cuvs::spatial::knn::detail::utils; auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res); - auto workspace_mr = raft::resource::get_workspace_resource(res); + auto workspace_mr = raft::resource::get_workspace_resource_ref(res); bli::batch_load_iterator< raft::mdspan, raft::row_major, AccessorOutputGraph>> @@ -1001,7 +1001,7 @@ void sort_knn_graph( const uint64_t input_graph_degree = knn_graph.extent(1); IdxT* const input_graph_ptr = knn_graph.data_handle(); - auto large_tmp_mr = raft::resource::get_large_workspace_resource(res); + auto large_tmp_mr = raft::resource::get_large_workspace_resource_ref(res); auto d_input_graph = raft::make_device_mdarray( res, large_tmp_mr, raft::make_extents(graph_size, input_graph_degree)); @@ -1625,7 +1625,7 @@ void prune_graph_gpu( namespace bli = cuvs::spatial::knn::detail::utils; auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res); - auto workspace_mr = raft::resource::get_workspace_resource(res); + auto workspace_mr = raft::resource::get_workspace_resource_ref(res); // Single-batch read-only iterator for the input graph (graph_size rows fit in one batch). bli::batch_load_iterator< @@ -1722,9 +1722,9 @@ void optimize( raft::resources res{res_const}; // large temporary memory for large arrays, e.g. everything >= O(graph_size) - auto large_tmp_mr = raft::resource::get_large_workspace_resource(res); + auto large_tmp_mr = raft::resource::get_large_workspace_resource_ref(res); // temporary memory for small arrays, e.g. everything <= O(batchsize * graph_degree) - auto default_ws_mr = raft::resource::get_workspace_resource(res); + auto default_ws_mr = raft::resource::get_workspace_resource_ref(res); // create a stream pool if not already present if (!res.has_resource_factory(raft::resource::resource_type::CUDA_STREAM_POOL) || diff --git a/cpp/src/neighbors/detail/cagra/search_plan.cuh b/cpp/src/neighbors/detail/cagra/search_plan.cuh index 03b2962e4c..8a9d79e177 100644 --- a/cpp/src/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/src/neighbors/detail/cagra/search_plan.cuh @@ -58,7 +58,7 @@ struct lightweight_uvector { if (new_size == size_) { return; } if (std::holds_alternative(res_)) { auto& h = std::get(res_); - res_ = rmm_res_type{raft::resource::get_workspace_resource(*h), + res_ = rmm_res_type{raft::resource::get_workspace_resource_ref(*h), raft::resource::get_cuda_stream(*h)}; } auto& [r, s] = std::get(res_); @@ -80,7 +80,7 @@ struct lightweight_uvector { if (new_size == size_) { return; } if (std::holds_alternative(res_)) { auto& h = std::get(res_); - res_ = rmm_res_type{raft::resource::get_workspace_resource(*h), stream}; + res_ = rmm_res_type{raft::resource::get_workspace_resource_ref(*h), stream}; } else { std::get(std::get(res_)) = stream; } diff --git a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp index 07417a3f09..58bf68bb43 100644 --- a/cpp/src/neighbors/detail/cagra/utils.hpp +++ b/cpp/src/neighbors/detail/cagra/utils.hpp @@ -190,7 +190,7 @@ class device_matrix_view_from_host { // live on stack and not returned to a user. // The user may opt to set this resource to managed memory to allow large allocations. device_mem_.emplace(raft::make_device_mdarray( - res, raft::resource::get_large_workspace_resource(res), host_view.extents())); + res, raft::resource::get_large_workspace_resource_ref(res), host_view.extents())); raft::copy(res, device_mem_->view(), host_view); device_ptr = device_mem_->data_handle(); } diff --git a/cpp/src/neighbors/detail/vamana/vamana_build.cuh b/cpp/src/neighbors/detail/vamana/vamana_build.cuh index ecfa809190..336d81215b 100644 --- a/cpp/src/neighbors/detail/vamana/vamana_build.cuh +++ b/cpp/src/neighbors/detail/vamana/vamana_build.cuh @@ -143,7 +143,7 @@ void batched_insert_vamana( auto query_ids = raft::make_device_vector(res, max_batchsize); auto query_list_ptr = raft::make_device_mdarray>( res, - raft::resource::get_large_workspace_resource(res), + raft::resource::get_large_workspace_resource_ref(res), raft::make_extents(max_batchsize + 1)); QueryCandidates* query_list = static_cast*>(query_list_ptr.data_handle()); @@ -151,11 +151,11 @@ void batched_insert_vamana( // Results of each batch of inserts during build - Memory is used by query_list structure auto visited_ids = raft::make_device_mdarray(res, - raft::resource::get_large_workspace_resource(res), + raft::resource::get_large_workspace_resource_ref(res), raft::make_extents(max_batchsize, visited_size)); auto visited_dists = raft::make_device_mdarray(res, - raft::resource::get_large_workspace_resource(res), + raft::resource::get_large_workspace_resource_ref(res), raft::make_extents(max_batchsize, visited_size)); // Assign memory to query_list structures and initialize @@ -167,14 +167,14 @@ void batched_insert_vamana( 1); auto topk_pq_mem = raft::make_device_mdarray>(res, - raft::resource::get_large_workspace_resource(res), + raft::resource::get_large_workspace_resource_ref(res), raft::make_extents(max_batchsize, visited_size)); int align_padding = raft::alignTo(dim, 16) - dim; auto s_coords_mem = raft::make_device_mdarray( res, - raft::resource::get_large_workspace_resource(res), + raft::resource::get_large_workspace_resource_ref(res), raft::make_extents(min(maxBlocks, max(max_batchsize, reverse_batch)), dim + align_padding)); @@ -318,7 +318,7 @@ void batched_insert_vamana( // compute prefix sums of query_list sizes - TODO parallelize prefix sums // auto d_total_edges = raft::make_device_mdarray( - // res, raft::resource::get_workspace_resource(res), raft::make_extents(1)); + // res, raft::resource::get_workspace_resource_ref(res), raft::make_extents(1)); rmm::device_scalar d_total_edges(stream); prefix_sums_sizes<<<1, 1, 0, stream>>>(query_list, step_size, d_total_edges.data()); RAFT_CUDA_TRY(cudaPeekAtLastError()); @@ -329,16 +329,16 @@ void batched_insert_vamana( auto edge_dist_pair = raft::make_device_mdarray>( res, - raft::resource::get_large_workspace_resource(res), + raft::resource::get_large_workspace_resource_ref(res), raft::make_extents(total_edges)); auto edge_dest = raft::make_device_mdarray(res, - raft::resource::get_large_workspace_resource(res), + raft::resource::get_large_workspace_resource_ref(res), raft::make_extents(total_edges)); auto edge_src = raft::make_device_mdarray(res, - raft::resource::get_large_workspace_resource(res), + raft::resource::get_large_workspace_resource_ref(res), raft::make_extents(total_edges)); // Create reverse edge list @@ -367,7 +367,7 @@ void batched_insert_vamana( auto temp_sort_storage = raft::make_device_mdarray( res, - raft::resource::get_large_workspace_resource(res), + raft::resource::get_large_workspace_resource_ref(res), raft::make_extents(temp_storage_bytes / sizeof(IdxT))); // Sort to group reverse edges by destination @@ -406,7 +406,7 @@ void batched_insert_vamana( auto temp_sort_storage = raft::make_device_mdarray( res, - raft::resource::get_large_workspace_resource(res), + raft::resource::get_large_workspace_resource_ref(res), raft::make_extents(temp_storage_bytes / sizeof(IdxT))); // Sort to group reverse edges by destination @@ -451,16 +451,16 @@ void batched_insert_vamana( // Allocate reverse QueryCandidate list based on number of unique destinations auto reverse_list_ptr = raft::make_device_mdarray>( res, - raft::resource::get_large_workspace_resource(res), + raft::resource::get_large_workspace_resource_ref(res), raft::make_extents(reverse_batch)); auto rev_ids = raft::make_device_mdarray(res, - raft::resource::get_large_workspace_resource(res), + raft::resource::get_large_workspace_resource_ref(res), raft::make_extents(reverse_batch, visited_size)); auto rev_dists = raft::make_device_mdarray(res, - raft::resource::get_large_workspace_resource(res), + raft::resource::get_large_workspace_resource_ref(res), raft::make_extents(reverse_batch, visited_size)); QueryCandidates* reverse_list = @@ -663,7 +663,7 @@ index build( static_cast(dim), static_cast(max_batch_size), raft::resource::get_cuda_stream(res), - raft::resource::get_workspace_resource(res)); + raft::resource::get_workspace_resource_ref(res)); for (const auto& batch : _vamana_batches) { // perform rotation auto dataset_rotated = raft::make_device_matrix(res, batch.size(), dim); diff --git a/cpp/src/neighbors/detail/vpq_dataset.cuh b/cpp/src/neighbors/detail/vpq_dataset.cuh index 49b5e12455..ec4a684274 100644 --- a/cpp/src/neighbors/detail/vpq_dataset.cuh +++ b/cpp/src/neighbors/detail/vpq_dataset.cuh @@ -910,7 +910,7 @@ void process_and_fill_codes_subspaces( static_cast(dim), static_cast(max_batch_size), copy_stream, - raft::resource::get_workspace_resource(res), + raft::resource::get_workspace_resource_ref(res), enable_prefetch_stream); vec_batches.prefetch_next_batch(); for (const auto& batch : vec_batches) { diff --git a/cpp/src/neighbors/ivf_common.cuh b/cpp/src/neighbors/ivf_common.cuh index 303098176c..e466a13fd7 100644 --- a/cpp/src/neighbors/ivf_common.cuh +++ b/cpp/src/neighbors/ivf_common.cuh @@ -256,7 +256,7 @@ template void recompute_internal_state(const raft::resources& res, Index& index) { auto stream = raft::resource::get_cuda_stream(res); - auto tmp_res = raft::resource::get_workspace_resource(res); + auto tmp_res = raft::resource::get_workspace_resource_ref(res); rmm::device_uvector sorted_sizes(index.n_lists(), stream, tmp_res); // Actualize the list pointers diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh index 4165bd1087..fffe5134ae 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh @@ -180,8 +180,10 @@ void extend(raft::resources const& handle, RAFT_EXPECTS(new_indices != nullptr || index->size() == 0, "You must pass data indices when the index is non-empty."); - auto new_labels = raft::make_device_mdarray( - handle, raft::resource::get_large_workspace_resource(handle), raft::make_extents(n_rows)); + auto new_labels = + raft::make_device_mdarray(handle, + raft::resource::get_large_workspace_resource_ref(handle), + raft::make_extents(n_rows)); cuvs::cluster::kmeans::balanced_params kmeans_params; kmeans_params.metric = index->metric(); auto orig_centroids_view = @@ -208,7 +210,7 @@ void extend(raft::resources const& handle, IdxT{index->dim()}, max_batch_size, copy_stream, - raft::resource::get_workspace_resource(handle), + raft::resource::get_workspace_resource_ref(handle), enable_prefetch); vec_batches.prefetch_next_batch(); @@ -227,7 +229,7 @@ void extend(raft::resources const& handle, auto* list_sizes_ptr = index->list_sizes().data_handle(); auto old_list_sizes_dev = raft::make_device_mdarray( - handle, raft::resource::get_workspace_resource(handle), raft::make_extents(n_lists)); + handle, raft::resource::get_workspace_resource_ref(handle), raft::make_extents(n_lists)); raft::copy(handle, old_list_sizes_dev.view(), raft::make_device_vector_view(list_sizes_ptr, n_lists)); @@ -303,7 +305,7 @@ void extend(raft::resources const& handle, IdxT{1}, max_batch_size, stream, - raft::resource::get_workspace_resource(handle)); + raft::resource::get_workspace_resource_ref(handle)); vec_batches.reset(); vec_batches.prefetch_next_batch(); auto idx_batch = vec_indices.begin(); @@ -418,7 +420,7 @@ inline auto build(raft::resources const& handle, 1, n_rows / std::max(params.kmeans_trainset_fraction * n_rows, index.n_lists())); auto n_rows_train = n_rows / trainset_ratio; rmm::device_uvector trainset( - n_rows_train * index.dim(), stream, raft::resource::get_large_workspace_resource(handle)); + n_rows_train * index.dim(), stream, raft::resource::get_large_workspace_resource_ref(handle)); // TODO: a proper sampling raft::copy_matrix(trainset.data(), index.dim(), @@ -477,7 +479,7 @@ inline void fill_refinement_index(raft::resources const& handle, "ivf_flat::fill_refinement_index(%zu, %u)", size_t(n_queries)); rmm::device_uvector new_labels( - n_queries * n_candidates, stream, raft::resource::get_workspace_resource(handle)); + n_queries * n_candidates, stream, raft::resource::get_workspace_resource_ref(handle)); auto new_labels_view = raft::make_device_vector_view(new_labels.data(), n_queries * n_candidates); raft::linalg::map_offset( diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh index f42ffdc837..960d48c818 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh @@ -355,21 +355,22 @@ inline void search_with_filtering(raft::resources const& handle, for (uint32_t offset_q = 0; offset_q < n_queries; offset_q += max_queries) { uint32_t queries_batch = raft::min(max_queries, n_queries - offset_q); - search_impl(handle, - index, - effective_metric, - params, - queries + offset_q * index.dim(), - queries_batch, - offset_q, - k, - n_probes, - max_samples, - cuvs::distance::is_min_close(effective_metric), - neighbors + offset_q * k, - distances + offset_q * k, - raft::resource::get_workspace_resource(handle), - sample_filter); + search_impl( + handle, + index, + effective_metric, + params, + queries + offset_q * index.dim(), + queries_batch, + offset_q, + k, + n_probes, + max_samples, + cuvs::distance::is_min_close(effective_metric), + neighbors + offset_q * k, + distances + offset_q * k, + raft::resource::get_workspace_resource_ref(handle), + sample_filter); } } diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh index f0a878f2ec..c562ca9e00 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh @@ -334,7 +334,7 @@ void train_per_subset(raft::resources const& handle, uint32_t max_train_points_per_pq_code) { auto stream = raft::resource::get_cuda_stream(handle); - auto device_memory = raft::resource::get_workspace_resource(handle); + auto device_memory = raft::resource::get_workspace_resource_ref(handle); rmm::device_uvector pq_centers_tmp(impl->pq_centers().size(), stream, device_memory); // Subsampling the train set for codebook generation based on max_train_points_per_pq_code. @@ -416,7 +416,7 @@ void train_per_cluster(raft::resources const& handle, uint32_t max_train_points_per_pq_code) { auto stream = raft::resource::get_cuda_stream(handle); - auto device_memory = raft::resource::get_workspace_resource(handle); + auto device_memory = raft::resource::get_workspace_resource_ref(handle); // NB: Managed memory is used for small arrays accessed from both device and host. There's no // performance reasoning behind this, just avoiding the boilerplate of explicit copies. rmm::mr::managed_memory_resource managed_memory; @@ -596,7 +596,7 @@ void reconstruct_list_data(raft::resources const& res, auto tmp = raft::make_device_mdarray(res, - raft::resource::get_workspace_resource(res), + raft::resource::get_workspace_resource_ref(res), raft::make_extents(n_rows, index.rot_dim())); constexpr uint32_t kBlockSize = 256; @@ -623,7 +623,7 @@ void reconstruct_list_data(raft::resources const& res, float* out_float_ptr = nullptr; rmm::device_uvector out_float_buf( - 0, raft::resource::get_cuda_stream(res), raft::resource::get_workspace_resource(res)); + 0, raft::resource::get_cuda_stream(res), raft::resource::get_workspace_resource_ref(res)); if constexpr (std::is_same_v) { out_float_ptr = out_vectors.data_handle(); } else { @@ -706,7 +706,7 @@ void encode_list_data(raft::resources const& res, auto n_rows = new_vectors.extent(0); if (n_rows == 0) { return; } - auto mr = raft::resource::get_workspace_resource(res); + auto mr = raft::resource::get_workspace_resource_ref(res); auto new_vectors_residual = raft::make_device_mdarray( res, mr, raft::make_extents(n_rows, index->rot_dim())); @@ -997,9 +997,9 @@ void extend(raft::resources const& handle, std::is_same_v, "Unsupported data type"); - rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle); + rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource_ref(handle); rmm::device_async_resource_ref large_memory = - raft::resource::get_large_workspace_resource(handle); + raft::resource::get_large_workspace_resource_ref(handle); // Try to allocate an index with the same parameters and the projected new size // (which can be slightly larger than index->size() + n_rows, due to padding for interleaved). @@ -1269,13 +1269,14 @@ auto build(raft::resources const& handle, size_t(n_rows) / std::max(params.kmeans_trainset_fraction * n_rows, impl->n_lists())); size_t n_rows_train = n_rows / trainset_ratio; - rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle); + rmm::device_async_resource_ref device_memory = + raft::resource::get_workspace_resource_ref(handle); // If the trainset is small enough to comfortably fit into device memory, put it there. // Otherwise, use the managed memory. constexpr size_t kTolerableRatio = 4; rmm::device_async_resource_ref big_memory_resource = - raft::resource::get_large_workspace_resource(handle); + raft::resource::get_large_workspace_resource_ref(handle); if (sizeof(float) * n_rows_train * impl->dim() * kTolerableRatio < raft::resource::get_workspace_free_bytes(handle)) { big_memory_resource = device_memory; diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh index 5799c4bbb4..08fcd1f09a 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh @@ -440,7 +440,7 @@ void ivfpq_search_worker(raft::resources const& handle, topK, index.dim()); auto stream = raft::resource::get_cuda_stream(handle); - auto mr = raft::resource::get_workspace_resource(handle); + auto mr = raft::resource::get_workspace_resource_ref(handle); bool manage_local_topk = is_local_topk_feasible(topK, n_probes, n_queries); auto topk_len = manage_local_topk ? n_probes * topK : max_samples; @@ -927,7 +927,7 @@ inline void search(raft::resources const& handle, max_samples = ms; } - auto mr = raft::resource::get_workspace_resource(handle); + auto mr = raft::resource::get_workspace_resource_ref(handle); // Maximum number of query vectors to search at the same time. // Number of queries in the outer loop, which includes query transform and coarse search. diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh index 9a8b5d7166..e0032a4ac5 100644 --- a/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh +++ b/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh @@ -51,7 +51,7 @@ void transform_batch(raft::resources const& res, raft::device_matrix_view output_dataset) { IdxT n_rows = dataset.extent(0); - rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource(res); + rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource_ref(res); // Compute the labels for each vector cuvs::cluster::kmeans::balanced_params kmeans_params; @@ -115,7 +115,7 @@ void transform(raft::resources const& res, raft::common::nvtx::range fun_scope( "ivf_pq::transform(n_rows = %u, dim = %u)", n_rows, dataset.extent(1)); - rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource(res); + rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource_ref(res); // The cluster centers in the index are stored padded, which is not acceptable by // the kmeans_balanced::predict. Thus, we need the restructuring raft::copy. @@ -138,7 +138,7 @@ void transform(raft::resources const& res, } constexpr size_t max_batch_size = 65536; - rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(res); + rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource_ref(res); auto vec_batches = utils::make_batch_load_iterator(res, dataset.data_handle(), diff --git a/cpp/src/neighbors/scann/detail/scann_avq.cuh b/cpp/src/neighbors/scann/detail/scann_avq.cuh index 6c3bb045e4..0e138ce1a3 100644 --- a/cpp/src/neighbors/scann/detail/scann_avq.cuh +++ b/cpp/src/neighbors/scann/detail/scann_avq.cuh @@ -59,7 +59,7 @@ void compute_cluster_offsets(raft::resources const& dev_resources, { cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); rmm::device_async_resource_ref device_memory = - raft::resource::get_workspace_resource(dev_resources); + raft::resource::get_workspace_resource_ref(dev_resources); // Histrogram to compute cluster sizes int num_levels = cluster_sizes.extent(0) + 1; @@ -138,7 +138,7 @@ void sum_reduce_vector(raft::resources const& dev_resources, { cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); rmm::device_async_resource_ref device_memory = - raft::resource::get_workspace_resource(dev_resources); + raft::resource::get_workspace_resource_ref(dev_resources); size_t temp_storage_bytes = 0; @@ -166,7 +166,7 @@ void cholesky_solver(raft::resources const& dev_resources, cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); cusolverDnHandle_t cusolverH = raft::resource::get_cusolver_dn_handle(dev_resources); rmm::device_async_resource_ref device_memory = - raft::resource::get_workspace_resource(dev_resources); + raft::resource::get_workspace_resource_ref(dev_resources); // RAFT_CUSOLVER_TRY(cusolverDnSetStream(cusolverH, stream)); diff --git a/cpp/src/neighbors/scann/detail/scann_build.cuh b/cpp/src/neighbors/scann/detail/scann_build.cuh index 41d0e2cc95..c01e50bc83 100644 --- a/cpp/src/neighbors/scann/detail/scann_build.cuh +++ b/cpp/src/neighbors/scann/detail/scann_build.cuh @@ -87,7 +87,7 @@ index build( raft::device_vector_view labels_view = idx.labels(); // setup batching for kmeans prediction + quantization - auto device_memory = raft::resource::get_workspace_resource(res); + auto device_memory = raft::resource::get_workspace_resource_ref(res); constexpr size_t kReasonableMaxBatchSize = 65536; size_t max_batch_size = std::min(dataset.extent(0), kReasonableMaxBatchSize); diff --git a/cpp/src/preprocessing/quantize/detail/binary.cuh b/cpp/src/preprocessing/quantize/detail/binary.cuh index fe8288a7cb..6695b8b126 100644 --- a/cpp/src/preprocessing/quantize/detail/binary.cuh +++ b/cpp/src/preprocessing/quantize/detail/binary.cuh @@ -146,7 +146,7 @@ void mean_f16_in_f32(raft::resources const& res, const size_t dataset_size, cudaStream_t cuda_stream) { - auto mr = raft::resource::get_workspace_resource(res); + auto mr = raft::resource::get_workspace_resource_ref(res); auto f32_result_vec = raft::make_device_mdarray(res, mr, raft::make_extents(dataset_dim)); raft::matrix::fill(res, f32_result_vec.view(), float(0)); @@ -212,7 +212,7 @@ auto train(raft::resources const& res, static_cast(dataset_dim)); raft::random::RngState rng(29837lu); - auto mr = raft::resource::get_workspace_resource(res); + auto mr = raft::resource::get_workspace_resource_ref(res); auto sampled_dataset_chunk = raft::make_device_mdarray( res, mr, raft::make_extents(num_samples, max_dim_chunk)); auto transposed_sampled_dataset_chunk = raft::make_device_mdarray( @@ -331,7 +331,7 @@ auto train(raft::resources const& res, raft::make_host_vector_view(host_threshold_vec.data(), (int64_t)dataset_dim)); } else { - auto mr = raft::resource::get_workspace_resource(res); + auto mr = raft::resource::get_workspace_resource_ref(res); auto casted_vec = raft::make_device_mdarray( res, mr, raft::make_extents(dataset_dim)); raft::copy(res, @@ -425,7 +425,7 @@ void transform(raft::resources const& res, raft::make_device_vector_view(quantizer.threshold.data_handle(), (int64_t)dataset_dim)); } else { - auto mr = raft::resource::get_workspace_resource(res); + auto mr = raft::resource::get_workspace_resource_ref(res); auto casted_vec = raft::make_device_mdarray( res, mr, raft::make_extents(dataset_dim)); raft::linalg::map(res, diff --git a/cpp/src/preprocessing/quantize/detail/pq.cuh b/cpp/src/preprocessing/quantize/detail/pq.cuh index 2f417e6281..5d77e2dd44 100644 --- a/cpp/src/preprocessing/quantize/detail/pq.cuh +++ b/cpp/src/preprocessing/quantize/detail/pq.cuh @@ -119,7 +119,7 @@ auto train_pq_subspaces( auto trainset_ptr = !vq_centers.empty() ? pq_trainset.data_handle() : dataset.data_handle(); auto sub_labels = raft::make_device_vector(res, 0); auto pq_cluster_sizes = raft::make_device_vector(res, 0); - auto device_memory = raft::resource::get_workspace_resource(res); + auto device_memory = raft::resource::get_workspace_resource_ref(res); if (is_balanced_kmeans(params)) { sub_labels = raft::make_device_mdarray( res, device_memory, raft::make_extents(n_rows_train)); diff --git a/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu b/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu index 0f78805b5e..c0df2b8826 100644 --- a/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu +++ b/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu @@ -85,7 +85,7 @@ class BatchLoadIteratorTest : public ::testing::Test { int64_t total_processed = 0; auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res); - auto workspace_mr = raft::resource::get_workspace_resource(res); + auto workspace_mr = raft::resource::get_workspace_resource_ref(res); { bli::batch_load_iterator iter(res, @@ -154,7 +154,7 @@ TEST_F(BatchLoadIteratorTest, EmptyViewFromHost) auto host_view = host_empty.view(); auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res); - auto workspace_mr = raft::resource::get_workspace_resource(res); + auto workspace_mr = raft::resource::get_workspace_resource_ref(res); bli::batch_load_iterator> iter( res, host_view, /*batch_size=*/128, copy_stream, workspace_mr, enable_prefetch); @@ -167,7 +167,7 @@ TEST_F(BatchLoadIteratorTest, EmptyViewFromDevice) auto device_view = device_empty.view(); auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res); - auto workspace_mr = raft::resource::get_workspace_resource(res); + auto workspace_mr = raft::resource::get_workspace_resource_ref(res); bli::batch_load_iterator> iter( res, device_view, /*batch_size=*/128, copy_stream, workspace_mr, enable_prefetch); @@ -290,7 +290,7 @@ TEST_F(BatchLoadIteratorTest, MakeBatchLoadIteratorHostPtr) n_cols, batch_size_rows, copy_stream, - raft::resource::get_workspace_resource(res), + raft::resource::get_workspace_resource_ref(res), enable_prefetch); EXPECT_TRUE(iter.does_copy()); @@ -329,7 +329,7 @@ TEST_F(BatchLoadIteratorTest, MakeBatchLoadIteratorDevicePtr) n_cols, batch_size_rows, copy_stream, - raft::resource::get_workspace_resource(res), + raft::resource::get_workspace_resource_ref(res), enable_prefetch); EXPECT_FALSE(iter.does_copy());