From 8a5ec10b1fb7f9e7850036136d5d88c13c93b508 Mon Sep 17 00:00:00 2001
From: tarang-jain <tarangj@nvidia.com>
Date: Wed, 20 May 2026 08:22:02 -0700
Subject: [PATCH 1/4] replace _ref

---
 c/src/core/c_api.cpp                          |  6 ++--
 .../src/common/cuda_huge_page_resource.hpp    |  2 +-
 cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h |  6 ++--
 cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu  |  4 +--
 cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu  |  4 +--
 cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h   |  6 ++--
 cpp/bench/ann/src/cuvs/cuvs_vamana.cu         |  4 +--
 cpp/cmake/patches/faiss-1.14-cuvs-26.06.diff  |  2 +-
 .../cuvs_internal/neighbors/naive_knn.cuh     |  4 +--
 cpp/src/cluster/detail/kmeans_balanced.cuh    | 30 ++++++++--------
 cpp/src/cluster/kmeans_balanced.cuh           |  4 +--
 .../kmeans_balanced_build_clusters_impl.cuh   |  2 +-
 cpp/src/distance/detail/masked_nn.cuh         |  4 +--
 cpp/src/neighbors/composite/index.cu          |  2 +-
 cpp/src/neighbors/detail/ann_utils.cuh        | 24 ++++++-------
 cpp/src/neighbors/detail/cagra/add_nodes.cuh  |  2 +-
 .../neighbors/detail/cagra/cagra_build.cuh    | 10 +++---
 .../neighbors/detail/cagra/cagra_merge.cuh    |  2 +-
 cpp/src/neighbors/detail/cagra/graph_core.cuh | 10 +++---
 .../neighbors/detail/cagra/search_plan.cuh    |  8 ++---
 cpp/src/neighbors/detail/cagra/utils.hpp      |  6 ++--
 .../neighbors/detail/vamana/greedy_search.cuh |  2 +-
 .../neighbors/detail/vamana/vamana_build.cuh  | 30 ++++++++--------
 cpp/src/neighbors/detail/vpq_dataset.cuh      |  4 +--
 cpp/src/neighbors/ivf_common.cu               |  2 +-
 cpp/src/neighbors/ivf_common.cuh              |  4 +--
 cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh | 16 ++++-----
 .../neighbors/ivf_flat/ivf_flat_search.cuh    | 35 +++++++++----------
 cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh     | 32 ++++++++---------
 cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh    | 10 +++---
 cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh | 10 +++---
 cpp/src/neighbors/scann/detail/scann_avq.cuh  | 17 ++++-----
 .../neighbors/scann/detail/scann_build.cuh    |  2 +-
 .../preprocessing/quantize/detail/binary.cuh  |  8 ++---
 cpp/src/preprocessing/quantize/detail/pq.cuh  |  2 +-
 .../ann_cagra/test_batch_load_iterator.cu     | 10 +++---
 cpp/tests/neighbors/naive_knn.cuh             |  4 +--
 docs/source/api_basics.rst                    |  2 +-
 examples/cpp/src/cagra_example.cu             |  2 +-
 examples/cpp/src/cagra_hnsw_ace_example.cu    |  2 +-
 examples/cpp/src/cagra_persistent_example.cu  |  2 +-
 examples/cpp/src/hnsw_ace_example.cu          |  2 +-
 examples/cpp/src/ivf_flat_example.cu          |  2 +-
 examples/cpp/src/ivf_pq_example.cu            |  2 +-
 examples/cpp/src/scann_example.cu             |  2 +-
 examples/cpp/src/vamana_example.cu            |  2 +-
 46 files changed, 170 insertions(+), 178 deletions(-)

diff --git a/c/src/core/c_api.cpp b/c/src/core/c_api.cpp
index f4e3664482..3847875aa4 100644
--- a/c/src/core/c_api.cpp
+++ b/c/src/core/c_api.cpp
@@ -19,7 +19,7 @@
 #include <rmm/mr/per_device_resource.hpp>
 #include <rmm/mr/pinned_host_memory_resource.hpp>
 #include <rmm/mr/pool_memory_resource.hpp>
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 #include "../core/exceptions.hpp"
 
@@ -132,7 +132,7 @@ extern "C" cuvsError_t cuvsRMMAlloc(cuvsResources_t res, void** ptr, size_t byte
 {
   return cuvs::core::translate_exceptions([=] {
     auto res_ptr = reinterpret_cast<raft::resources*>(res);
-    auto mr      = rmm::mr::get_current_device_resource_ref();
+    auto mr      = rmm::mr::get_current_device_resource();
     *ptr         = mr.allocate(raft::resource::get_cuda_stream(*res_ptr), bytes);
   });
 }
@@ -141,7 +141,7 @@ extern "C" cuvsError_t cuvsRMMFree(cuvsResources_t res, void* ptr, size_t bytes)
 {
   return cuvs::core::translate_exceptions([=] {
     auto res_ptr = reinterpret_cast<raft::resources*>(res);
-    auto mr      = rmm::mr::get_current_device_resource_ref();
+    auto mr      = rmm::mr::get_current_device_resource();
     mr.deallocate(raft::resource::get_cuda_stream(*res_ptr), ptr, bytes);
   });
 }
diff --git a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
index e5555132bb..aae71605a5 100644
--- a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
+++ b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
@@ -8,7 +8,7 @@
 #include <raft/core/logger_macros.hpp>
 
 #include <rmm/detail/aligned.hpp>
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 #include <cuda/memory_resource>
 
diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h
index 1a276e8cc8..f818a1e208 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h
@@ -24,7 +24,7 @@
 #include <rmm/mr/managed_memory_resource.hpp>
 #include <rmm/mr/per_device_resource.hpp>
 #include <rmm/mr/pool_memory_resource.hpp>
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 #include <memory>
 #include <type_traits>
@@ -73,7 +73,7 @@ class shared_raft_resources {
   try : large_mr_() {
     orig_resource_ =
       rmm::mr::set_current_device_resource(rmm::mr::failure_callback_resource_adaptor<>{
-        rmm::mr::pool_memory_resource{rmm::mr::get_current_device_resource_ref(),
+        rmm::mr::pool_memory_resource{rmm::mr::get_current_device_resource(),
                                       1024 * 1024 * 1024ull},
         rmm_oom_callback,
         nullptr});
@@ -98,7 +98,7 @@ class shared_raft_resources {
 
   ~shared_raft_resources() noexcept { rmm::mr::set_current_device_resource(orig_resource_); }
 
-  auto get_large_memory_resource() noexcept -> rmm::device_async_resource_ref { return large_mr_; }
+  auto get_large_memory_resource() noexcept -> rmm::device_async_resource { return large_mr_; }
 
  private:
   cuda::mr::any_resource<cuda::mr::device_accessible> orig_resource_;
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu b/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu
index 1521333c5e..e67adf9005 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -9,7 +9,7 @@
 
 #include <rmm/cuda_device.hpp>
 #include <rmm/mr/pool_memory_resource.hpp>
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 namespace cuvs::bench {
 
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu
index 26028b6d98..9645e5df63 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -9,7 +9,7 @@
 
 #include <rmm/cuda_device.hpp>
 #include <rmm/mr/pool_memory_resource.hpp>
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 namespace cuvs::bench {
 
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
index 87111e4761..d872fd6aab 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
@@ -25,7 +25,7 @@
 #include <raft/util/cudart_utils.hpp>
 
 #include <rmm/device_uvector.hpp>
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 #include <atomic>
 #include <cassert>
@@ -190,12 +190,12 @@ class cuvs_cagra : public algo<T>, public algo_gpu {
   std::shared_ptr<cuvs::neighbors::filtering::base_filter> filter_;
   std::vector<std::shared_ptr<cuvs::neighbors::cagra::index<T, IdxT>>> sub_indices_;
 
-  inline rmm::device_async_resource_ref get_mr(AllocatorType mem_type)
+  inline rmm::device_async_resource get_mr(AllocatorType mem_type)
   {
     switch (mem_type) {
       case (AllocatorType::kHostPinned): return mr_pinned_;
       case (AllocatorType::kHostHugePage): return mr_huge_page_;
-      default: return rmm::mr::get_current_device_resource_ref();
+      default: return rmm::mr::get_current_device_resource();
     }
   }
 };
diff --git a/cpp/bench/ann/src/cuvs/cuvs_vamana.cu b/cpp/bench/ann/src/cuvs/cuvs_vamana.cu
index 185095d5b4..14eb64b55a 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_vamana.cu
+++ b/cpp/bench/ann/src/cuvs/cuvs_vamana.cu
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -8,7 +8,7 @@
 
 #include <rmm/cuda_device.hpp>
 #include <rmm/mr/pool_memory_resource.hpp>
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 namespace cuvs::bench {
 
diff --git a/cpp/cmake/patches/faiss-1.14-cuvs-26.06.diff b/cpp/cmake/patches/faiss-1.14-cuvs-26.06.diff
index 802930da76..4db328e278 100644
--- a/cpp/cmake/patches/faiss-1.14-cuvs-26.06.diff
+++ b/cpp/cmake/patches/faiss-1.14-cuvs-26.06.diff
@@ -61,7 +61,7 @@ index 548618262..3be071550 100644
 -            rmm::mr::device_memory_resource* current_mr =
 -                    rmm::mr::get_per_device_resource(
 +            auto current_mr =
-+                    rmm::mr::get_per_device_resource_ref(
++                    rmm::mr::get_per_device_resource(
                              rmm::cuda_device_id{adjReq.device});
 -            p = current_mr->allocate(adjReq.stream, adjReq.size);
 +            p = current_mr.allocate(adjReq.stream, adjReq.size);
diff --git a/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh b/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh
index 7bc37193a0..9c893587dc 100644
--- a/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh
+++ b/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh
@@ -14,7 +14,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
 #include <rmm/mr/per_device_resource.hpp>
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 namespace cuvs::neighbors {
 
@@ -88,7 +88,7 @@ void naive_knn(raft::resources const& handle,
                uint32_t k,
                cuvs::distance::DistanceType type)
 {
-  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref();
+  rmm::device_async_resource mr = rmm::mr::get_current_device_resource();
 
   auto stream = raft::resource::get_cuda_stream(handle);
   dim3 block_dim(16, 32, 1);
diff --git a/cpp/src/cluster/detail/kmeans_balanced.cuh b/cpp/src/cluster/detail/kmeans_balanced.cuh
index 26ce110158..cd5c6f1aa0 100644
--- a/cpp/src/cluster/detail/kmeans_balanced.cuh
+++ b/cpp/src/cluster/detail/kmeans_balanced.cuh
@@ -36,7 +36,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/mr/managed_memory_resource.hpp>
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 #include <thrust/gather.h>
 #include <thrust/iterator/transform_iterator.h>
@@ -83,7 +83,7 @@ inline std::enable_if_t<std::is_floating_point_v<MathT>> predict_core(
   const MathT* dataset_norm,
   IdxT n_rows,
   LabelT* labels,
-  rmm::device_async_resource_ref mr)
+  rmm::device_async_resource mr)
 {
   auto stream = raft::resource::get_cuda_stream(handle);
   switch (params.metric) {
@@ -253,7 +253,7 @@ void calc_centers_and_sizes(const raft::resources& handle,
                             const LabelT* labels,
                             bool reset_counters,
                             MappingOpT mapping_op,
-                            rmm::device_async_resource_ref mr)
+                            rmm::device_async_resource mr)
 {
   auto stream = raft::resource::get_cuda_stream(handle);
 
@@ -316,12 +316,12 @@ void compute_norm(const raft::resources& handle,
                   IdxT n_rows,
                   MappingOpT mapping_op,
                   FinOpT norm_fin_op,
-                  std::optional<rmm::device_async_resource_ref> mr = std::nullopt)
+                  std::optional<rmm::device_async_resource> mr = std::nullopt)
 {
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope("compute_norm");
   auto stream = raft::resource::get_cuda_stream(handle);
   rmm::device_uvector<MathT> mapped_dataset(
-    0, stream, mr.value_or(raft::resource::get_workspace_resource_ref(handle)));
+    0, stream, mr.value_or(raft::resource::get_workspace_resource(handle)));
 
   const MathT* dataset_ptr = nullptr;
 
@@ -377,13 +377,13 @@ void predict(const raft::resources& handle,
              IdxT n_rows,
              LabelT* labels,
              MappingOpT mapping_op,
-             std::optional<rmm::device_async_resource_ref> mr = std::nullopt,
-             const MathT* dataset_norm                        = nullptr)
+             std::optional<rmm::device_async_resource> mr = std::nullopt,
+             const MathT* dataset_norm                    = nullptr)
 {
   auto stream = raft::resource::get_cuda_stream(handle);
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "predict(%zu, %u)", static_cast<size_t>(n_rows), n_clusters);
-  auto mem_res = mr.value_or(raft::resource::get_workspace_resource_ref(handle));
+  auto mem_res = mr.value_or(raft::resource::get_workspace_resource(handle));
   auto [max_minibatch_size, _mem_per_row] = calc_minibatch_size<MathT>(
     handle, n_clusters, n_rows, dim, params.metric, std::is_same_v<T, MathT>);
   rmm::device_uvector<MathT> cur_dataset(
@@ -551,7 +551,7 @@ auto adjust_centers(MathT* centers,
                     MathT threshold,
                     MappingOpT mapping_op,
                     rmm::cuda_stream_view stream,
-                    rmm::device_async_resource_ref device_memory) -> bool
+                    rmm::device_async_resource device_memory) -> bool
 {
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "adjust_centers(%zu, %u)", static_cast<size_t>(n_rows), n_clusters);
@@ -649,7 +649,7 @@ void balancing_em_iters(const raft::resources& handle,
                         uint32_t balancing_pullback,
                         MathT balancing_threshold,
                         MappingOpT mapping_op,
-                        rmm::device_async_resource_ref device_memory)
+                        rmm::device_async_resource device_memory)
 {
   auto stream                = raft::resource::get_cuda_stream(handle);
   uint32_t balancing_counter = balancing_pullback;
@@ -732,7 +732,7 @@ void build_clusters(const raft::resources& handle,
                     LabelT* cluster_labels,
                     CounterT* cluster_sizes,
                     MappingOpT mapping_op,
-                    rmm::device_async_resource_ref device_memory,
+                    rmm::device_async_resource device_memory,
                     const MathT* dataset_norm = nullptr)
 {
   auto stream = raft::resource::get_cuda_stream(handle);
@@ -874,8 +874,8 @@ auto build_fine_clusters(const raft::resources& handle,
                          IdxT fine_clusters_nums_max,
                          MathT* cluster_centers,
                          MappingOpT mapping_op,
-                         rmm::device_async_resource_ref managed_memory,
-                         rmm::device_async_resource_ref device_memory) -> IdxT
+                         rmm::device_async_resource managed_memory,
+                         rmm::device_async_resource device_memory) -> IdxT
 {
   auto stream = raft::resource::get_cuda_stream(handle);
   rmm::device_uvector<IdxT> mc_trainset_ids_buf(mesocluster_size_max, stream, managed_memory);
@@ -995,8 +995,8 @@ void build_hierarchical(const raft::resources& handle,
 
   // TODO: Remove the explicit managed memory- we shouldn't be creating this on the user's behalf.
   rmm::mr::managed_memory_resource managed_memory;
-  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource_ref(handle);
-  auto [max_minibatch_size, mem_per_row]       = calc_minibatch_size<MathT>(
+  rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(handle);
+  auto [max_minibatch_size, mem_per_row]   = calc_minibatch_size<MathT>(
     handle, n_clusters, n_rows, dim, params.metric, std::is_same_v<T, MathT>);
 
   // Precompute the L2 norm of the dataset if relevant and not yet computed.
diff --git a/cpp/src/cluster/kmeans_balanced.cuh b/cpp/src/cluster/kmeans_balanced.cuh
index f3f52c2d8f..0c0df03397 100644
--- a/cpp/src/cluster/kmeans_balanced.cuh
+++ b/cpp/src/cluster/kmeans_balanced.cuh
@@ -154,7 +154,7 @@ void predict(const raft::resources& handle,
                                          X.extent(0),
                                          labels.data_handle(),
                                          mapping_op,
-                                         raft::resource::get_workspace_resource_ref(handle));
+                                         raft::resource::get_workspace_resource(handle));
 }
 
 namespace helpers {
@@ -305,7 +305,7 @@ void calc_centers_and_sizes(const raft::resources& handle,
     labels.data_handle(),
     reset_counters,
     mapping_op,
-    raft::resource::get_workspace_resource_ref(handle));
+    raft::resource::get_workspace_resource(handle));
 }
 
 }  // namespace helpers
diff --git a/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh b/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh
index 2bce856c6c..e1ace274e2 100644
--- a/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh
+++ b/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh
@@ -68,7 +68,7 @@ void build_clusters(const raft::resources& handle,
     labels.data_handle(),
     cluster_sizes.data_handle(),
     mapping_op,
-    raft::resource::get_workspace_resource_ref(handle),
+    raft::resource::get_workspace_resource(handle),
     X_norm.has_value() ? X_norm.value().data_handle() : nullptr);
 }
 
diff --git a/cpp/src/distance/detail/masked_nn.cuh b/cpp/src/distance/detail/masked_nn.cuh
index a3a187e8c4..4c2ac0a993 100644
--- a/cpp/src/distance/detail/masked_nn.cuh
+++ b/cpp/src/distance/detail/masked_nn.cuh
@@ -246,8 +246,8 @@ void masked_l2_nn_impl(raft::resources const& handle,
   static_assert(P::Mblk == 64, "masked_l2_nn_impl only supports a policy with 64 rows per block.");
 
   // Get stream and workspace memory resource
-  rmm::device_async_resource_ref ws_mr = raft::resource::get_workspace_resource_ref(handle);
-  auto stream                          = raft::resource::get_cuda_stream(handle);
+  rmm::device_async_resource ws_mr = raft::resource::get_workspace_resource(handle);
+  auto stream                      = raft::resource::get_cuda_stream(handle);
 
   // Acquire temporary buffers and initialize to zero:
   // 1) Adjacency matrix bitfield
diff --git a/cpp/src/neighbors/composite/index.cu b/cpp/src/neighbors/composite/index.cu
index 7c02d0e43e..57bd0816eb 100644
--- a/cpp/src/neighbors/composite/index.cu
+++ b/cpp/src/neighbors/composite/index.cu
@@ -40,7 +40,7 @@ void composite_index<T, IdxT, OutputIdxT>::search(
   size_t buffer_size = num_queries * K * num_indices;
 
   auto main_stream = raft::resource::get_cuda_stream(handle);
-  auto tmp_res     = raft::resource::get_workspace_resource_ref(handle);
+  auto tmp_res     = raft::resource::get_workspace_resource(handle);
 
   rmm::device_uvector<out_index_type> neighbors_buffer(buffer_size, main_stream, tmp_res);
   rmm::device_uvector<float> distances_buffer(buffer_size, main_stream, tmp_res);
diff --git a/cpp/src/neighbors/detail/ann_utils.cuh b/cpp/src/neighbors/detail/ann_utils.cuh
index bdb2c310c6..10aee6dee0 100644
--- a/cpp/src/neighbors/detail/ann_utils.cuh
+++ b/cpp/src/neighbors/detail/ann_utils.cuh
@@ -21,7 +21,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 #include <cuda_fp16.hpp>
 
@@ -593,7 +593,7 @@ struct batch_load_iterator {
           MdspanT input_view,
           size_type batch_size,
           rmm::cuda_stream_view copy_stream,
-          rmm::device_async_resource_ref mr,
+          rmm::device_async_resource mr,
           bool prefetch,
           bool initialize,
           bool host_writeback)
@@ -853,7 +853,7 @@ struct batch_load_iterator {
                       MdspanT input_view,
                       size_type batch_size,
                       rmm::cuda_stream_view copy_stream,
-                      rmm::device_async_resource_ref mr,
+                      rmm::device_async_resource mr,
                       bool prefetch       = false,
                       bool initialize     = true,
                       bool host_writeback = false)
@@ -864,7 +864,7 @@ struct batch_load_iterator {
   {
   }
 
-  /** Convenience overload that uses `get_workspace_resource_ref(res)` as the memory resource. */
+  /** Convenience overload that uses `get_workspace_resource(res)` as the memory resource. */
   batch_load_iterator(raft::resources const& res,
                       MdspanT input_view,
                       size_type batch_size,
@@ -876,7 +876,7 @@ struct batch_load_iterator {
                           input_view,
                           batch_size,
                           copy_stream,
-                          raft::resource::get_workspace_resource_ref(res),
+                          raft::resource::get_workspace_resource(res),
                           prefetch,
                           initialize,
                           host_writeback)
@@ -1018,7 +1018,7 @@ class batch_load_iterator_dyn {
                           IdxT row_width,
                           size_type batch_size,
                           rmm::cuda_stream_view copy_stream,
-                          rmm::device_async_resource_ref mr,
+                          rmm::device_async_resource mr,
                           bool prefetch       = false,
                           bool initialize     = true,
                           bool host_writeback = false)
@@ -1036,7 +1036,7 @@ class batch_load_iterator_dyn {
   {
   }
 
-  /** Convenience overload that uses `get_workspace_resource_ref(res)` as the memory resource. */
+  /** Convenience overload that uses `get_workspace_resource(res)` as the memory resource. */
   batch_load_iterator_dyn(raft::resources const& res,
                           T* ptr,
                           IdxT n_rows,
@@ -1052,7 +1052,7 @@ class batch_load_iterator_dyn {
                               row_width,
                               batch_size,
                               copy_stream,
-                              raft::resource::get_workspace_resource_ref(res),
+                              raft::resource::get_workspace_resource(res),
                               prefetch,
                               initialize,
                               host_writeback)
@@ -1149,7 +1149,7 @@ class batch_load_iterator_dyn {
                         IdxT row_width,
                         size_type batch_size,
                         rmm::cuda_stream_view copy_stream,
-                        rmm::device_async_resource_ref mr,
+                        rmm::device_async_resource mr,
                         bool prefetch,
                         bool initialize,
                         bool host_writeback) -> std::variant<HostIter, DeviceIter>
@@ -1212,7 +1212,7 @@ auto make_batch_load_iterator(raft::resources const& res,
                               detail::type_identity_t<IdxT> row_width,
                               size_t batch_size,
                               rmm::cuda_stream_view copy_stream,
-                              rmm::device_async_resource_ref mr,
+                              rmm::device_async_resource mr,
                               bool prefetch       = false,
                               bool initialize     = true,
                               bool host_writeback = false) -> batch_load_iterator_dyn<T, IdxT>
@@ -1229,7 +1229,7 @@ auto make_batch_load_iterator(raft::resources const& res,
                                           host_writeback);
 }
 
-/** Convenience overload that uses `get_workspace_resource_ref(res)` as the memory resource. */
+/** Convenience overload that uses `get_workspace_resource(res)` as the memory resource. */
 template <typename T, typename IdxT = int64_t>
 auto make_batch_load_iterator(raft::resources const& res,
                               T const* ptr,
@@ -1247,7 +1247,7 @@ auto make_batch_load_iterator(raft::resources const& res,
                                            row_width,
                                            batch_size,
                                            copy_stream,
-                                           raft::resource::get_workspace_resource_ref(res),
+                                           raft::resource::get_workspace_resource(res),
                                            prefetch,
                                            initialize,
                                            host_writeback);
diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh
index 5d0a6654e9..169a667776 100644
--- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh
+++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh
@@ -70,7 +70,7 @@ void add_node_core(
   params.itopk_size = std::max(base_degree * 2lu, 256lu);
 
   // Memory space for rank-based neighbor list
-  auto mr = raft::resource::get_workspace_resource_ref(handle);
+  auto mr = raft::resource::get_workspace_resource(handle);
 
   auto neighbor_indices = raft::make_device_mdarray<IdxT, std::int64_t>(
     handle, mr, raft::make_extents<std::int64_t>(max_search_batch_size, base_degree));
diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh
index 96ff8344d3..f5221724ea 100644
--- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh
+++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh
@@ -31,7 +31,7 @@
 // TODO: This shouldn't be calling spatial/knn APIs
 #include "../ann_utils.cuh"
 
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 #include <chrono>
 #include <cstdio>
@@ -1700,9 +1700,9 @@ void build_knn_graph(
   }
 
   // If the workspace is smaller than desired, put the I/O buffers into the large workspace.
-  rmm::device_async_resource_ref workspace_mr =
-    use_large_workspace ? raft::resource::get_large_workspace_resource_ref(res)
-                        : raft::resource::get_workspace_resource_ref(res);
+  rmm::device_async_resource workspace_mr = use_large_workspace
+                                              ? raft::resource::get_large_workspace_resource(res)
+                                              : raft::resource::get_workspace_resource(res);
 
   RAFT_LOG_DEBUG(
     "IVF-PQ search node_degree: %d, top_k: %d,  gpu_top_k: %d,  max_batch_size:: %d, n_probes: %u",
@@ -2123,7 +2123,7 @@ auto iterative_build_graph(
       static_cast<int64_t>(dev_query_view.extent(1)),
       max_chunk_size,
       raft::resource::get_cuda_stream(res),
-      raft::resource::get_workspace_resource_ref(res));
+      raft::resource::get_workspace_resource(res));
     for (const auto& batch : query_batch) {
       auto batch_dev_query_view = raft::make_device_matrix_view<const T, int64_t>(
         batch.data(), batch.size(), dev_query_view.extent(1));
diff --git a/cpp/src/neighbors/detail/cagra/cagra_merge.cuh b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh
index 1dd4cbe075..fcce6f691c 100644
--- a/cpp/src/neighbors/detail/cagra/cagra_merge.cuh
+++ b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh
@@ -21,7 +21,7 @@
 #include <cuvs/neighbors/ivf_pq.hpp>
 #include <cuvs/neighbors/refine.hpp>
 
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 #include <chrono>
 #include <cstdio>
diff --git a/cpp/src/neighbors/detail/cagra/graph_core.cuh b/cpp/src/neighbors/detail/cagra/graph_core.cuh
index 0e2ad6d769..f478cd3fa2 100644
--- a/cpp/src/neighbors/detail/cagra/graph_core.cuh
+++ b/cpp/src/neighbors/detail/cagra/graph_core.cuh
@@ -851,7 +851,7 @@ void merge_graph_gpu(
 
   namespace bli                       = cuvs::spatial::knn::detail::utils;
   auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res);
-  auto workspace_mr                   = raft::resource::get_workspace_resource_ref(res);
+  auto workspace_mr                   = raft::resource::get_workspace_resource(res);
 
   bli::batch_load_iterator<
     raft::mdspan<IdxT, raft::matrix_extent<int64_t>, raft::row_major, AccessorOutputGraph>>
@@ -1001,7 +1001,7 @@ void sort_knn_graph(
   const uint64_t input_graph_degree = knn_graph.extent(1);
   IdxT* const input_graph_ptr       = knn_graph.data_handle();
 
-  auto large_tmp_mr = raft::resource::get_large_workspace_resource_ref(res);
+  auto large_tmp_mr = raft::resource::get_large_workspace_resource(res);
 
   auto d_input_graph = raft::make_device_mdarray<IdxT>(
     res, large_tmp_mr, raft::make_extents<int64_t>(graph_size, input_graph_degree));
@@ -1625,7 +1625,7 @@ void prune_graph_gpu(
 
   namespace bli                       = cuvs::spatial::knn::detail::utils;
   auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res);
-  auto workspace_mr                   = raft::resource::get_workspace_resource_ref(res);
+  auto workspace_mr                   = raft::resource::get_workspace_resource(res);
 
   // Single-batch read-only iterator for the input graph (graph_size rows fit in one batch).
   bli::batch_load_iterator<
@@ -1722,9 +1722,9 @@ void optimize(
   raft::resources res{res_const};
 
   // large temporary memory for large arrays, e.g. everything >= O(graph_size)
-  auto large_tmp_mr = raft::resource::get_large_workspace_resource_ref(res);
+  auto large_tmp_mr = raft::resource::get_large_workspace_resource(res);
   // temporary memory for small arrays, e.g. everything <= O(batchsize * graph_degree)
-  auto default_ws_mr = raft::resource::get_workspace_resource_ref(res);
+  auto default_ws_mr = raft::resource::get_workspace_resource(res);
 
   // create a stream pool if not already present
   if (!res.has_resource_factory(raft::resource::resource_type::CUDA_STREAM_POOL) ||
diff --git a/cpp/src/neighbors/detail/cagra/search_plan.cuh b/cpp/src/neighbors/detail/cagra/search_plan.cuh
index 8a9d79e177..f7a5d15a6d 100644
--- a/cpp/src/neighbors/detail/cagra/search_plan.cuh
+++ b/cpp/src/neighbors/detail/cagra/search_plan.cuh
@@ -38,8 +38,8 @@ namespace cuvs::neighbors::cagra::detail {
 template <typename T>
 struct lightweight_uvector {
  private:
-  using raft_res_type = const raft::resources*;
-  using rmm_res_type  = std::tuple<rmm::device_async_resource_ref, rmm::cuda_stream_view>;
+  using raft_res_type            = const raft::resources*;
+  using rmm_res_type             = std::tuple<rmm::device_async_resource, rmm::cuda_stream_view>;
   static constexpr size_t kAlign = 256;
 
   std::variant<raft_res_type, rmm_res_type> res_;
@@ -58,7 +58,7 @@ struct lightweight_uvector {
     if (new_size == size_) { return; }
     if (std::holds_alternative<raft_res_type>(res_)) {
       auto& h = std::get<raft_res_type>(res_);
-      res_    = rmm_res_type{raft::resource::get_workspace_resource_ref(*h),
+      res_    = rmm_res_type{raft::resource::get_workspace_resource(*h),
                           raft::resource::get_cuda_stream(*h)};
     }
     auto& [r, s] = std::get<rmm_res_type>(res_);
@@ -80,7 +80,7 @@ struct lightweight_uvector {
     if (new_size == size_) { return; }
     if (std::holds_alternative<raft_res_type>(res_)) {
       auto& h = std::get<raft_res_type>(res_);
-      res_    = rmm_res_type{raft::resource::get_workspace_resource_ref(*h), stream};
+      res_    = rmm_res_type{raft::resource::get_workspace_resource(*h), stream};
     } else {
       std::get<rmm::cuda_stream_view>(std::get<rmm_res_type>(res_)) = stream;
     }
diff --git a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp
index 58bf68bb43..16e84a5d95 100644
--- a/cpp/src/neighbors/detail/cagra/utils.hpp
+++ b/cpp/src/neighbors/detail/cagra/utils.hpp
@@ -18,7 +18,7 @@
 #include <raft/util/cudart_utils.hpp>
 #include <raft/util/integer_utils.hpp>
 #include <rmm/cuda_stream_pool.hpp>
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 #include <cuda.h>
 #include <cuda_fp16.h>
@@ -190,7 +190,7 @@ class device_matrix_view_from_host {
       // live on stack and not returned to a user.
       // The user may opt to set this resource to managed memory to allow large allocations.
       device_mem_.emplace(raft::make_device_mdarray<T, IdxT>(
-        res, raft::resource::get_large_workspace_resource_ref(res), host_view.extents()));
+        res, raft::resource::get_large_workspace_resource(res), host_view.extents()));
       raft::copy(res, device_mem_->view(), host_view);
       device_ptr = device_mem_->data_handle();
     }
@@ -276,7 +276,7 @@ void copy_with_padding(
   raft::resources const& res,
   raft::device_matrix<T, int64_t, raft::row_major>& dst,
   raft::mdspan<const T, raft::matrix_extent<int64_t>, raft::row_major, data_accessor> src,
-  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref())
+  rmm::device_async_resource mr = rmm::mr::get_current_device_resource())
 {
   size_t padded_dim = raft::round_up_safe<size_t>(src.extent(1) * sizeof(T), 16) / sizeof(T);
 
diff --git a/cpp/src/neighbors/detail/vamana/greedy_search.cuh b/cpp/src/neighbors/detail/vamana/greedy_search.cuh
index 4e71c1189c..4bdbd4695f 100644
--- a/cpp/src/neighbors/detail/vamana/greedy_search.cuh
+++ b/cpp/src/neighbors/detail/vamana/greedy_search.cuh
@@ -14,7 +14,7 @@
 
 #include <cuvs/distance/distance.hpp>
 #include <raft/util/warp_primitives.cuh>
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 #include <chrono>
 #include <cstdio>
diff --git a/cpp/src/neighbors/detail/vamana/vamana_build.cuh b/cpp/src/neighbors/detail/vamana/vamana_build.cuh
index 336d81215b..ecfa809190 100644
--- a/cpp/src/neighbors/detail/vamana/vamana_build.cuh
+++ b/cpp/src/neighbors/detail/vamana/vamana_build.cuh
@@ -143,7 +143,7 @@ void batched_insert_vamana(
   auto query_ids      = raft::make_device_vector<IdxT>(res, max_batchsize);
   auto query_list_ptr = raft::make_device_mdarray<QueryCandidates<IdxT, accT>>(
     res,
-    raft::resource::get_large_workspace_resource_ref(res),
+    raft::resource::get_large_workspace_resource(res),
     raft::make_extents<int64_t>(max_batchsize + 1));
   QueryCandidates<IdxT, accT>* query_list =
     static_cast<QueryCandidates<IdxT, accT>*>(query_list_ptr.data_handle());
@@ -151,11 +151,11 @@ void batched_insert_vamana(
   // Results of each batch of inserts during build - Memory is used by query_list structure
   auto visited_ids =
     raft::make_device_mdarray<IdxT>(res,
-                                    raft::resource::get_large_workspace_resource_ref(res),
+                                    raft::resource::get_large_workspace_resource(res),
                                     raft::make_extents<int64_t>(max_batchsize, visited_size));
   auto visited_dists =
     raft::make_device_mdarray<accT>(res,
-                                    raft::resource::get_large_workspace_resource_ref(res),
+                                    raft::resource::get_large_workspace_resource(res),
                                     raft::make_extents<int64_t>(max_batchsize, visited_size));
 
   // Assign memory to query_list structures and initialize
@@ -167,14 +167,14 @@ void batched_insert_vamana(
                                                                     1);
   auto topk_pq_mem =
     raft::make_device_mdarray<Node<accT>>(res,
-                                          raft::resource::get_large_workspace_resource_ref(res),
+                                          raft::resource::get_large_workspace_resource(res),
                                           raft::make_extents<int64_t>(max_batchsize, visited_size));
 
   int align_padding = raft::alignTo(dim, 16) - dim;
 
   auto s_coords_mem = raft::make_device_mdarray<T>(
     res,
-    raft::resource::get_large_workspace_resource_ref(res),
+    raft::resource::get_large_workspace_resource(res),
     raft::make_extents<int64_t>(min(maxBlocks, max(max_batchsize, reverse_batch)),
                                 dim + align_padding));
 
@@ -318,7 +318,7 @@ void batched_insert_vamana(
 
     // compute prefix sums of query_list sizes - TODO parallelize prefix sums
     //    auto d_total_edges = raft::make_device_mdarray<int>(
-    //      res, raft::resource::get_workspace_resource_ref(res), raft::make_extents<int64_t>(1));
+    //      res, raft::resource::get_workspace_resource(res), raft::make_extents<int64_t>(1));
     rmm::device_scalar<int> d_total_edges(stream);
     prefix_sums_sizes<accT, IdxT><<<1, 1, 0, stream>>>(query_list, step_size, d_total_edges.data());
     RAFT_CUDA_TRY(cudaPeekAtLastError());
@@ -329,16 +329,16 @@ void batched_insert_vamana(
 
     auto edge_dist_pair = raft::make_device_mdarray<DistPair<IdxT, accT>>(
       res,
-      raft::resource::get_large_workspace_resource_ref(res),
+      raft::resource::get_large_workspace_resource(res),
       raft::make_extents<int64_t>(total_edges));
 
     auto edge_dest =
       raft::make_device_mdarray<IdxT>(res,
-                                      raft::resource::get_large_workspace_resource_ref(res),
+                                      raft::resource::get_large_workspace_resource(res),
                                       raft::make_extents<int64_t>(total_edges));
     auto edge_src =
       raft::make_device_mdarray<IdxT>(res,
-                                      raft::resource::get_large_workspace_resource_ref(res),
+                                      raft::resource::get_large_workspace_resource(res),
                                       raft::make_extents<int64_t>(total_edges));
 
     // Create reverse edge list
@@ -367,7 +367,7 @@ void batched_insert_vamana(
 
       auto temp_sort_storage = raft::make_device_mdarray<IdxT>(
         res,
-        raft::resource::get_large_workspace_resource_ref(res),
+        raft::resource::get_large_workspace_resource(res),
         raft::make_extents<int64_t>(temp_storage_bytes / sizeof(IdxT)));
 
       // Sort to group reverse edges by destination
@@ -406,7 +406,7 @@ void batched_insert_vamana(
 
     auto temp_sort_storage = raft::make_device_mdarray<IdxT>(
       res,
-      raft::resource::get_large_workspace_resource_ref(res),
+      raft::resource::get_large_workspace_resource(res),
       raft::make_extents<int64_t>(temp_storage_bytes / sizeof(IdxT)));
 
     // Sort to group reverse edges by destination
@@ -451,16 +451,16 @@ void batched_insert_vamana(
       // Allocate reverse QueryCandidate list based on number of unique destinations
       auto reverse_list_ptr = raft::make_device_mdarray<QueryCandidates<IdxT, accT>>(
         res,
-        raft::resource::get_large_workspace_resource_ref(res),
+        raft::resource::get_large_workspace_resource(res),
         raft::make_extents<int64_t>(reverse_batch));
       auto rev_ids =
         raft::make_device_mdarray<IdxT>(res,
-                                        raft::resource::get_large_workspace_resource_ref(res),
+                                        raft::resource::get_large_workspace_resource(res),
                                         raft::make_extents<int64_t>(reverse_batch, visited_size));
 
       auto rev_dists =
         raft::make_device_mdarray<accT>(res,
-                                        raft::resource::get_large_workspace_resource_ref(res),
+                                        raft::resource::get_large_workspace_resource(res),
                                         raft::make_extents<int64_t>(reverse_batch, visited_size));
 
       QueryCandidates<IdxT, accT>* reverse_list =
@@ -663,7 +663,7 @@ index<T, IdxT> build(
       static_cast<int64_t>(dim),
       static_cast<size_t>(max_batch_size),
       raft::resource::get_cuda_stream(res),
-      raft::resource::get_workspace_resource_ref(res));
+      raft::resource::get_workspace_resource(res));
     for (const auto& batch : _vamana_batches) {
       // perform rotation
       auto dataset_rotated = raft::make_device_matrix<float, int64_t>(res, batch.size(), dim);
diff --git a/cpp/src/neighbors/detail/vpq_dataset.cuh b/cpp/src/neighbors/detail/vpq_dataset.cuh
index ec4a684274..c1136db645 100644
--- a/cpp/src/neighbors/detail/vpq_dataset.cuh
+++ b/cpp/src/neighbors/detail/vpq_dataset.cuh
@@ -513,7 +513,7 @@ void process_and_fill_codes(
     static_cast<ix_t>(dim),
     static_cast<size_t>(max_batch_size),
     stream,
-    rmm::mr::get_current_device_resource_ref());
+    rmm::mr::get_current_device_resource());
   for (const auto& batch : _vpq_batches_codes) {
     auto batch_view        = raft::make_device_matrix_view(batch.data(), ix_t(batch.size()), dim);
     auto batch_labels_view = raft::make_device_vector_view<label_t, IdxT>(nullptr, 0);
@@ -910,7 +910,7 @@ void process_and_fill_codes_subspaces(
     static_cast<ix_t>(dim),
     static_cast<size_t>(max_batch_size),
     copy_stream,
-    raft::resource::get_workspace_resource_ref(res),
+    raft::resource::get_workspace_resource(res),
     enable_prefetch_stream);
   vec_batches.prefetch_next_batch();
   for (const auto& batch : vec_batches) {
diff --git a/cpp/src/neighbors/ivf_common.cu b/cpp/src/neighbors/ivf_common.cu
index b87a14f7c3..a7a1a2823a 100644
--- a/cpp/src/neighbors/ivf_common.cu
+++ b/cpp/src/neighbors/ivf_common.cu
@@ -79,7 +79,7 @@ void sort_cluster_sizes_descending(uint32_t* input,
                                    uint32_t* output,
                                    uint32_t n_lists,
                                    rmm::cuda_stream_view stream,
-                                   rmm::device_async_resource_ref tmp_res)
+                                   rmm::device_async_resource tmp_res)
 {
   int begin_bit             = 0;
   int end_bit               = sizeof(uint32_t) * 8;
diff --git a/cpp/src/neighbors/ivf_common.cuh b/cpp/src/neighbors/ivf_common.cuh
index e466a13fd7..b8fb54ca60 100644
--- a/cpp/src/neighbors/ivf_common.cuh
+++ b/cpp/src/neighbors/ivf_common.cuh
@@ -20,7 +20,7 @@ void sort_cluster_sizes_descending(uint32_t* input,
                                    uint32_t* output,
                                    uint32_t n_lists,
                                    rmm::cuda_stream_view stream,
-                                   rmm::device_async_resource_ref tmp_res);
+                                   rmm::device_async_resource tmp_res);
 
 /**
  * Default value returned by `search` when the `n_probes` is too small and top-k is too large.
@@ -256,7 +256,7 @@ template <typename Index>
 void recompute_internal_state(const raft::resources& res, Index& index)
 {
   auto stream  = raft::resource::get_cuda_stream(res);
-  auto tmp_res = raft::resource::get_workspace_resource_ref(res);
+  auto tmp_res = raft::resource::get_workspace_resource(res);
   rmm::device_uvector<uint32_t> sorted_sizes(index.n_lists(), stream, tmp_res);
 
   // Actualize the list pointers
diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh
index fffe5134ae..4165bd1087 100644
--- a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh
+++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh
@@ -180,10 +180,8 @@ void extend(raft::resources const& handle,
   RAFT_EXPECTS(new_indices != nullptr || index->size() == 0,
                "You must pass data indices when the index is non-empty.");
 
-  auto new_labels =
-    raft::make_device_mdarray<LabelT>(handle,
-                                      raft::resource::get_large_workspace_resource_ref(handle),
-                                      raft::make_extents<IdxT>(n_rows));
+  auto new_labels = raft::make_device_mdarray<LabelT>(
+    handle, raft::resource::get_large_workspace_resource(handle), raft::make_extents<IdxT>(n_rows));
   cuvs::cluster::kmeans::balanced_params kmeans_params;
   kmeans_params.metric = index->metric();
   auto orig_centroids_view =
@@ -210,7 +208,7 @@ void extend(raft::resources const& handle,
                                        IdxT{index->dim()},
                                        max_batch_size,
                                        copy_stream,
-                                       raft::resource::get_workspace_resource_ref(handle),
+                                       raft::resource::get_workspace_resource(handle),
                                        enable_prefetch);
   vec_batches.prefetch_next_batch();
 
@@ -229,7 +227,7 @@ void extend(raft::resources const& handle,
 
   auto* list_sizes_ptr    = index->list_sizes().data_handle();
   auto old_list_sizes_dev = raft::make_device_mdarray<uint32_t>(
-    handle, raft::resource::get_workspace_resource_ref(handle), raft::make_extents<IdxT>(n_lists));
+    handle, raft::resource::get_workspace_resource(handle), raft::make_extents<IdxT>(n_lists));
   raft::copy(handle,
              old_list_sizes_dev.view(),
              raft::make_device_vector_view<const uint32_t, IdxT>(list_sizes_ptr, n_lists));
@@ -305,7 +303,7 @@ void extend(raft::resources const& handle,
                                           IdxT{1},
                                           max_batch_size,
                                           stream,
-                                          raft::resource::get_workspace_resource_ref(handle));
+                                          raft::resource::get_workspace_resource(handle));
   vec_batches.reset();
   vec_batches.prefetch_next_batch();
   auto idx_batch            = vec_indices.begin();
@@ -420,7 +418,7 @@ inline auto build(raft::resources const& handle,
       1, n_rows / std::max<size_t>(params.kmeans_trainset_fraction * n_rows, index.n_lists()));
     auto n_rows_train = n_rows / trainset_ratio;
     rmm::device_uvector<T> trainset(
-      n_rows_train * index.dim(), stream, raft::resource::get_large_workspace_resource_ref(handle));
+      n_rows_train * index.dim(), stream, raft::resource::get_large_workspace_resource(handle));
     // TODO: a proper sampling
     raft::copy_matrix(trainset.data(),
                       index.dim(),
@@ -479,7 +477,7 @@ inline void fill_refinement_index(raft::resources const& handle,
     "ivf_flat::fill_refinement_index(%zu, %u)", size_t(n_queries));
 
   rmm::device_uvector<LabelT> new_labels(
-    n_queries * n_candidates, stream, raft::resource::get_workspace_resource_ref(handle));
+    n_queries * n_candidates, stream, raft::resource::get_workspace_resource(handle));
   auto new_labels_view =
     raft::make_device_vector_view<LabelT, IdxT>(new_labels.data(), n_queries * n_candidates);
   raft::linalg::map_offset(
diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh
index 960d48c818..6831c77d8c 100644
--- a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh
+++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh
@@ -24,7 +24,7 @@
 #include <raft/linalg/norm.cuh>     // raft::linalg::norm
 #include <raft/matrix/detail/select_warpsort.cuh>
 
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 #include <type_traits>
 
@@ -51,7 +51,7 @@ void search_impl(raft::resources const& handle,
                  bool select_min,
                  IdxT* neighbors,
                  AccT* distances,
-                 rmm::device_async_resource_ref search_mr,
+                 rmm::device_async_resource search_mr,
                  IvfSampleFilterT sample_filter)
 {
   auto stream = raft::resource::get_cuda_stream(handle);
@@ -355,22 +355,21 @@ inline void search_with_filtering(raft::resources const& handle,
   for (uint32_t offset_q = 0; offset_q < n_queries; offset_q += max_queries) {
     uint32_t queries_batch = raft::min(max_queries, n_queries - offset_q);
 
-    search_impl<T, float, IdxT, IvfSampleFilterT>(
-      handle,
-      index,
-      effective_metric,
-      params,
-      queries + offset_q * index.dim(),
-      queries_batch,
-      offset_q,
-      k,
-      n_probes,
-      max_samples,
-      cuvs::distance::is_min_close(effective_metric),
-      neighbors + offset_q * k,
-      distances + offset_q * k,
-      raft::resource::get_workspace_resource_ref(handle),
-      sample_filter);
+    search_impl<T, float, IdxT, IvfSampleFilterT>(handle,
+                                                  index,
+                                                  effective_metric,
+                                                  params,
+                                                  queries + offset_q * index.dim(),
+                                                  queries_batch,
+                                                  offset_q,
+                                                  k,
+                                                  n_probes,
+                                                  max_samples,
+                                                  cuvs::distance::is_min_close(effective_metric),
+                                                  neighbors + offset_q * k,
+                                                  distances + offset_q * k,
+                                                  raft::resource::get_workspace_resource(handle),
+                                                  sample_filter);
   }
 }
 
diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
index c562ca9e00..a668fa3a4b 100644
--- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
+++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
@@ -91,7 +91,7 @@ void select_residuals(raft::resources const& handle,
                       const float* center,           // [dim]
                       const T* dataset,              // [.., dim]
                       const IdxT* row_ids,           // [n_rows]
-                      rmm::device_async_resource_ref device_memory
+                      rmm::device_async_resource device_memory
 
 )
 {
@@ -147,7 +147,7 @@ void flat_compute_residuals(
   raft::device_matrix_view<const float, uint32_t, raft::row_major> centers,  // [n_lists, dim_ext]
   const T* dataset,                                                          // [n_rows, dim]
   std::variant<uint32_t, const uint32_t*> labels,                            // [n_rows]
-  rmm::device_async_resource_ref device_memory,
+  rmm::device_async_resource device_memory,
   cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded)
 {
   auto stream  = raft::resource::get_cuda_stream(handle);
@@ -334,7 +334,7 @@ void train_per_subset(raft::resources const& handle,
                       uint32_t max_train_points_per_pq_code)
 {
   auto stream        = raft::resource::get_cuda_stream(handle);
-  auto device_memory = raft::resource::get_workspace_resource_ref(handle);
+  auto device_memory = raft::resource::get_workspace_resource(handle);
 
   rmm::device_uvector<float> pq_centers_tmp(impl->pq_centers().size(), stream, device_memory);
   // Subsampling the train set for codebook generation based on max_train_points_per_pq_code.
@@ -416,7 +416,7 @@ void train_per_cluster(raft::resources const& handle,
                        uint32_t max_train_points_per_pq_code)
 {
   auto stream        = raft::resource::get_cuda_stream(handle);
-  auto device_memory = raft::resource::get_workspace_resource_ref(handle);
+  auto device_memory = raft::resource::get_workspace_resource(handle);
   // NB: Managed memory is used for small arrays accessed from both device and host. There's no
   // performance reasoning behind this, just avoiding the boilerplate of explicit copies.
   rmm::mr::managed_memory_resource managed_memory;
@@ -596,7 +596,7 @@ void reconstruct_list_data(raft::resources const& res,
 
   auto tmp =
     raft::make_device_mdarray<float>(res,
-                                     raft::resource::get_workspace_resource_ref(res),
+                                     raft::resource::get_workspace_resource(res),
                                      raft::make_extents<uint32_t>(n_rows, index.rot_dim()));
 
   constexpr uint32_t kBlockSize = 256;
@@ -623,7 +623,7 @@ void reconstruct_list_data(raft::resources const& res,
 
   float* out_float_ptr = nullptr;
   rmm::device_uvector<float> out_float_buf(
-    0, raft::resource::get_cuda_stream(res), raft::resource::get_workspace_resource_ref(res));
+    0, raft::resource::get_cuda_stream(res), raft::resource::get_workspace_resource(res));
   if constexpr (std::is_same_v<T, float>) {
     out_float_ptr = out_vectors.data_handle();
   } else {
@@ -706,7 +706,7 @@ void encode_list_data(raft::resources const& res,
   auto n_rows = new_vectors.extent(0);
   if (n_rows == 0) { return; }
 
-  auto mr = raft::resource::get_workspace_resource_ref(res);
+  auto mr = raft::resource::get_workspace_resource(res);
 
   auto new_vectors_residual = raft::make_device_mdarray<float>(
     res, mr, raft::make_extents<uint32_t>(n_rows, index->rot_dim()));
@@ -805,7 +805,7 @@ void process_and_fill_codes(raft::resources const& handle,
                             std::variant<IdxT, const IdxT*> src_offset_or_indices,
                             const uint32_t* new_labels,
                             IdxT n_rows,
-                            rmm::device_async_resource_ref mr)
+                            rmm::device_async_resource mr)
 {
   auto new_vectors_residual =
     raft::make_device_mdarray<float>(handle, mr, raft::make_extents<IdxT>(n_rows, index.rot_dim()));
@@ -997,9 +997,8 @@ void extend(raft::resources const& handle,
                   std::is_same_v<T, int8_t>,
                 "Unsupported data type");
 
-  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource_ref(handle);
-  rmm::device_async_resource_ref large_memory =
-    raft::resource::get_large_workspace_resource_ref(handle);
+  rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(handle);
+  rmm::device_async_resource large_memory  = raft::resource::get_large_workspace_resource(handle);
 
   // Try to allocate an index with the same parameters and the projected new size
   // (which can be slightly larger than index->size() + n_rows, due to padding for interleaved).
@@ -1028,8 +1027,8 @@ void extend(raft::resources const& handle,
   // `large_workspace_resource`, which does not have the explicit allocation limit. The user may opt
   // to populate the `large_workspace_resource` memory resource with managed memory for easier
   // scaling.
-  rmm::device_async_resource_ref labels_mr  = device_memory;
-  rmm::device_async_resource_ref batches_mr = device_memory;
+  rmm::device_async_resource labels_mr  = device_memory;
+  rmm::device_async_resource batches_mr = device_memory;
   if (n_rows * (index->dim() * sizeof(T) + index->pq_dim() + sizeof(IdxT) + sizeof(uint32_t)) >
       free_mem) {
     labels_mr = large_memory;
@@ -1269,14 +1268,13 @@ auto build(raft::resources const& handle,
       size_t(n_rows) / std::max<size_t>(params.kmeans_trainset_fraction * n_rows, impl->n_lists()));
     size_t n_rows_train = n_rows / trainset_ratio;
 
-    rmm::device_async_resource_ref device_memory =
-      raft::resource::get_workspace_resource_ref(handle);
+    rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(handle);
 
     // If the trainset is small enough to comfortably fit into device memory, put it there.
     // Otherwise, use the managed memory.
     constexpr size_t kTolerableRatio = 4;
-    rmm::device_async_resource_ref big_memory_resource =
-      raft::resource::get_large_workspace_resource_ref(handle);
+    rmm::device_async_resource big_memory_resource =
+      raft::resource::get_large_workspace_resource(handle);
     if (sizeof(float) * n_rows_train * impl->dim() * kTolerableRatio <
         raft::resource::get_workspace_free_bytes(handle)) {
       big_memory_resource = device_memory;
diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh
index 08fcd1f09a..7a5fd198f9 100644
--- a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh
+++ b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh
@@ -69,7 +69,7 @@ void select_clusters(raft::resources const& handle,
                      cuvs::distance::DistanceType metric,
                      const T* queries,              // [n_queries, dim]
                      const float* cluster_centers,  // [n_lists, dim_ext]
-                     rmm::device_async_resource_ref mr)
+                     rmm::device_async_resource mr)
 {
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "ivf_pq::search::select_clusters(n_probes = %u, n_queries = %u, n_lists = %u, dim = %u)",
@@ -179,7 +179,7 @@ void select_clusters(raft::resources const& handle,
                      cuvs::distance::DistanceType metric,
                      const T* queries,               // [n_queries, dim]
                      const int8_t* cluster_centers,  // [n_lists, dim_ext]
-                     rmm::device_async_resource_ref mr)
+                     rmm::device_async_resource mr)
 {
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "ivf_pq::search::select_clusters(n_probes = %u, n_queries = %u, n_lists = %u, dim = %u)",
@@ -267,7 +267,7 @@ void select_clusters(raft::resources const& handle,
                      cuvs::distance::DistanceType metric,
                      const T* queries,             // [n_queries, dim]
                      const half* cluster_centers,  // [n_lists, dim_ext]
-                     rmm::device_async_resource_ref mr)
+                     rmm::device_async_resource mr)
 {
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "ivf_pq::search::select_clusters(n_probes = %u, n_queries = %u, n_lists = %u, dim = %u)",
@@ -440,7 +440,7 @@ void ivfpq_search_worker(raft::resources const& handle,
     topK,
     index.dim());
   auto stream = raft::resource::get_cuda_stream(handle);
-  auto mr     = raft::resource::get_workspace_resource_ref(handle);
+  auto mr     = raft::resource::get_workspace_resource(handle);
 
   bool manage_local_topk         = is_local_topk_feasible(topK, n_probes, n_queries);
   auto topk_len                  = manage_local_topk ? n_probes * topK : max_samples;
@@ -927,7 +927,7 @@ inline void search(raft::resources const& handle,
     max_samples = ms;
   }
 
-  auto mr = raft::resource::get_workspace_resource_ref(handle);
+  auto mr = raft::resource::get_workspace_resource(handle);
 
   // Maximum number of query vectors to search at the same time.
   // Number of queries in the outer loop, which includes query transform and coarse search.
diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh
index e0032a4ac5..07045b772b 100644
--- a/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh
+++ b/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh
@@ -50,8 +50,8 @@ void transform_batch(raft::resources const& res,
                      raft::device_vector_view<uint32_t, IdxT> output_labels,
                      raft::device_matrix_view<uint8_t, IdxT, raft::row_major> output_dataset)
 {
-  IdxT n_rows                       = dataset.extent(0);
-  rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource_ref(res);
+  IdxT n_rows                   = dataset.extent(0);
+  rmm::device_async_resource mr = raft::resource::get_workspace_resource(res);
 
   // Compute the labels for each vector
   cuvs::cluster::kmeans::balanced_params kmeans_params;
@@ -115,7 +115,7 @@ void transform(raft::resources const& res,
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "ivf_pq::transform(n_rows = %u, dim = %u)", n_rows, dataset.extent(1));
 
-  rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource_ref(res);
+  rmm::device_async_resource mr = raft::resource::get_workspace_resource(res);
 
   // The cluster centers in the index are stored padded, which is not acceptable by
   // the kmeans_balanced::predict. Thus, we need the restructuring raft::copy.
@@ -137,8 +137,8 @@ void transform(raft::resources const& res,
     }
   }
 
-  constexpr size_t max_batch_size              = 65536;
-  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource_ref(res);
+  constexpr size_t max_batch_size          = 65536;
+  rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(res);
 
   auto vec_batches = utils::make_batch_load_iterator<T>(res,
                                                         dataset.data_handle(),
diff --git a/cpp/src/neighbors/scann/detail/scann_avq.cuh b/cpp/src/neighbors/scann/detail/scann_avq.cuh
index 0e138ce1a3..e4a2042991 100644
--- a/cpp/src/neighbors/scann/detail/scann_avq.cuh
+++ b/cpp/src/neighbors/scann/detail/scann_avq.cuh
@@ -57,9 +57,8 @@ void compute_cluster_offsets(raft::resources const& dev_resources,
                              raft::device_vector_view<LabelT, int64_t> cluster_sizes,
                              int64_t& max_cluster_size)
 {
-  cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources);
-  rmm::device_async_resource_ref device_memory =
-    raft::resource::get_workspace_resource_ref(dev_resources);
+  cudaStream_t stream                      = raft::resource::get_cuda_stream(dev_resources);
+  rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(dev_resources);
 
   // Histrogram to compute cluster sizes
   int num_levels  = cluster_sizes.extent(0) + 1;
@@ -136,9 +135,8 @@ void sum_reduce_vector(raft::resources const& dev_resources,
                        raft::device_vector_view<T, int64_t> v,
                        raft::device_scalar_view<T> s)
 {
-  cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources);
-  rmm::device_async_resource_ref device_memory =
-    raft::resource::get_workspace_resource_ref(dev_resources);
+  cudaStream_t stream                      = raft::resource::get_cuda_stream(dev_resources);
+  rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(dev_resources);
 
   size_t temp_storage_bytes = 0;
 
@@ -163,10 +161,9 @@ void cholesky_solver(raft::resources const& dev_resources,
                      raft::device_vector_view<T, int64_t> b,
                      raft::device_vector_view<T, int64_t> x)
 {
-  cudaStream_t stream          = raft::resource::get_cuda_stream(dev_resources);
-  cusolverDnHandle_t cusolverH = raft::resource::get_cusolver_dn_handle(dev_resources);
-  rmm::device_async_resource_ref device_memory =
-    raft::resource::get_workspace_resource_ref(dev_resources);
+  cudaStream_t stream                      = raft::resource::get_cuda_stream(dev_resources);
+  cusolverDnHandle_t cusolverH             = raft::resource::get_cusolver_dn_handle(dev_resources);
+  rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(dev_resources);
 
   // RAFT_CUSOLVER_TRY(cusolverDnSetStream(cusolverH, stream));
 
diff --git a/cpp/src/neighbors/scann/detail/scann_build.cuh b/cpp/src/neighbors/scann/detail/scann_build.cuh
index c01e50bc83..41d0e2cc95 100644
--- a/cpp/src/neighbors/scann/detail/scann_build.cuh
+++ b/cpp/src/neighbors/scann/detail/scann_build.cuh
@@ -87,7 +87,7 @@ index<T, IdxT> build(
   raft::device_vector_view<uint32_t, int64_t> labels_view = idx.labels();
 
   // setup batching for kmeans prediction + quantization
-  auto device_memory = raft::resource::get_workspace_resource_ref(res);
+  auto device_memory = raft::resource::get_workspace_resource(res);
 
   constexpr size_t kReasonableMaxBatchSize = 65536;
   size_t max_batch_size = std::min<size_t>(dataset.extent(0), kReasonableMaxBatchSize);
diff --git a/cpp/src/preprocessing/quantize/detail/binary.cuh b/cpp/src/preprocessing/quantize/detail/binary.cuh
index 6695b8b126..fe8288a7cb 100644
--- a/cpp/src/preprocessing/quantize/detail/binary.cuh
+++ b/cpp/src/preprocessing/quantize/detail/binary.cuh
@@ -146,7 +146,7 @@ void mean_f16_in_f32(raft::resources const& res,
                      const size_t dataset_size,
                      cudaStream_t cuda_stream)
 {
-  auto mr = raft::resource::get_workspace_resource_ref(res);
+  auto mr = raft::resource::get_workspace_resource(res);
   auto f32_result_vec =
     raft::make_device_mdarray<float, int64_t>(res, mr, raft::make_extents<int64_t>(dataset_dim));
   raft::matrix::fill(res, f32_result_vec.view(), float(0));
@@ -212,7 +212,7 @@ auto train(raft::resources const& res,
                static_cast<std::size_t>(dataset_dim));
 
     raft::random::RngState rng(29837lu);
-    auto mr                    = raft::resource::get_workspace_resource_ref(res);
+    auto mr                    = raft::resource::get_workspace_resource(res);
     auto sampled_dataset_chunk = raft::make_device_mdarray<T, int64_t>(
       res, mr, raft::make_extents<int64_t>(num_samples, max_dim_chunk));
     auto transposed_sampled_dataset_chunk = raft::make_device_mdarray<T, int64_t>(
@@ -331,7 +331,7 @@ auto train(raft::resources const& res,
       raft::make_host_vector_view<const compute_t>(host_threshold_vec.data(),
                                                    (int64_t)dataset_dim));
   } else {
-    auto mr         = raft::resource::get_workspace_resource_ref(res);
+    auto mr         = raft::resource::get_workspace_resource(res);
     auto casted_vec = raft::make_device_mdarray<compute_t, int64_t>(
       res, mr, raft::make_extents<int64_t>(dataset_dim));
     raft::copy(res,
@@ -425,7 +425,7 @@ void transform(raft::resources const& res,
                  raft::make_device_vector_view<const T>(quantizer.threshold.data_handle(),
                                                         (int64_t)dataset_dim));
     } else {
-      auto mr         = raft::resource::get_workspace_resource_ref(res);
+      auto mr         = raft::resource::get_workspace_resource(res);
       auto casted_vec = raft::make_device_mdarray<float, int64_t>(
         res, mr, raft::make_extents<int64_t>(dataset_dim));
       raft::linalg::map(res,
diff --git a/cpp/src/preprocessing/quantize/detail/pq.cuh b/cpp/src/preprocessing/quantize/detail/pq.cuh
index 5d77e2dd44..2f417e6281 100644
--- a/cpp/src/preprocessing/quantize/detail/pq.cuh
+++ b/cpp/src/preprocessing/quantize/detail/pq.cuh
@@ -119,7 +119,7 @@ auto train_pq_subspaces(
   auto trainset_ptr     = !vq_centers.empty() ? pq_trainset.data_handle() : dataset.data_handle();
   auto sub_labels       = raft::make_device_vector<uint32_t, ix_t>(res, 0);
   auto pq_cluster_sizes = raft::make_device_vector<uint32_t, ix_t>(res, 0);
-  auto device_memory    = raft::resource::get_workspace_resource_ref(res);
+  auto device_memory    = raft::resource::get_workspace_resource(res);
   if (is_balanced_kmeans(params)) {
     sub_labels = raft::make_device_mdarray<uint32_t>(
       res, device_memory, raft::make_extents<ix_t>(n_rows_train));
diff --git a/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu b/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu
index c0df2b8826..0f78805b5e 100644
--- a/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu
+++ b/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu
@@ -85,7 +85,7 @@ class BatchLoadIteratorTest : public ::testing::Test {
     int64_t total_processed = 0;
 
     auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res);
-    auto workspace_mr                   = raft::resource::get_workspace_resource_ref(res);
+    auto workspace_mr                   = raft::resource::get_workspace_resource(res);
 
     {
       bli::batch_load_iterator<mdspan_t> iter(res,
@@ -154,7 +154,7 @@ TEST_F(BatchLoadIteratorTest, EmptyViewFromHost)
   auto host_view  = host_empty.view();
 
   auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res);
-  auto workspace_mr                   = raft::resource::get_workspace_resource_ref(res);
+  auto workspace_mr                   = raft::resource::get_workspace_resource(res);
 
   bli::batch_load_iterator<raft::host_matrix_view<IdxT, int64_t>> iter(
     res, host_view, /*batch_size=*/128, copy_stream, workspace_mr, enable_prefetch);
@@ -167,7 +167,7 @@ TEST_F(BatchLoadIteratorTest, EmptyViewFromDevice)
   auto device_view  = device_empty.view();
 
   auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res);
-  auto workspace_mr                   = raft::resource::get_workspace_resource_ref(res);
+  auto workspace_mr                   = raft::resource::get_workspace_resource(res);
 
   bli::batch_load_iterator<raft::device_matrix_view<IdxT, int64_t>> iter(
     res, device_view, /*batch_size=*/128, copy_stream, workspace_mr, enable_prefetch);
@@ -290,7 +290,7 @@ TEST_F(BatchLoadIteratorTest, MakeBatchLoadIteratorHostPtr)
                                                   n_cols,
                                                   batch_size_rows,
                                                   copy_stream,
-                                                  raft::resource::get_workspace_resource_ref(res),
+                                                  raft::resource::get_workspace_resource(res),
                                                   enable_prefetch);
   EXPECT_TRUE(iter.does_copy());
 
@@ -329,7 +329,7 @@ TEST_F(BatchLoadIteratorTest, MakeBatchLoadIteratorDevicePtr)
                                                   n_cols,
                                                   batch_size_rows,
                                                   copy_stream,
-                                                  raft::resource::get_workspace_resource_ref(res),
+                                                  raft::resource::get_workspace_resource(res),
                                                   enable_prefetch);
   EXPECT_FALSE(iter.does_copy());
 
diff --git a/cpp/tests/neighbors/naive_knn.cuh b/cpp/tests/neighbors/naive_knn.cuh
index d21e982902..3ce7a51b06 100644
--- a/cpp/tests/neighbors/naive_knn.cuh
+++ b/cpp/tests/neighbors/naive_knn.cuh
@@ -14,7 +14,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
 #include <rmm/mr/per_device_resource.hpp>
-#include <rmm/resource_ref.hpp>
+#include <rmm/resource.hpp>
 
 namespace cuvs::neighbors {
 
@@ -104,7 +104,7 @@ void naive_knn(raft::resources const& handle,
                uint32_t k,
                cuvs::distance::DistanceType type)
 {
-  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref();
+  rmm::device_async_resource mr = rmm::mr::get_current_device_resource();
 
   auto stream = raft::resource::get_cuda_stream(handle);
   dim3 block_dim(16, 32, 1);
diff --git a/docs/source/api_basics.rst b/docs/source/api_basics.rst
index 5ffb1da630..8c0aadb09a 100644
--- a/docs/source/api_basics.rst
+++ b/docs/source/api_basics.rst
@@ -24,7 +24,7 @@ Here's an example of configuring RMM to use a pool allocator in C++ (derived fro
     auto initial_size = rmm::percent_of_free_device_memory(50);
     rmm::mr::pool_memory_resource pool_mr{cuda_mr, initial_size};
     rmm::mr::set_current_device_resource(pool_mr);
-    auto mr = rmm::mr::get_current_device_resource_ref();
+    auto mr = rmm::mr::get_current_device_resource();
 
 Python
 ^^^^^^
diff --git a/examples/cpp/src/cagra_example.cu b/examples/cpp/src/cagra_example.cu
index 856030c520..85be6e4035 100644
--- a/examples/cpp/src/cagra_example.cu
+++ b/examples/cpp/src/cagra_example.cu
@@ -53,7 +53,7 @@ int main()
   raft::device_resources dev_resources;
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 
diff --git a/examples/cpp/src/cagra_hnsw_ace_example.cu b/examples/cpp/src/cagra_hnsw_ace_example.cu
index d1bde25ad6..79f08a7098 100644
--- a/examples/cpp/src/cagra_hnsw_ace_example.cu
+++ b/examples/cpp/src/cagra_hnsw_ace_example.cu
@@ -156,7 +156,7 @@ int main()
   raft::device_resources dev_resources;
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 
diff --git a/examples/cpp/src/cagra_persistent_example.cu b/examples/cpp/src/cagra_persistent_example.cu
index ded3a287b2..a413858a9c 100644
--- a/examples/cpp/src/cagra_persistent_example.cu
+++ b/examples/cpp/src/cagra_persistent_example.cu
@@ -260,7 +260,7 @@ int main()
   // This is important because we run the async loop with a very large number of jobs,
   // which would otherwise swamp a normal pool memory resource.
   // (the non-persistent implementation would hang forever).
-  rmm::mr::arena_memory_resource mr(rmm::mr::get_current_device_resource_ref(), mem_size);
+  rmm::mr::arena_memory_resource mr(rmm::mr::get_current_device_resource(), mem_size);
   rmm::mr::set_current_device_resource(mr);
   std::cout << "GPU Arena memory resource size: " << mem_size / (1024ll * 1024ll) << " MiB"
             << std::endl;
diff --git a/examples/cpp/src/hnsw_ace_example.cu b/examples/cpp/src/hnsw_ace_example.cu
index 22354193e5..6ede6b50ea 100644
--- a/examples/cpp/src/hnsw_ace_example.cu
+++ b/examples/cpp/src/hnsw_ace_example.cu
@@ -138,7 +138,7 @@ int main()
   raft::device_resources dev_resources;
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 
diff --git a/examples/cpp/src/ivf_flat_example.cu b/examples/cpp/src/ivf_flat_example.cu
index 163e3b4719..ed9b04e6f1 100644
--- a/examples/cpp/src/ivf_flat_example.cu
+++ b/examples/cpp/src/ivf_flat_example.cu
@@ -112,7 +112,7 @@ int main()
   raft::device_resources dev_resources;
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 
diff --git a/examples/cpp/src/ivf_pq_example.cu b/examples/cpp/src/ivf_pq_example.cu
index f27507489e..f1b4e8af7c 100644
--- a/examples/cpp/src/ivf_pq_example.cu
+++ b/examples/cpp/src/ivf_pq_example.cu
@@ -78,7 +78,7 @@ int main()
   raft::device_resources dev_resources;
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 
diff --git a/examples/cpp/src/scann_example.cu b/examples/cpp/src/scann_example.cu
index 70aff11b44..a857fd9ff4 100644
--- a/examples/cpp/src/scann_example.cu
+++ b/examples/cpp/src/scann_example.cu
@@ -54,7 +54,7 @@ int main(int argc, char* argv[])
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use
   // the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 
diff --git a/examples/cpp/src/vamana_example.cu b/examples/cpp/src/vamana_example.cu
index b43eb20b1d..c23a49e735 100644
--- a/examples/cpp/src/vamana_example.cu
+++ b/examples/cpp/src/vamana_example.cu
@@ -86,7 +86,7 @@ int main(int argc, char* argv[])
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use
   // the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 

From ce1cdc1ddb4da4fde2fdd1e47bf07f062d89a5eb Mon Sep 17 00:00:00 2001
From: tarang-jain <tarangj@nvidia.com>
Date: Wed, 20 May 2026 08:38:30 -0700
Subject: [PATCH 2/4] revert references to resource_ref

---
 c/src/core/c_api.cpp                          |  2 +-
 .../src/common/cuda_huge_page_resource.hpp    |  2 +-
 cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h |  4 +--
 cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu  |  4 +--
 cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu  |  4 +--
 cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h   |  4 +--
 cpp/bench/ann/src/cuvs/cuvs_vamana.cu         |  4 +--
 .../cuvs_internal/neighbors/naive_knn.cuh     |  4 +--
 cpp/src/cluster/detail/kmeans_balanced.cuh    | 26 +++++++++----------
 cpp/src/distance/detail/masked_nn.cuh         |  4 +--
 cpp/src/neighbors/detail/ann_utils.cuh        | 12 ++++-----
 .../neighbors/detail/cagra/cagra_build.cuh    |  8 +++---
 .../neighbors/detail/cagra/cagra_merge.cuh    |  2 +-
 .../neighbors/detail/cagra/search_plan.cuh    |  4 +--
 cpp/src/neighbors/detail/cagra/utils.hpp      |  4 +--
 .../neighbors/detail/vamana/greedy_search.cuh |  2 +-
 cpp/src/neighbors/ivf_common.cu               |  2 +-
 cpp/src/neighbors/ivf_common.cuh              |  2 +-
 .../neighbors/ivf_flat/ivf_flat_search.cuh    |  4 +--
 cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh     | 19 +++++++-------
 cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh    |  6 ++---
 cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh | 10 +++----
 cpp/src/neighbors/scann/detail/scann_avq.cuh  | 17 +++++++-----
 cpp/tests/neighbors/naive_knn.cuh             |  4 +--
 24 files changed, 79 insertions(+), 75 deletions(-)

diff --git a/c/src/core/c_api.cpp b/c/src/core/c_api.cpp
index 3847875aa4..9e7aefe93c 100644
--- a/c/src/core/c_api.cpp
+++ b/c/src/core/c_api.cpp
@@ -19,7 +19,7 @@
 #include <rmm/mr/per_device_resource.hpp>
 #include <rmm/mr/pinned_host_memory_resource.hpp>
 #include <rmm/mr/pool_memory_resource.hpp>
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include "../core/exceptions.hpp"
 
diff --git a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
index aae71605a5..e5555132bb 100644
--- a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
+++ b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
@@ -8,7 +8,7 @@
 #include <raft/core/logger_macros.hpp>
 
 #include <rmm/detail/aligned.hpp>
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <cuda/memory_resource>
 
diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h
index f818a1e208..99a0ae6f76 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h
@@ -24,7 +24,7 @@
 #include <rmm/mr/managed_memory_resource.hpp>
 #include <rmm/mr/per_device_resource.hpp>
 #include <rmm/mr/pool_memory_resource.hpp>
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <memory>
 #include <type_traits>
@@ -98,7 +98,7 @@ class shared_raft_resources {
 
   ~shared_raft_resources() noexcept { rmm::mr::set_current_device_resource(orig_resource_); }
 
-  auto get_large_memory_resource() noexcept -> rmm::device_async_resource { return large_mr_; }
+  auto get_large_memory_resource() noexcept -> rmm::device_async_resource_ref { return large_mr_; }
 
  private:
   cuda::mr::any_resource<cuda::mr::device_accessible> orig_resource_;
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu b/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu
index e67adf9005..1521333c5e 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_diskann.cu
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -9,7 +9,7 @@
 
 #include <rmm/cuda_device.hpp>
 #include <rmm/mr/pool_memory_resource.hpp>
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 namespace cuvs::bench {
 
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu
index 9645e5df63..26028b6d98 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -9,7 +9,7 @@
 
 #include <rmm/cuda_device.hpp>
 #include <rmm/mr/pool_memory_resource.hpp>
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 namespace cuvs::bench {
 
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
index d872fd6aab..022d8a0934 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
@@ -25,7 +25,7 @@
 #include <raft/util/cudart_utils.hpp>
 
 #include <rmm/device_uvector.hpp>
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <atomic>
 #include <cassert>
@@ -190,7 +190,7 @@ class cuvs_cagra : public algo<T>, public algo_gpu {
   std::shared_ptr<cuvs::neighbors::filtering::base_filter> filter_;
   std::vector<std::shared_ptr<cuvs::neighbors::cagra::index<T, IdxT>>> sub_indices_;
 
-  inline rmm::device_async_resource get_mr(AllocatorType mem_type)
+  inline rmm::device_async_resource_ref get_mr(AllocatorType mem_type)
   {
     switch (mem_type) {
       case (AllocatorType::kHostPinned): return mr_pinned_;
diff --git a/cpp/bench/ann/src/cuvs/cuvs_vamana.cu b/cpp/bench/ann/src/cuvs/cuvs_vamana.cu
index 14eb64b55a..185095d5b4 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_vamana.cu
+++ b/cpp/bench/ann/src/cuvs/cuvs_vamana.cu
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
+ * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
  * SPDX-License-Identifier: Apache-2.0
  */
 
@@ -8,7 +8,7 @@
 
 #include <rmm/cuda_device.hpp>
 #include <rmm/mr/pool_memory_resource.hpp>
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 namespace cuvs::bench {
 
diff --git a/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh b/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh
index 9c893587dc..c9f39b82bb 100644
--- a/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh
+++ b/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh
@@ -14,7 +14,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
 #include <rmm/mr/per_device_resource.hpp>
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 namespace cuvs::neighbors {
 
@@ -88,7 +88,7 @@ void naive_knn(raft::resources const& handle,
                uint32_t k,
                cuvs::distance::DistanceType type)
 {
-  rmm::device_async_resource mr = rmm::mr::get_current_device_resource();
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource();
 
   auto stream = raft::resource::get_cuda_stream(handle);
   dim3 block_dim(16, 32, 1);
diff --git a/cpp/src/cluster/detail/kmeans_balanced.cuh b/cpp/src/cluster/detail/kmeans_balanced.cuh
index cd5c6f1aa0..7003587e7a 100644
--- a/cpp/src/cluster/detail/kmeans_balanced.cuh
+++ b/cpp/src/cluster/detail/kmeans_balanced.cuh
@@ -36,7 +36,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/mr/managed_memory_resource.hpp>
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <thrust/gather.h>
 #include <thrust/iterator/transform_iterator.h>
@@ -83,7 +83,7 @@ inline std::enable_if_t<std::is_floating_point_v<MathT>> predict_core(
   const MathT* dataset_norm,
   IdxT n_rows,
   LabelT* labels,
-  rmm::device_async_resource mr)
+  rmm::device_async_resource_ref mr)
 {
   auto stream = raft::resource::get_cuda_stream(handle);
   switch (params.metric) {
@@ -253,7 +253,7 @@ void calc_centers_and_sizes(const raft::resources& handle,
                             const LabelT* labels,
                             bool reset_counters,
                             MappingOpT mapping_op,
-                            rmm::device_async_resource mr)
+                            rmm::device_async_resource_ref mr)
 {
   auto stream = raft::resource::get_cuda_stream(handle);
 
@@ -316,7 +316,7 @@ void compute_norm(const raft::resources& handle,
                   IdxT n_rows,
                   MappingOpT mapping_op,
                   FinOpT norm_fin_op,
-                  std::optional<rmm::device_async_resource> mr = std::nullopt)
+                  std::optional<rmm::device_async_resource_ref> mr = std::nullopt)
 {
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope("compute_norm");
   auto stream = raft::resource::get_cuda_stream(handle);
@@ -377,8 +377,8 @@ void predict(const raft::resources& handle,
              IdxT n_rows,
              LabelT* labels,
              MappingOpT mapping_op,
-             std::optional<rmm::device_async_resource> mr = std::nullopt,
-             const MathT* dataset_norm                    = nullptr)
+             std::optional<rmm::device_async_resource_ref> mr = std::nullopt,
+             const MathT* dataset_norm                        = nullptr)
 {
   auto stream = raft::resource::get_cuda_stream(handle);
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
@@ -551,7 +551,7 @@ auto adjust_centers(MathT* centers,
                     MathT threshold,
                     MappingOpT mapping_op,
                     rmm::cuda_stream_view stream,
-                    rmm::device_async_resource device_memory) -> bool
+                    rmm::device_async_resource_ref device_memory) -> bool
 {
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "adjust_centers(%zu, %u)", static_cast<size_t>(n_rows), n_clusters);
@@ -649,7 +649,7 @@ void balancing_em_iters(const raft::resources& handle,
                         uint32_t balancing_pullback,
                         MathT balancing_threshold,
                         MappingOpT mapping_op,
-                        rmm::device_async_resource device_memory)
+                        rmm::device_async_resource_ref device_memory)
 {
   auto stream                = raft::resource::get_cuda_stream(handle);
   uint32_t balancing_counter = balancing_pullback;
@@ -732,7 +732,7 @@ void build_clusters(const raft::resources& handle,
                     LabelT* cluster_labels,
                     CounterT* cluster_sizes,
                     MappingOpT mapping_op,
-                    rmm::device_async_resource device_memory,
+                    rmm::device_async_resource_ref device_memory,
                     const MathT* dataset_norm = nullptr)
 {
   auto stream = raft::resource::get_cuda_stream(handle);
@@ -874,8 +874,8 @@ auto build_fine_clusters(const raft::resources& handle,
                          IdxT fine_clusters_nums_max,
                          MathT* cluster_centers,
                          MappingOpT mapping_op,
-                         rmm::device_async_resource managed_memory,
-                         rmm::device_async_resource device_memory) -> IdxT
+                         rmm::device_async_resource_ref managed_memory,
+                         rmm::device_async_resource_ref device_memory) -> IdxT
 {
   auto stream = raft::resource::get_cuda_stream(handle);
   rmm::device_uvector<IdxT> mc_trainset_ids_buf(mesocluster_size_max, stream, managed_memory);
@@ -995,8 +995,8 @@ void build_hierarchical(const raft::resources& handle,
 
   // TODO: Remove the explicit managed memory- we shouldn't be creating this on the user's behalf.
   rmm::mr::managed_memory_resource managed_memory;
-  rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(handle);
-  auto [max_minibatch_size, mem_per_row]   = calc_minibatch_size<MathT>(
+  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle);
+  auto [max_minibatch_size, mem_per_row]       = calc_minibatch_size<MathT>(
     handle, n_clusters, n_rows, dim, params.metric, std::is_same_v<T, MathT>);
 
   // Precompute the L2 norm of the dataset if relevant and not yet computed.
diff --git a/cpp/src/distance/detail/masked_nn.cuh b/cpp/src/distance/detail/masked_nn.cuh
index 4c2ac0a993..5381b433ec 100644
--- a/cpp/src/distance/detail/masked_nn.cuh
+++ b/cpp/src/distance/detail/masked_nn.cuh
@@ -246,8 +246,8 @@ void masked_l2_nn_impl(raft::resources const& handle,
   static_assert(P::Mblk == 64, "masked_l2_nn_impl only supports a policy with 64 rows per block.");
 
   // Get stream and workspace memory resource
-  rmm::device_async_resource ws_mr = raft::resource::get_workspace_resource(handle);
-  auto stream                      = raft::resource::get_cuda_stream(handle);
+  rmm::device_async_resource_ref ws_mr = raft::resource::get_workspace_resource(handle);
+  auto stream                          = raft::resource::get_cuda_stream(handle);
 
   // Acquire temporary buffers and initialize to zero:
   // 1) Adjacency matrix bitfield
diff --git a/cpp/src/neighbors/detail/ann_utils.cuh b/cpp/src/neighbors/detail/ann_utils.cuh
index 10aee6dee0..dda5dd8bc2 100644
--- a/cpp/src/neighbors/detail/ann_utils.cuh
+++ b/cpp/src/neighbors/detail/ann_utils.cuh
@@ -21,7 +21,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <cuda_fp16.hpp>
 
@@ -593,7 +593,7 @@ struct batch_load_iterator {
           MdspanT input_view,
           size_type batch_size,
           rmm::cuda_stream_view copy_stream,
-          rmm::device_async_resource mr,
+          rmm::device_async_resource_ref mr,
           bool prefetch,
           bool initialize,
           bool host_writeback)
@@ -853,7 +853,7 @@ struct batch_load_iterator {
                       MdspanT input_view,
                       size_type batch_size,
                       rmm::cuda_stream_view copy_stream,
-                      rmm::device_async_resource mr,
+                      rmm::device_async_resource_ref mr,
                       bool prefetch       = false,
                       bool initialize     = true,
                       bool host_writeback = false)
@@ -1018,7 +1018,7 @@ class batch_load_iterator_dyn {
                           IdxT row_width,
                           size_type batch_size,
                           rmm::cuda_stream_view copy_stream,
-                          rmm::device_async_resource mr,
+                          rmm::device_async_resource_ref mr,
                           bool prefetch       = false,
                           bool initialize     = true,
                           bool host_writeback = false)
@@ -1149,7 +1149,7 @@ class batch_load_iterator_dyn {
                         IdxT row_width,
                         size_type batch_size,
                         rmm::cuda_stream_view copy_stream,
-                        rmm::device_async_resource mr,
+                        rmm::device_async_resource_ref mr,
                         bool prefetch,
                         bool initialize,
                         bool host_writeback) -> std::variant<HostIter, DeviceIter>
@@ -1212,7 +1212,7 @@ auto make_batch_load_iterator(raft::resources const& res,
                               detail::type_identity_t<IdxT> row_width,
                               size_t batch_size,
                               rmm::cuda_stream_view copy_stream,
-                              rmm::device_async_resource mr,
+                              rmm::device_async_resource_ref mr,
                               bool prefetch       = false,
                               bool initialize     = true,
                               bool host_writeback = false) -> batch_load_iterator_dyn<T, IdxT>
diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh
index f5221724ea..5ee9a0a59c 100644
--- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh
+++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh
@@ -31,7 +31,7 @@
 // TODO: This shouldn't be calling spatial/knn APIs
 #include "../ann_utils.cuh"
 
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <chrono>
 #include <cstdio>
@@ -1700,9 +1700,9 @@ void build_knn_graph(
   }
 
   // If the workspace is smaller than desired, put the I/O buffers into the large workspace.
-  rmm::device_async_resource workspace_mr = use_large_workspace
-                                              ? raft::resource::get_large_workspace_resource(res)
-                                              : raft::resource::get_workspace_resource(res);
+  rmm::device_async_resource_ref workspace_mr =
+    use_large_workspace ? raft::resource::get_large_workspace_resource(res)
+                        : raft::resource::get_workspace_resource(res);
 
   RAFT_LOG_DEBUG(
     "IVF-PQ search node_degree: %d, top_k: %d,  gpu_top_k: %d,  max_batch_size:: %d, n_probes: %u",
diff --git a/cpp/src/neighbors/detail/cagra/cagra_merge.cuh b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh
index fcce6f691c..1dd4cbe075 100644
--- a/cpp/src/neighbors/detail/cagra/cagra_merge.cuh
+++ b/cpp/src/neighbors/detail/cagra/cagra_merge.cuh
@@ -21,7 +21,7 @@
 #include <cuvs/neighbors/ivf_pq.hpp>
 #include <cuvs/neighbors/refine.hpp>
 
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <chrono>
 #include <cstdio>
diff --git a/cpp/src/neighbors/detail/cagra/search_plan.cuh b/cpp/src/neighbors/detail/cagra/search_plan.cuh
index f7a5d15a6d..03b2962e4c 100644
--- a/cpp/src/neighbors/detail/cagra/search_plan.cuh
+++ b/cpp/src/neighbors/detail/cagra/search_plan.cuh
@@ -38,8 +38,8 @@ namespace cuvs::neighbors::cagra::detail {
 template <typename T>
 struct lightweight_uvector {
  private:
-  using raft_res_type            = const raft::resources*;
-  using rmm_res_type             = std::tuple<rmm::device_async_resource, rmm::cuda_stream_view>;
+  using raft_res_type = const raft::resources*;
+  using rmm_res_type  = std::tuple<rmm::device_async_resource_ref, rmm::cuda_stream_view>;
   static constexpr size_t kAlign = 256;
 
   std::variant<raft_res_type, rmm_res_type> res_;
diff --git a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp
index 16e84a5d95..edb325ca51 100644
--- a/cpp/src/neighbors/detail/cagra/utils.hpp
+++ b/cpp/src/neighbors/detail/cagra/utils.hpp
@@ -18,7 +18,7 @@
 #include <raft/util/cudart_utils.hpp>
 #include <raft/util/integer_utils.hpp>
 #include <rmm/cuda_stream_pool.hpp>
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <cuda.h>
 #include <cuda_fp16.h>
@@ -276,7 +276,7 @@ void copy_with_padding(
   raft::resources const& res,
   raft::device_matrix<T, int64_t, raft::row_major>& dst,
   raft::mdspan<const T, raft::matrix_extent<int64_t>, raft::row_major, data_accessor> src,
-  rmm::device_async_resource mr = rmm::mr::get_current_device_resource())
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())
 {
   size_t padded_dim = raft::round_up_safe<size_t>(src.extent(1) * sizeof(T), 16) / sizeof(T);
 
diff --git a/cpp/src/neighbors/detail/vamana/greedy_search.cuh b/cpp/src/neighbors/detail/vamana/greedy_search.cuh
index 4bdbd4695f..4e71c1189c 100644
--- a/cpp/src/neighbors/detail/vamana/greedy_search.cuh
+++ b/cpp/src/neighbors/detail/vamana/greedy_search.cuh
@@ -14,7 +14,7 @@
 
 #include <cuvs/distance/distance.hpp>
 #include <raft/util/warp_primitives.cuh>
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <chrono>
 #include <cstdio>
diff --git a/cpp/src/neighbors/ivf_common.cu b/cpp/src/neighbors/ivf_common.cu
index a7a1a2823a..b87a14f7c3 100644
--- a/cpp/src/neighbors/ivf_common.cu
+++ b/cpp/src/neighbors/ivf_common.cu
@@ -79,7 +79,7 @@ void sort_cluster_sizes_descending(uint32_t* input,
                                    uint32_t* output,
                                    uint32_t n_lists,
                                    rmm::cuda_stream_view stream,
-                                   rmm::device_async_resource tmp_res)
+                                   rmm::device_async_resource_ref tmp_res)
 {
   int begin_bit             = 0;
   int end_bit               = sizeof(uint32_t) * 8;
diff --git a/cpp/src/neighbors/ivf_common.cuh b/cpp/src/neighbors/ivf_common.cuh
index b8fb54ca60..303098176c 100644
--- a/cpp/src/neighbors/ivf_common.cuh
+++ b/cpp/src/neighbors/ivf_common.cuh
@@ -20,7 +20,7 @@ void sort_cluster_sizes_descending(uint32_t* input,
                                    uint32_t* output,
                                    uint32_t n_lists,
                                    rmm::cuda_stream_view stream,
-                                   rmm::device_async_resource tmp_res);
+                                   rmm::device_async_resource_ref tmp_res);
 
 /**
  * Default value returned by `search` when the `n_probes` is too small and top-k is too large.
diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh
index 6831c77d8c..f42ffdc837 100644
--- a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh
+++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh
@@ -24,7 +24,7 @@
 #include <raft/linalg/norm.cuh>     // raft::linalg::norm
 #include <raft/matrix/detail/select_warpsort.cuh>
 
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 #include <type_traits>
 
@@ -51,7 +51,7 @@ void search_impl(raft::resources const& handle,
                  bool select_min,
                  IdxT* neighbors,
                  AccT* distances,
-                 rmm::device_async_resource search_mr,
+                 rmm::device_async_resource_ref search_mr,
                  IvfSampleFilterT sample_filter)
 {
   auto stream = raft::resource::get_cuda_stream(handle);
diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
index a668fa3a4b..f0a878f2ec 100644
--- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
+++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
@@ -91,7 +91,7 @@ void select_residuals(raft::resources const& handle,
                       const float* center,           // [dim]
                       const T* dataset,              // [.., dim]
                       const IdxT* row_ids,           // [n_rows]
-                      rmm::device_async_resource device_memory
+                      rmm::device_async_resource_ref device_memory
 
 )
 {
@@ -147,7 +147,7 @@ void flat_compute_residuals(
   raft::device_matrix_view<const float, uint32_t, raft::row_major> centers,  // [n_lists, dim_ext]
   const T* dataset,                                                          // [n_rows, dim]
   std::variant<uint32_t, const uint32_t*> labels,                            // [n_rows]
-  rmm::device_async_resource device_memory,
+  rmm::device_async_resource_ref device_memory,
   cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded)
 {
   auto stream  = raft::resource::get_cuda_stream(handle);
@@ -805,7 +805,7 @@ void process_and_fill_codes(raft::resources const& handle,
                             std::variant<IdxT, const IdxT*> src_offset_or_indices,
                             const uint32_t* new_labels,
                             IdxT n_rows,
-                            rmm::device_async_resource mr)
+                            rmm::device_async_resource_ref mr)
 {
   auto new_vectors_residual =
     raft::make_device_mdarray<float>(handle, mr, raft::make_extents<IdxT>(n_rows, index.rot_dim()));
@@ -997,8 +997,9 @@ void extend(raft::resources const& handle,
                   std::is_same_v<T, int8_t>,
                 "Unsupported data type");
 
-  rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(handle);
-  rmm::device_async_resource large_memory  = raft::resource::get_large_workspace_resource(handle);
+  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle);
+  rmm::device_async_resource_ref large_memory =
+    raft::resource::get_large_workspace_resource(handle);
 
   // Try to allocate an index with the same parameters and the projected new size
   // (which can be slightly larger than index->size() + n_rows, due to padding for interleaved).
@@ -1027,8 +1028,8 @@ void extend(raft::resources const& handle,
   // `large_workspace_resource`, which does not have the explicit allocation limit. The user may opt
   // to populate the `large_workspace_resource` memory resource with managed memory for easier
   // scaling.
-  rmm::device_async_resource labels_mr  = device_memory;
-  rmm::device_async_resource batches_mr = device_memory;
+  rmm::device_async_resource_ref labels_mr  = device_memory;
+  rmm::device_async_resource_ref batches_mr = device_memory;
   if (n_rows * (index->dim() * sizeof(T) + index->pq_dim() + sizeof(IdxT) + sizeof(uint32_t)) >
       free_mem) {
     labels_mr = large_memory;
@@ -1268,12 +1269,12 @@ auto build(raft::resources const& handle,
       size_t(n_rows) / std::max<size_t>(params.kmeans_trainset_fraction * n_rows, impl->n_lists()));
     size_t n_rows_train = n_rows / trainset_ratio;
 
-    rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(handle);
+    rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle);
 
     // If the trainset is small enough to comfortably fit into device memory, put it there.
     // Otherwise, use the managed memory.
     constexpr size_t kTolerableRatio = 4;
-    rmm::device_async_resource big_memory_resource =
+    rmm::device_async_resource_ref big_memory_resource =
       raft::resource::get_large_workspace_resource(handle);
     if (sizeof(float) * n_rows_train * impl->dim() * kTolerableRatio <
         raft::resource::get_workspace_free_bytes(handle)) {
diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh
index 7a5fd198f9..5799c4bbb4 100644
--- a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh
+++ b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh
@@ -69,7 +69,7 @@ void select_clusters(raft::resources const& handle,
                      cuvs::distance::DistanceType metric,
                      const T* queries,              // [n_queries, dim]
                      const float* cluster_centers,  // [n_lists, dim_ext]
-                     rmm::device_async_resource mr)
+                     rmm::device_async_resource_ref mr)
 {
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "ivf_pq::search::select_clusters(n_probes = %u, n_queries = %u, n_lists = %u, dim = %u)",
@@ -179,7 +179,7 @@ void select_clusters(raft::resources const& handle,
                      cuvs::distance::DistanceType metric,
                      const T* queries,               // [n_queries, dim]
                      const int8_t* cluster_centers,  // [n_lists, dim_ext]
-                     rmm::device_async_resource mr)
+                     rmm::device_async_resource_ref mr)
 {
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "ivf_pq::search::select_clusters(n_probes = %u, n_queries = %u, n_lists = %u, dim = %u)",
@@ -267,7 +267,7 @@ void select_clusters(raft::resources const& handle,
                      cuvs::distance::DistanceType metric,
                      const T* queries,             // [n_queries, dim]
                      const half* cluster_centers,  // [n_lists, dim_ext]
-                     rmm::device_async_resource mr)
+                     rmm::device_async_resource_ref mr)
 {
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "ivf_pq::search::select_clusters(n_probes = %u, n_queries = %u, n_lists = %u, dim = %u)",
diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh
index 07045b772b..9a8b5d7166 100644
--- a/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh
+++ b/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh
@@ -50,8 +50,8 @@ void transform_batch(raft::resources const& res,
                      raft::device_vector_view<uint32_t, IdxT> output_labels,
                      raft::device_matrix_view<uint8_t, IdxT, raft::row_major> output_dataset)
 {
-  IdxT n_rows                   = dataset.extent(0);
-  rmm::device_async_resource mr = raft::resource::get_workspace_resource(res);
+  IdxT n_rows                       = dataset.extent(0);
+  rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource(res);
 
   // Compute the labels for each vector
   cuvs::cluster::kmeans::balanced_params kmeans_params;
@@ -115,7 +115,7 @@ void transform(raft::resources const& res,
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "ivf_pq::transform(n_rows = %u, dim = %u)", n_rows, dataset.extent(1));
 
-  rmm::device_async_resource mr = raft::resource::get_workspace_resource(res);
+  rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource(res);
 
   // The cluster centers in the index are stored padded, which is not acceptable by
   // the kmeans_balanced::predict. Thus, we need the restructuring raft::copy.
@@ -137,8 +137,8 @@ void transform(raft::resources const& res,
     }
   }
 
-  constexpr size_t max_batch_size          = 65536;
-  rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(res);
+  constexpr size_t max_batch_size              = 65536;
+  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(res);
 
   auto vec_batches = utils::make_batch_load_iterator<T>(res,
                                                         dataset.data_handle(),
diff --git a/cpp/src/neighbors/scann/detail/scann_avq.cuh b/cpp/src/neighbors/scann/detail/scann_avq.cuh
index e4a2042991..6c3bb045e4 100644
--- a/cpp/src/neighbors/scann/detail/scann_avq.cuh
+++ b/cpp/src/neighbors/scann/detail/scann_avq.cuh
@@ -57,8 +57,9 @@ void compute_cluster_offsets(raft::resources const& dev_resources,
                              raft::device_vector_view<LabelT, int64_t> cluster_sizes,
                              int64_t& max_cluster_size)
 {
-  cudaStream_t stream                      = raft::resource::get_cuda_stream(dev_resources);
-  rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(dev_resources);
+  cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources);
+  rmm::device_async_resource_ref device_memory =
+    raft::resource::get_workspace_resource(dev_resources);
 
   // Histrogram to compute cluster sizes
   int num_levels  = cluster_sizes.extent(0) + 1;
@@ -135,8 +136,9 @@ void sum_reduce_vector(raft::resources const& dev_resources,
                        raft::device_vector_view<T, int64_t> v,
                        raft::device_scalar_view<T> s)
 {
-  cudaStream_t stream                      = raft::resource::get_cuda_stream(dev_resources);
-  rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(dev_resources);
+  cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources);
+  rmm::device_async_resource_ref device_memory =
+    raft::resource::get_workspace_resource(dev_resources);
 
   size_t temp_storage_bytes = 0;
 
@@ -161,9 +163,10 @@ void cholesky_solver(raft::resources const& dev_resources,
                      raft::device_vector_view<T, int64_t> b,
                      raft::device_vector_view<T, int64_t> x)
 {
-  cudaStream_t stream                      = raft::resource::get_cuda_stream(dev_resources);
-  cusolverDnHandle_t cusolverH             = raft::resource::get_cusolver_dn_handle(dev_resources);
-  rmm::device_async_resource device_memory = raft::resource::get_workspace_resource(dev_resources);
+  cudaStream_t stream          = raft::resource::get_cuda_stream(dev_resources);
+  cusolverDnHandle_t cusolverH = raft::resource::get_cusolver_dn_handle(dev_resources);
+  rmm::device_async_resource_ref device_memory =
+    raft::resource::get_workspace_resource(dev_resources);
 
   // RAFT_CUSOLVER_TRY(cusolverDnSetStream(cusolverH, stream));
 
diff --git a/cpp/tests/neighbors/naive_knn.cuh b/cpp/tests/neighbors/naive_knn.cuh
index 3ce7a51b06..b8b4c791a7 100644
--- a/cpp/tests/neighbors/naive_knn.cuh
+++ b/cpp/tests/neighbors/naive_knn.cuh
@@ -14,7 +14,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
 #include <rmm/mr/per_device_resource.hpp>
-#include <rmm/resource.hpp>
+#include <rmm/resource_ref.hpp>
 
 namespace cuvs::neighbors {
 
@@ -104,7 +104,7 @@ void naive_knn(raft::resources const& handle,
                uint32_t k,
                cuvs::distance::DistanceType type)
 {
-  rmm::device_async_resource mr = rmm::mr::get_current_device_resource();
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource();
 
   auto stream = raft::resource::get_cuda_stream(handle);
   dim3 block_dim(16, 32, 1);

From ed94de8bf0340272ea8fb68a1bd2120056fa8bc2 Mon Sep 17 00:00:00 2001
From: tarang-jain <tarangj@nvidia.com>
Date: Wed, 20 May 2026 08:43:31 -0700
Subject: [PATCH 3/4] fix compilation errors

---
 c/src/core/c_api.cpp                               | 4 ++--
 cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h      | 2 +-
 cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h        | 2 +-
 cpp/internal/cuvs_internal/neighbors/naive_knn.cuh | 2 +-
 cpp/src/neighbors/detail/cagra/utils.hpp           | 2 +-
 cpp/src/neighbors/detail/vpq_dataset.cuh           | 2 +-
 cpp/tests/neighbors/naive_knn.cuh                  | 2 +-
 docs/source/api_basics.rst                         | 2 +-
 examples/cpp/src/cagra_example.cu                  | 2 +-
 examples/cpp/src/cagra_hnsw_ace_example.cu         | 2 +-
 examples/cpp/src/cagra_persistent_example.cu       | 2 +-
 examples/cpp/src/hnsw_ace_example.cu               | 2 +-
 examples/cpp/src/ivf_flat_example.cu               | 2 +-
 examples/cpp/src/ivf_pq_example.cu                 | 2 +-
 examples/cpp/src/scann_example.cu                  | 2 +-
 examples/cpp/src/vamana_example.cu                 | 2 +-
 16 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/c/src/core/c_api.cpp b/c/src/core/c_api.cpp
index 9e7aefe93c..f4e3664482 100644
--- a/c/src/core/c_api.cpp
+++ b/c/src/core/c_api.cpp
@@ -132,7 +132,7 @@ extern "C" cuvsError_t cuvsRMMAlloc(cuvsResources_t res, void** ptr, size_t byte
 {
   return cuvs::core::translate_exceptions([=] {
     auto res_ptr = reinterpret_cast<raft::resources*>(res);
-    auto mr      = rmm::mr::get_current_device_resource();
+    auto mr      = rmm::mr::get_current_device_resource_ref();
     *ptr         = mr.allocate(raft::resource::get_cuda_stream(*res_ptr), bytes);
   });
 }
@@ -141,7 +141,7 @@ extern "C" cuvsError_t cuvsRMMFree(cuvsResources_t res, void* ptr, size_t bytes)
 {
   return cuvs::core::translate_exceptions([=] {
     auto res_ptr = reinterpret_cast<raft::resources*>(res);
-    auto mr      = rmm::mr::get_current_device_resource();
+    auto mr      = rmm::mr::get_current_device_resource_ref();
     mr.deallocate(raft::resource::get_cuda_stream(*res_ptr), ptr, bytes);
   });
 }
diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h
index 99a0ae6f76..1a276e8cc8 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_utils.h
@@ -73,7 +73,7 @@ class shared_raft_resources {
   try : large_mr_() {
     orig_resource_ =
       rmm::mr::set_current_device_resource(rmm::mr::failure_callback_resource_adaptor<>{
-        rmm::mr::pool_memory_resource{rmm::mr::get_current_device_resource(),
+        rmm::mr::pool_memory_resource{rmm::mr::get_current_device_resource_ref(),
                                       1024 * 1024 * 1024ull},
         rmm_oom_callback,
         nullptr});
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
index 022d8a0934..87111e4761 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
@@ -195,7 +195,7 @@ class cuvs_cagra : public algo<T>, public algo_gpu {
     switch (mem_type) {
       case (AllocatorType::kHostPinned): return mr_pinned_;
       case (AllocatorType::kHostHugePage): return mr_huge_page_;
-      default: return rmm::mr::get_current_device_resource();
+      default: return rmm::mr::get_current_device_resource_ref();
     }
   }
 };
diff --git a/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh b/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh
index c9f39b82bb..7bc37193a0 100644
--- a/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh
+++ b/cpp/internal/cuvs_internal/neighbors/naive_knn.cuh
@@ -88,7 +88,7 @@ void naive_knn(raft::resources const& handle,
                uint32_t k,
                cuvs::distance::DistanceType type)
 {
-  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource();
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref();
 
   auto stream = raft::resource::get_cuda_stream(handle);
   dim3 block_dim(16, 32, 1);
diff --git a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp
index edb325ca51..07417a3f09 100644
--- a/cpp/src/neighbors/detail/cagra/utils.hpp
+++ b/cpp/src/neighbors/detail/cagra/utils.hpp
@@ -276,7 +276,7 @@ void copy_with_padding(
   raft::resources const& res,
   raft::device_matrix<T, int64_t, raft::row_major>& dst,
   raft::mdspan<const T, raft::matrix_extent<int64_t>, raft::row_major, data_accessor> src,
-  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref())
 {
   size_t padded_dim = raft::round_up_safe<size_t>(src.extent(1) * sizeof(T), 16) / sizeof(T);
 
diff --git a/cpp/src/neighbors/detail/vpq_dataset.cuh b/cpp/src/neighbors/detail/vpq_dataset.cuh
index c1136db645..49b5e12455 100644
--- a/cpp/src/neighbors/detail/vpq_dataset.cuh
+++ b/cpp/src/neighbors/detail/vpq_dataset.cuh
@@ -513,7 +513,7 @@ void process_and_fill_codes(
     static_cast<ix_t>(dim),
     static_cast<size_t>(max_batch_size),
     stream,
-    rmm::mr::get_current_device_resource());
+    rmm::mr::get_current_device_resource_ref());
   for (const auto& batch : _vpq_batches_codes) {
     auto batch_view        = raft::make_device_matrix_view(batch.data(), ix_t(batch.size()), dim);
     auto batch_labels_view = raft::make_device_vector_view<label_t, IdxT>(nullptr, 0);
diff --git a/cpp/tests/neighbors/naive_knn.cuh b/cpp/tests/neighbors/naive_knn.cuh
index b8b4c791a7..d21e982902 100644
--- a/cpp/tests/neighbors/naive_knn.cuh
+++ b/cpp/tests/neighbors/naive_knn.cuh
@@ -104,7 +104,7 @@ void naive_knn(raft::resources const& handle,
                uint32_t k,
                cuvs::distance::DistanceType type)
 {
-  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource();
+  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource_ref();
 
   auto stream = raft::resource::get_cuda_stream(handle);
   dim3 block_dim(16, 32, 1);
diff --git a/docs/source/api_basics.rst b/docs/source/api_basics.rst
index 8c0aadb09a..5ffb1da630 100644
--- a/docs/source/api_basics.rst
+++ b/docs/source/api_basics.rst
@@ -24,7 +24,7 @@ Here's an example of configuring RMM to use a pool allocator in C++ (derived fro
     auto initial_size = rmm::percent_of_free_device_memory(50);
     rmm::mr::pool_memory_resource pool_mr{cuda_mr, initial_size};
     rmm::mr::set_current_device_resource(pool_mr);
-    auto mr = rmm::mr::get_current_device_resource();
+    auto mr = rmm::mr::get_current_device_resource_ref();
 
 Python
 ^^^^^^
diff --git a/examples/cpp/src/cagra_example.cu b/examples/cpp/src/cagra_example.cu
index 85be6e4035..856030c520 100644
--- a/examples/cpp/src/cagra_example.cu
+++ b/examples/cpp/src/cagra_example.cu
@@ -53,7 +53,7 @@ int main()
   raft::device_resources dev_resources;
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 
diff --git a/examples/cpp/src/cagra_hnsw_ace_example.cu b/examples/cpp/src/cagra_hnsw_ace_example.cu
index 79f08a7098..d1bde25ad6 100644
--- a/examples/cpp/src/cagra_hnsw_ace_example.cu
+++ b/examples/cpp/src/cagra_hnsw_ace_example.cu
@@ -156,7 +156,7 @@ int main()
   raft::device_resources dev_resources;
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 
diff --git a/examples/cpp/src/cagra_persistent_example.cu b/examples/cpp/src/cagra_persistent_example.cu
index a413858a9c..ded3a287b2 100644
--- a/examples/cpp/src/cagra_persistent_example.cu
+++ b/examples/cpp/src/cagra_persistent_example.cu
@@ -260,7 +260,7 @@ int main()
   // This is important because we run the async loop with a very large number of jobs,
   // which would otherwise swamp a normal pool memory resource.
   // (the non-persistent implementation would hang forever).
-  rmm::mr::arena_memory_resource mr(rmm::mr::get_current_device_resource(), mem_size);
+  rmm::mr::arena_memory_resource mr(rmm::mr::get_current_device_resource_ref(), mem_size);
   rmm::mr::set_current_device_resource(mr);
   std::cout << "GPU Arena memory resource size: " << mem_size / (1024ll * 1024ll) << " MiB"
             << std::endl;
diff --git a/examples/cpp/src/hnsw_ace_example.cu b/examples/cpp/src/hnsw_ace_example.cu
index 6ede6b50ea..22354193e5 100644
--- a/examples/cpp/src/hnsw_ace_example.cu
+++ b/examples/cpp/src/hnsw_ace_example.cu
@@ -138,7 +138,7 @@ int main()
   raft::device_resources dev_resources;
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 
diff --git a/examples/cpp/src/ivf_flat_example.cu b/examples/cpp/src/ivf_flat_example.cu
index ed9b04e6f1..163e3b4719 100644
--- a/examples/cpp/src/ivf_flat_example.cu
+++ b/examples/cpp/src/ivf_flat_example.cu
@@ -112,7 +112,7 @@ int main()
   raft::device_resources dev_resources;
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 
diff --git a/examples/cpp/src/ivf_pq_example.cu b/examples/cpp/src/ivf_pq_example.cu
index f1b4e8af7c..f27507489e 100644
--- a/examples/cpp/src/ivf_pq_example.cu
+++ b/examples/cpp/src/ivf_pq_example.cu
@@ -78,7 +78,7 @@ int main()
   raft::device_resources dev_resources;
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 
diff --git a/examples/cpp/src/scann_example.cu b/examples/cpp/src/scann_example.cu
index a857fd9ff4..70aff11b44 100644
--- a/examples/cpp/src/scann_example.cu
+++ b/examples/cpp/src/scann_example.cu
@@ -54,7 +54,7 @@ int main(int argc, char* argv[])
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use
   // the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 
diff --git a/examples/cpp/src/vamana_example.cu b/examples/cpp/src/vamana_example.cu
index c23a49e735..b43eb20b1d 100644
--- a/examples/cpp/src/vamana_example.cu
+++ b/examples/cpp/src/vamana_example.cu
@@ -86,7 +86,7 @@ int main(int argc, char* argv[])
 
   // Set pool memory resource with 1 GiB initial pool size. All allocations use
   // the same pool.
-  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource(),
+  rmm::mr::pool_memory_resource pool_mr(rmm::mr::get_current_device_resource_ref(),
                                         1024 * 1024 * 1024ull);
   rmm::mr::set_current_device_resource(pool_mr);
 

From 668746ca53a4501846b1dbced80538d644af7f9f Mon Sep 17 00:00:00 2001
From: tarang-jain <tarangj@nvidia.com>
Date: Wed, 20 May 2026 08:50:35 -0700
Subject: [PATCH 4/4] revert workspace_resource_ref

---
 cpp/src/cluster/detail/kmeans_balanced.cuh    |  6 ++--
 cpp/src/cluster/kmeans_balanced.cuh           |  4 +--
 .../kmeans_balanced_build_clusters_impl.cuh   |  2 +-
 cpp/src/distance/detail/masked_nn.cuh         |  2 +-
 cpp/src/neighbors/composite/index.cu          |  2 +-
 cpp/src/neighbors/detail/ann_utils.cuh        | 12 +++----
 cpp/src/neighbors/detail/cagra/add_nodes.cuh  |  2 +-
 .../neighbors/detail/cagra/cagra_build.cuh    |  6 ++--
 cpp/src/neighbors/detail/cagra/graph_core.cuh | 10 +++---
 .../neighbors/detail/cagra/search_plan.cuh    |  4 +--
 cpp/src/neighbors/detail/cagra/utils.hpp      |  2 +-
 .../neighbors/detail/vamana/vamana_build.cuh  | 30 +++++++++---------
 cpp/src/neighbors/detail/vpq_dataset.cuh      |  2 +-
 cpp/src/neighbors/ivf_common.cuh              |  2 +-
 cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh | 16 +++++-----
 .../neighbors/ivf_flat/ivf_flat_search.cuh    | 31 ++++++++++---------
 cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh     | 19 ++++++------
 cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh    |  4 +--
 cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh |  6 ++--
 cpp/src/neighbors/scann/detail/scann_avq.cuh  |  6 ++--
 .../neighbors/scann/detail/scann_build.cuh    |  2 +-
 .../preprocessing/quantize/detail/binary.cuh  |  8 ++---
 cpp/src/preprocessing/quantize/detail/pq.cuh  |  2 +-
 .../ann_cagra/test_batch_load_iterator.cu     | 10 +++---
 24 files changed, 97 insertions(+), 93 deletions(-)

diff --git a/cpp/src/cluster/detail/kmeans_balanced.cuh b/cpp/src/cluster/detail/kmeans_balanced.cuh
index 7003587e7a..26ce110158 100644
--- a/cpp/src/cluster/detail/kmeans_balanced.cuh
+++ b/cpp/src/cluster/detail/kmeans_balanced.cuh
@@ -321,7 +321,7 @@ void compute_norm(const raft::resources& handle,
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope("compute_norm");
   auto stream = raft::resource::get_cuda_stream(handle);
   rmm::device_uvector<MathT> mapped_dataset(
-    0, stream, mr.value_or(raft::resource::get_workspace_resource(handle)));
+    0, stream, mr.value_or(raft::resource::get_workspace_resource_ref(handle)));
 
   const MathT* dataset_ptr = nullptr;
 
@@ -383,7 +383,7 @@ void predict(const raft::resources& handle,
   auto stream = raft::resource::get_cuda_stream(handle);
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "predict(%zu, %u)", static_cast<size_t>(n_rows), n_clusters);
-  auto mem_res = mr.value_or(raft::resource::get_workspace_resource(handle));
+  auto mem_res = mr.value_or(raft::resource::get_workspace_resource_ref(handle));
   auto [max_minibatch_size, _mem_per_row] = calc_minibatch_size<MathT>(
     handle, n_clusters, n_rows, dim, params.metric, std::is_same_v<T, MathT>);
   rmm::device_uvector<MathT> cur_dataset(
@@ -995,7 +995,7 @@ void build_hierarchical(const raft::resources& handle,
 
   // TODO: Remove the explicit managed memory- we shouldn't be creating this on the user's behalf.
   rmm::mr::managed_memory_resource managed_memory;
-  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle);
+  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource_ref(handle);
   auto [max_minibatch_size, mem_per_row]       = calc_minibatch_size<MathT>(
     handle, n_clusters, n_rows, dim, params.metric, std::is_same_v<T, MathT>);
 
diff --git a/cpp/src/cluster/kmeans_balanced.cuh b/cpp/src/cluster/kmeans_balanced.cuh
index 0c0df03397..f3f52c2d8f 100644
--- a/cpp/src/cluster/kmeans_balanced.cuh
+++ b/cpp/src/cluster/kmeans_balanced.cuh
@@ -154,7 +154,7 @@ void predict(const raft::resources& handle,
                                          X.extent(0),
                                          labels.data_handle(),
                                          mapping_op,
-                                         raft::resource::get_workspace_resource(handle));
+                                         raft::resource::get_workspace_resource_ref(handle));
 }
 
 namespace helpers {
@@ -305,7 +305,7 @@ void calc_centers_and_sizes(const raft::resources& handle,
     labels.data_handle(),
     reset_counters,
     mapping_op,
-    raft::resource::get_workspace_resource(handle));
+    raft::resource::get_workspace_resource_ref(handle));
 }
 
 }  // namespace helpers
diff --git a/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh b/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh
index e1ace274e2..2bce856c6c 100644
--- a/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh
+++ b/cpp/src/cluster/kmeans_balanced_build_clusters_impl.cuh
@@ -68,7 +68,7 @@ void build_clusters(const raft::resources& handle,
     labels.data_handle(),
     cluster_sizes.data_handle(),
     mapping_op,
-    raft::resource::get_workspace_resource(handle),
+    raft::resource::get_workspace_resource_ref(handle),
     X_norm.has_value() ? X_norm.value().data_handle() : nullptr);
 }
 
diff --git a/cpp/src/distance/detail/masked_nn.cuh b/cpp/src/distance/detail/masked_nn.cuh
index 5381b433ec..a3a187e8c4 100644
--- a/cpp/src/distance/detail/masked_nn.cuh
+++ b/cpp/src/distance/detail/masked_nn.cuh
@@ -246,7 +246,7 @@ void masked_l2_nn_impl(raft::resources const& handle,
   static_assert(P::Mblk == 64, "masked_l2_nn_impl only supports a policy with 64 rows per block.");
 
   // Get stream and workspace memory resource
-  rmm::device_async_resource_ref ws_mr = raft::resource::get_workspace_resource(handle);
+  rmm::device_async_resource_ref ws_mr = raft::resource::get_workspace_resource_ref(handle);
   auto stream                          = raft::resource::get_cuda_stream(handle);
 
   // Acquire temporary buffers and initialize to zero:
diff --git a/cpp/src/neighbors/composite/index.cu b/cpp/src/neighbors/composite/index.cu
index 57bd0816eb..7c02d0e43e 100644
--- a/cpp/src/neighbors/composite/index.cu
+++ b/cpp/src/neighbors/composite/index.cu
@@ -40,7 +40,7 @@ void composite_index<T, IdxT, OutputIdxT>::search(
   size_t buffer_size = num_queries * K * num_indices;
 
   auto main_stream = raft::resource::get_cuda_stream(handle);
-  auto tmp_res     = raft::resource::get_workspace_resource(handle);
+  auto tmp_res     = raft::resource::get_workspace_resource_ref(handle);
 
   rmm::device_uvector<out_index_type> neighbors_buffer(buffer_size, main_stream, tmp_res);
   rmm::device_uvector<float> distances_buffer(buffer_size, main_stream, tmp_res);
diff --git a/cpp/src/neighbors/detail/ann_utils.cuh b/cpp/src/neighbors/detail/ann_utils.cuh
index dda5dd8bc2..bdb2c310c6 100644
--- a/cpp/src/neighbors/detail/ann_utils.cuh
+++ b/cpp/src/neighbors/detail/ann_utils.cuh
@@ -864,7 +864,7 @@ struct batch_load_iterator {
   {
   }
 
-  /** Convenience overload that uses `get_workspace_resource(res)` as the memory resource. */
+  /** Convenience overload that uses `get_workspace_resource_ref(res)` as the memory resource. */
   batch_load_iterator(raft::resources const& res,
                       MdspanT input_view,
                       size_type batch_size,
@@ -876,7 +876,7 @@ struct batch_load_iterator {
                           input_view,
                           batch_size,
                           copy_stream,
-                          raft::resource::get_workspace_resource(res),
+                          raft::resource::get_workspace_resource_ref(res),
                           prefetch,
                           initialize,
                           host_writeback)
@@ -1036,7 +1036,7 @@ class batch_load_iterator_dyn {
   {
   }
 
-  /** Convenience overload that uses `get_workspace_resource(res)` as the memory resource. */
+  /** Convenience overload that uses `get_workspace_resource_ref(res)` as the memory resource. */
   batch_load_iterator_dyn(raft::resources const& res,
                           T* ptr,
                           IdxT n_rows,
@@ -1052,7 +1052,7 @@ class batch_load_iterator_dyn {
                               row_width,
                               batch_size,
                               copy_stream,
-                              raft::resource::get_workspace_resource(res),
+                              raft::resource::get_workspace_resource_ref(res),
                               prefetch,
                               initialize,
                               host_writeback)
@@ -1229,7 +1229,7 @@ auto make_batch_load_iterator(raft::resources const& res,
                                           host_writeback);
 }
 
-/** Convenience overload that uses `get_workspace_resource(res)` as the memory resource. */
+/** Convenience overload that uses `get_workspace_resource_ref(res)` as the memory resource. */
 template <typename T, typename IdxT = int64_t>
 auto make_batch_load_iterator(raft::resources const& res,
                               T const* ptr,
@@ -1247,7 +1247,7 @@ auto make_batch_load_iterator(raft::resources const& res,
                                            row_width,
                                            batch_size,
                                            copy_stream,
-                                           raft::resource::get_workspace_resource(res),
+                                           raft::resource::get_workspace_resource_ref(res),
                                            prefetch,
                                            initialize,
                                            host_writeback);
diff --git a/cpp/src/neighbors/detail/cagra/add_nodes.cuh b/cpp/src/neighbors/detail/cagra/add_nodes.cuh
index 169a667776..5d0a6654e9 100644
--- a/cpp/src/neighbors/detail/cagra/add_nodes.cuh
+++ b/cpp/src/neighbors/detail/cagra/add_nodes.cuh
@@ -70,7 +70,7 @@ void add_node_core(
   params.itopk_size = std::max(base_degree * 2lu, 256lu);
 
   // Memory space for rank-based neighbor list
-  auto mr = raft::resource::get_workspace_resource(handle);
+  auto mr = raft::resource::get_workspace_resource_ref(handle);
 
   auto neighbor_indices = raft::make_device_mdarray<IdxT, std::int64_t>(
     handle, mr, raft::make_extents<std::int64_t>(max_search_batch_size, base_degree));
diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh
index 5ee9a0a59c..96ff8344d3 100644
--- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh
+++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh
@@ -1701,8 +1701,8 @@ void build_knn_graph(
 
   // If the workspace is smaller than desired, put the I/O buffers into the large workspace.
   rmm::device_async_resource_ref workspace_mr =
-    use_large_workspace ? raft::resource::get_large_workspace_resource(res)
-                        : raft::resource::get_workspace_resource(res);
+    use_large_workspace ? raft::resource::get_large_workspace_resource_ref(res)
+                        : raft::resource::get_workspace_resource_ref(res);
 
   RAFT_LOG_DEBUG(
     "IVF-PQ search node_degree: %d, top_k: %d,  gpu_top_k: %d,  max_batch_size:: %d, n_probes: %u",
@@ -2123,7 +2123,7 @@ auto iterative_build_graph(
       static_cast<int64_t>(dev_query_view.extent(1)),
       max_chunk_size,
       raft::resource::get_cuda_stream(res),
-      raft::resource::get_workspace_resource(res));
+      raft::resource::get_workspace_resource_ref(res));
     for (const auto& batch : query_batch) {
       auto batch_dev_query_view = raft::make_device_matrix_view<const T, int64_t>(
         batch.data(), batch.size(), dev_query_view.extent(1));
diff --git a/cpp/src/neighbors/detail/cagra/graph_core.cuh b/cpp/src/neighbors/detail/cagra/graph_core.cuh
index f478cd3fa2..0e2ad6d769 100644
--- a/cpp/src/neighbors/detail/cagra/graph_core.cuh
+++ b/cpp/src/neighbors/detail/cagra/graph_core.cuh
@@ -851,7 +851,7 @@ void merge_graph_gpu(
 
   namespace bli                       = cuvs::spatial::knn::detail::utils;
   auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res);
-  auto workspace_mr                   = raft::resource::get_workspace_resource(res);
+  auto workspace_mr                   = raft::resource::get_workspace_resource_ref(res);
 
   bli::batch_load_iterator<
     raft::mdspan<IdxT, raft::matrix_extent<int64_t>, raft::row_major, AccessorOutputGraph>>
@@ -1001,7 +1001,7 @@ void sort_knn_graph(
   const uint64_t input_graph_degree = knn_graph.extent(1);
   IdxT* const input_graph_ptr       = knn_graph.data_handle();
 
-  auto large_tmp_mr = raft::resource::get_large_workspace_resource(res);
+  auto large_tmp_mr = raft::resource::get_large_workspace_resource_ref(res);
 
   auto d_input_graph = raft::make_device_mdarray<IdxT>(
     res, large_tmp_mr, raft::make_extents<int64_t>(graph_size, input_graph_degree));
@@ -1625,7 +1625,7 @@ void prune_graph_gpu(
 
   namespace bli                       = cuvs::spatial::knn::detail::utils;
   auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res);
-  auto workspace_mr                   = raft::resource::get_workspace_resource(res);
+  auto workspace_mr                   = raft::resource::get_workspace_resource_ref(res);
 
   // Single-batch read-only iterator for the input graph (graph_size rows fit in one batch).
   bli::batch_load_iterator<
@@ -1722,9 +1722,9 @@ void optimize(
   raft::resources res{res_const};
 
   // large temporary memory for large arrays, e.g. everything >= O(graph_size)
-  auto large_tmp_mr = raft::resource::get_large_workspace_resource(res);
+  auto large_tmp_mr = raft::resource::get_large_workspace_resource_ref(res);
   // temporary memory for small arrays, e.g. everything <= O(batchsize * graph_degree)
-  auto default_ws_mr = raft::resource::get_workspace_resource(res);
+  auto default_ws_mr = raft::resource::get_workspace_resource_ref(res);
 
   // create a stream pool if not already present
   if (!res.has_resource_factory(raft::resource::resource_type::CUDA_STREAM_POOL) ||
diff --git a/cpp/src/neighbors/detail/cagra/search_plan.cuh b/cpp/src/neighbors/detail/cagra/search_plan.cuh
index 03b2962e4c..8a9d79e177 100644
--- a/cpp/src/neighbors/detail/cagra/search_plan.cuh
+++ b/cpp/src/neighbors/detail/cagra/search_plan.cuh
@@ -58,7 +58,7 @@ struct lightweight_uvector {
     if (new_size == size_) { return; }
     if (std::holds_alternative<raft_res_type>(res_)) {
       auto& h = std::get<raft_res_type>(res_);
-      res_    = rmm_res_type{raft::resource::get_workspace_resource(*h),
+      res_    = rmm_res_type{raft::resource::get_workspace_resource_ref(*h),
                           raft::resource::get_cuda_stream(*h)};
     }
     auto& [r, s] = std::get<rmm_res_type>(res_);
@@ -80,7 +80,7 @@ struct lightweight_uvector {
     if (new_size == size_) { return; }
     if (std::holds_alternative<raft_res_type>(res_)) {
       auto& h = std::get<raft_res_type>(res_);
-      res_    = rmm_res_type{raft::resource::get_workspace_resource(*h), stream};
+      res_    = rmm_res_type{raft::resource::get_workspace_resource_ref(*h), stream};
     } else {
       std::get<rmm::cuda_stream_view>(std::get<rmm_res_type>(res_)) = stream;
     }
diff --git a/cpp/src/neighbors/detail/cagra/utils.hpp b/cpp/src/neighbors/detail/cagra/utils.hpp
index 07417a3f09..58bf68bb43 100644
--- a/cpp/src/neighbors/detail/cagra/utils.hpp
+++ b/cpp/src/neighbors/detail/cagra/utils.hpp
@@ -190,7 +190,7 @@ class device_matrix_view_from_host {
       // live on stack and not returned to a user.
       // The user may opt to set this resource to managed memory to allow large allocations.
       device_mem_.emplace(raft::make_device_mdarray<T, IdxT>(
-        res, raft::resource::get_large_workspace_resource(res), host_view.extents()));
+        res, raft::resource::get_large_workspace_resource_ref(res), host_view.extents()));
       raft::copy(res, device_mem_->view(), host_view);
       device_ptr = device_mem_->data_handle();
     }
diff --git a/cpp/src/neighbors/detail/vamana/vamana_build.cuh b/cpp/src/neighbors/detail/vamana/vamana_build.cuh
index ecfa809190..336d81215b 100644
--- a/cpp/src/neighbors/detail/vamana/vamana_build.cuh
+++ b/cpp/src/neighbors/detail/vamana/vamana_build.cuh
@@ -143,7 +143,7 @@ void batched_insert_vamana(
   auto query_ids      = raft::make_device_vector<IdxT>(res, max_batchsize);
   auto query_list_ptr = raft::make_device_mdarray<QueryCandidates<IdxT, accT>>(
     res,
-    raft::resource::get_large_workspace_resource(res),
+    raft::resource::get_large_workspace_resource_ref(res),
     raft::make_extents<int64_t>(max_batchsize + 1));
   QueryCandidates<IdxT, accT>* query_list =
     static_cast<QueryCandidates<IdxT, accT>*>(query_list_ptr.data_handle());
@@ -151,11 +151,11 @@ void batched_insert_vamana(
   // Results of each batch of inserts during build - Memory is used by query_list structure
   auto visited_ids =
     raft::make_device_mdarray<IdxT>(res,
-                                    raft::resource::get_large_workspace_resource(res),
+                                    raft::resource::get_large_workspace_resource_ref(res),
                                     raft::make_extents<int64_t>(max_batchsize, visited_size));
   auto visited_dists =
     raft::make_device_mdarray<accT>(res,
-                                    raft::resource::get_large_workspace_resource(res),
+                                    raft::resource::get_large_workspace_resource_ref(res),
                                     raft::make_extents<int64_t>(max_batchsize, visited_size));
 
   // Assign memory to query_list structures and initialize
@@ -167,14 +167,14 @@ void batched_insert_vamana(
                                                                     1);
   auto topk_pq_mem =
     raft::make_device_mdarray<Node<accT>>(res,
-                                          raft::resource::get_large_workspace_resource(res),
+                                          raft::resource::get_large_workspace_resource_ref(res),
                                           raft::make_extents<int64_t>(max_batchsize, visited_size));
 
   int align_padding = raft::alignTo(dim, 16) - dim;
 
   auto s_coords_mem = raft::make_device_mdarray<T>(
     res,
-    raft::resource::get_large_workspace_resource(res),
+    raft::resource::get_large_workspace_resource_ref(res),
     raft::make_extents<int64_t>(min(maxBlocks, max(max_batchsize, reverse_batch)),
                                 dim + align_padding));
 
@@ -318,7 +318,7 @@ void batched_insert_vamana(
 
     // compute prefix sums of query_list sizes - TODO parallelize prefix sums
     //    auto d_total_edges = raft::make_device_mdarray<int>(
-    //      res, raft::resource::get_workspace_resource(res), raft::make_extents<int64_t>(1));
+    //      res, raft::resource::get_workspace_resource_ref(res), raft::make_extents<int64_t>(1));
     rmm::device_scalar<int> d_total_edges(stream);
     prefix_sums_sizes<accT, IdxT><<<1, 1, 0, stream>>>(query_list, step_size, d_total_edges.data());
     RAFT_CUDA_TRY(cudaPeekAtLastError());
@@ -329,16 +329,16 @@ void batched_insert_vamana(
 
     auto edge_dist_pair = raft::make_device_mdarray<DistPair<IdxT, accT>>(
       res,
-      raft::resource::get_large_workspace_resource(res),
+      raft::resource::get_large_workspace_resource_ref(res),
       raft::make_extents<int64_t>(total_edges));
 
     auto edge_dest =
       raft::make_device_mdarray<IdxT>(res,
-                                      raft::resource::get_large_workspace_resource(res),
+                                      raft::resource::get_large_workspace_resource_ref(res),
                                       raft::make_extents<int64_t>(total_edges));
     auto edge_src =
       raft::make_device_mdarray<IdxT>(res,
-                                      raft::resource::get_large_workspace_resource(res),
+                                      raft::resource::get_large_workspace_resource_ref(res),
                                       raft::make_extents<int64_t>(total_edges));
 
     // Create reverse edge list
@@ -367,7 +367,7 @@ void batched_insert_vamana(
 
       auto temp_sort_storage = raft::make_device_mdarray<IdxT>(
         res,
-        raft::resource::get_large_workspace_resource(res),
+        raft::resource::get_large_workspace_resource_ref(res),
         raft::make_extents<int64_t>(temp_storage_bytes / sizeof(IdxT)));
 
       // Sort to group reverse edges by destination
@@ -406,7 +406,7 @@ void batched_insert_vamana(
 
     auto temp_sort_storage = raft::make_device_mdarray<IdxT>(
       res,
-      raft::resource::get_large_workspace_resource(res),
+      raft::resource::get_large_workspace_resource_ref(res),
       raft::make_extents<int64_t>(temp_storage_bytes / sizeof(IdxT)));
 
     // Sort to group reverse edges by destination
@@ -451,16 +451,16 @@ void batched_insert_vamana(
       // Allocate reverse QueryCandidate list based on number of unique destinations
       auto reverse_list_ptr = raft::make_device_mdarray<QueryCandidates<IdxT, accT>>(
         res,
-        raft::resource::get_large_workspace_resource(res),
+        raft::resource::get_large_workspace_resource_ref(res),
         raft::make_extents<int64_t>(reverse_batch));
       auto rev_ids =
         raft::make_device_mdarray<IdxT>(res,
-                                        raft::resource::get_large_workspace_resource(res),
+                                        raft::resource::get_large_workspace_resource_ref(res),
                                         raft::make_extents<int64_t>(reverse_batch, visited_size));
 
       auto rev_dists =
         raft::make_device_mdarray<accT>(res,
-                                        raft::resource::get_large_workspace_resource(res),
+                                        raft::resource::get_large_workspace_resource_ref(res),
                                         raft::make_extents<int64_t>(reverse_batch, visited_size));
 
       QueryCandidates<IdxT, accT>* reverse_list =
@@ -663,7 +663,7 @@ index<T, IdxT> build(
       static_cast<int64_t>(dim),
       static_cast<size_t>(max_batch_size),
       raft::resource::get_cuda_stream(res),
-      raft::resource::get_workspace_resource(res));
+      raft::resource::get_workspace_resource_ref(res));
     for (const auto& batch : _vamana_batches) {
       // perform rotation
       auto dataset_rotated = raft::make_device_matrix<float, int64_t>(res, batch.size(), dim);
diff --git a/cpp/src/neighbors/detail/vpq_dataset.cuh b/cpp/src/neighbors/detail/vpq_dataset.cuh
index 49b5e12455..ec4a684274 100644
--- a/cpp/src/neighbors/detail/vpq_dataset.cuh
+++ b/cpp/src/neighbors/detail/vpq_dataset.cuh
@@ -910,7 +910,7 @@ void process_and_fill_codes_subspaces(
     static_cast<ix_t>(dim),
     static_cast<size_t>(max_batch_size),
     copy_stream,
-    raft::resource::get_workspace_resource(res),
+    raft::resource::get_workspace_resource_ref(res),
     enable_prefetch_stream);
   vec_batches.prefetch_next_batch();
   for (const auto& batch : vec_batches) {
diff --git a/cpp/src/neighbors/ivf_common.cuh b/cpp/src/neighbors/ivf_common.cuh
index 303098176c..e466a13fd7 100644
--- a/cpp/src/neighbors/ivf_common.cuh
+++ b/cpp/src/neighbors/ivf_common.cuh
@@ -256,7 +256,7 @@ template <typename Index>
 void recompute_internal_state(const raft::resources& res, Index& index)
 {
   auto stream  = raft::resource::get_cuda_stream(res);
-  auto tmp_res = raft::resource::get_workspace_resource(res);
+  auto tmp_res = raft::resource::get_workspace_resource_ref(res);
   rmm::device_uvector<uint32_t> sorted_sizes(index.n_lists(), stream, tmp_res);
 
   // Actualize the list pointers
diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh
index 4165bd1087..fffe5134ae 100644
--- a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh
+++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh
@@ -180,8 +180,10 @@ void extend(raft::resources const& handle,
   RAFT_EXPECTS(new_indices != nullptr || index->size() == 0,
                "You must pass data indices when the index is non-empty.");
 
-  auto new_labels = raft::make_device_mdarray<LabelT>(
-    handle, raft::resource::get_large_workspace_resource(handle), raft::make_extents<IdxT>(n_rows));
+  auto new_labels =
+    raft::make_device_mdarray<LabelT>(handle,
+                                      raft::resource::get_large_workspace_resource_ref(handle),
+                                      raft::make_extents<IdxT>(n_rows));
   cuvs::cluster::kmeans::balanced_params kmeans_params;
   kmeans_params.metric = index->metric();
   auto orig_centroids_view =
@@ -208,7 +210,7 @@ void extend(raft::resources const& handle,
                                        IdxT{index->dim()},
                                        max_batch_size,
                                        copy_stream,
-                                       raft::resource::get_workspace_resource(handle),
+                                       raft::resource::get_workspace_resource_ref(handle),
                                        enable_prefetch);
   vec_batches.prefetch_next_batch();
 
@@ -227,7 +229,7 @@ void extend(raft::resources const& handle,
 
   auto* list_sizes_ptr    = index->list_sizes().data_handle();
   auto old_list_sizes_dev = raft::make_device_mdarray<uint32_t>(
-    handle, raft::resource::get_workspace_resource(handle), raft::make_extents<IdxT>(n_lists));
+    handle, raft::resource::get_workspace_resource_ref(handle), raft::make_extents<IdxT>(n_lists));
   raft::copy(handle,
              old_list_sizes_dev.view(),
              raft::make_device_vector_view<const uint32_t, IdxT>(list_sizes_ptr, n_lists));
@@ -303,7 +305,7 @@ void extend(raft::resources const& handle,
                                           IdxT{1},
                                           max_batch_size,
                                           stream,
-                                          raft::resource::get_workspace_resource(handle));
+                                          raft::resource::get_workspace_resource_ref(handle));
   vec_batches.reset();
   vec_batches.prefetch_next_batch();
   auto idx_batch            = vec_indices.begin();
@@ -418,7 +420,7 @@ inline auto build(raft::resources const& handle,
       1, n_rows / std::max<size_t>(params.kmeans_trainset_fraction * n_rows, index.n_lists()));
     auto n_rows_train = n_rows / trainset_ratio;
     rmm::device_uvector<T> trainset(
-      n_rows_train * index.dim(), stream, raft::resource::get_large_workspace_resource(handle));
+      n_rows_train * index.dim(), stream, raft::resource::get_large_workspace_resource_ref(handle));
     // TODO: a proper sampling
     raft::copy_matrix(trainset.data(),
                       index.dim(),
@@ -477,7 +479,7 @@ inline void fill_refinement_index(raft::resources const& handle,
     "ivf_flat::fill_refinement_index(%zu, %u)", size_t(n_queries));
 
   rmm::device_uvector<LabelT> new_labels(
-    n_queries * n_candidates, stream, raft::resource::get_workspace_resource(handle));
+    n_queries * n_candidates, stream, raft::resource::get_workspace_resource_ref(handle));
   auto new_labels_view =
     raft::make_device_vector_view<LabelT, IdxT>(new_labels.data(), n_queries * n_candidates);
   raft::linalg::map_offset(
diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh
index f42ffdc837..960d48c818 100644
--- a/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh
+++ b/cpp/src/neighbors/ivf_flat/ivf_flat_search.cuh
@@ -355,21 +355,22 @@ inline void search_with_filtering(raft::resources const& handle,
   for (uint32_t offset_q = 0; offset_q < n_queries; offset_q += max_queries) {
     uint32_t queries_batch = raft::min(max_queries, n_queries - offset_q);
 
-    search_impl<T, float, IdxT, IvfSampleFilterT>(handle,
-                                                  index,
-                                                  effective_metric,
-                                                  params,
-                                                  queries + offset_q * index.dim(),
-                                                  queries_batch,
-                                                  offset_q,
-                                                  k,
-                                                  n_probes,
-                                                  max_samples,
-                                                  cuvs::distance::is_min_close(effective_metric),
-                                                  neighbors + offset_q * k,
-                                                  distances + offset_q * k,
-                                                  raft::resource::get_workspace_resource(handle),
-                                                  sample_filter);
+    search_impl<T, float, IdxT, IvfSampleFilterT>(
+      handle,
+      index,
+      effective_metric,
+      params,
+      queries + offset_q * index.dim(),
+      queries_batch,
+      offset_q,
+      k,
+      n_probes,
+      max_samples,
+      cuvs::distance::is_min_close(effective_metric),
+      neighbors + offset_q * k,
+      distances + offset_q * k,
+      raft::resource::get_workspace_resource_ref(handle),
+      sample_filter);
   }
 }
 
diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
index f0a878f2ec..c562ca9e00 100644
--- a/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
+++ b/cpp/src/neighbors/ivf_pq/ivf_pq_build.cuh
@@ -334,7 +334,7 @@ void train_per_subset(raft::resources const& handle,
                       uint32_t max_train_points_per_pq_code)
 {
   auto stream        = raft::resource::get_cuda_stream(handle);
-  auto device_memory = raft::resource::get_workspace_resource(handle);
+  auto device_memory = raft::resource::get_workspace_resource_ref(handle);
 
   rmm::device_uvector<float> pq_centers_tmp(impl->pq_centers().size(), stream, device_memory);
   // Subsampling the train set for codebook generation based on max_train_points_per_pq_code.
@@ -416,7 +416,7 @@ void train_per_cluster(raft::resources const& handle,
                        uint32_t max_train_points_per_pq_code)
 {
   auto stream        = raft::resource::get_cuda_stream(handle);
-  auto device_memory = raft::resource::get_workspace_resource(handle);
+  auto device_memory = raft::resource::get_workspace_resource_ref(handle);
   // NB: Managed memory is used for small arrays accessed from both device and host. There's no
   // performance reasoning behind this, just avoiding the boilerplate of explicit copies.
   rmm::mr::managed_memory_resource managed_memory;
@@ -596,7 +596,7 @@ void reconstruct_list_data(raft::resources const& res,
 
   auto tmp =
     raft::make_device_mdarray<float>(res,
-                                     raft::resource::get_workspace_resource(res),
+                                     raft::resource::get_workspace_resource_ref(res),
                                      raft::make_extents<uint32_t>(n_rows, index.rot_dim()));
 
   constexpr uint32_t kBlockSize = 256;
@@ -623,7 +623,7 @@ void reconstruct_list_data(raft::resources const& res,
 
   float* out_float_ptr = nullptr;
   rmm::device_uvector<float> out_float_buf(
-    0, raft::resource::get_cuda_stream(res), raft::resource::get_workspace_resource(res));
+    0, raft::resource::get_cuda_stream(res), raft::resource::get_workspace_resource_ref(res));
   if constexpr (std::is_same_v<T, float>) {
     out_float_ptr = out_vectors.data_handle();
   } else {
@@ -706,7 +706,7 @@ void encode_list_data(raft::resources const& res,
   auto n_rows = new_vectors.extent(0);
   if (n_rows == 0) { return; }
 
-  auto mr = raft::resource::get_workspace_resource(res);
+  auto mr = raft::resource::get_workspace_resource_ref(res);
 
   auto new_vectors_residual = raft::make_device_mdarray<float>(
     res, mr, raft::make_extents<uint32_t>(n_rows, index->rot_dim()));
@@ -997,9 +997,9 @@ void extend(raft::resources const& handle,
                   std::is_same_v<T, int8_t>,
                 "Unsupported data type");
 
-  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle);
+  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource_ref(handle);
   rmm::device_async_resource_ref large_memory =
-    raft::resource::get_large_workspace_resource(handle);
+    raft::resource::get_large_workspace_resource_ref(handle);
 
   // Try to allocate an index with the same parameters and the projected new size
   // (which can be slightly larger than index->size() + n_rows, due to padding for interleaved).
@@ -1269,13 +1269,14 @@ auto build(raft::resources const& handle,
       size_t(n_rows) / std::max<size_t>(params.kmeans_trainset_fraction * n_rows, impl->n_lists()));
     size_t n_rows_train = n_rows / trainset_ratio;
 
-    rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(handle);
+    rmm::device_async_resource_ref device_memory =
+      raft::resource::get_workspace_resource_ref(handle);
 
     // If the trainset is small enough to comfortably fit into device memory, put it there.
     // Otherwise, use the managed memory.
     constexpr size_t kTolerableRatio = 4;
     rmm::device_async_resource_ref big_memory_resource =
-      raft::resource::get_large_workspace_resource(handle);
+      raft::resource::get_large_workspace_resource_ref(handle);
     if (sizeof(float) * n_rows_train * impl->dim() * kTolerableRatio <
         raft::resource::get_workspace_free_bytes(handle)) {
       big_memory_resource = device_memory;
diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh
index 5799c4bbb4..08fcd1f09a 100644
--- a/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh
+++ b/cpp/src/neighbors/ivf_pq/ivf_pq_search.cuh
@@ -440,7 +440,7 @@ void ivfpq_search_worker(raft::resources const& handle,
     topK,
     index.dim());
   auto stream = raft::resource::get_cuda_stream(handle);
-  auto mr     = raft::resource::get_workspace_resource(handle);
+  auto mr     = raft::resource::get_workspace_resource_ref(handle);
 
   bool manage_local_topk         = is_local_topk_feasible(topK, n_probes, n_queries);
   auto topk_len                  = manage_local_topk ? n_probes * topK : max_samples;
@@ -927,7 +927,7 @@ inline void search(raft::resources const& handle,
     max_samples = ms;
   }
 
-  auto mr = raft::resource::get_workspace_resource(handle);
+  auto mr = raft::resource::get_workspace_resource_ref(handle);
 
   // Maximum number of query vectors to search at the same time.
   // Number of queries in the outer loop, which includes query transform and coarse search.
diff --git a/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh b/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh
index 9a8b5d7166..e0032a4ac5 100644
--- a/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh
+++ b/cpp/src/neighbors/ivf_pq/ivf_pq_transform.cuh
@@ -51,7 +51,7 @@ void transform_batch(raft::resources const& res,
                      raft::device_matrix_view<uint8_t, IdxT, raft::row_major> output_dataset)
 {
   IdxT n_rows                       = dataset.extent(0);
-  rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource(res);
+  rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource_ref(res);
 
   // Compute the labels for each vector
   cuvs::cluster::kmeans::balanced_params kmeans_params;
@@ -115,7 +115,7 @@ void transform(raft::resources const& res,
   raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope(
     "ivf_pq::transform(n_rows = %u, dim = %u)", n_rows, dataset.extent(1));
 
-  rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource(res);
+  rmm::device_async_resource_ref mr = raft::resource::get_workspace_resource_ref(res);
 
   // The cluster centers in the index are stored padded, which is not acceptable by
   // the kmeans_balanced::predict. Thus, we need the restructuring raft::copy.
@@ -138,7 +138,7 @@ void transform(raft::resources const& res,
   }
 
   constexpr size_t max_batch_size              = 65536;
-  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource(res);
+  rmm::device_async_resource_ref device_memory = raft::resource::get_workspace_resource_ref(res);
 
   auto vec_batches = utils::make_batch_load_iterator<T>(res,
                                                         dataset.data_handle(),
diff --git a/cpp/src/neighbors/scann/detail/scann_avq.cuh b/cpp/src/neighbors/scann/detail/scann_avq.cuh
index 6c3bb045e4..0e138ce1a3 100644
--- a/cpp/src/neighbors/scann/detail/scann_avq.cuh
+++ b/cpp/src/neighbors/scann/detail/scann_avq.cuh
@@ -59,7 +59,7 @@ void compute_cluster_offsets(raft::resources const& dev_resources,
 {
   cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources);
   rmm::device_async_resource_ref device_memory =
-    raft::resource::get_workspace_resource(dev_resources);
+    raft::resource::get_workspace_resource_ref(dev_resources);
 
   // Histrogram to compute cluster sizes
   int num_levels  = cluster_sizes.extent(0) + 1;
@@ -138,7 +138,7 @@ void sum_reduce_vector(raft::resources const& dev_resources,
 {
   cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources);
   rmm::device_async_resource_ref device_memory =
-    raft::resource::get_workspace_resource(dev_resources);
+    raft::resource::get_workspace_resource_ref(dev_resources);
 
   size_t temp_storage_bytes = 0;
 
@@ -166,7 +166,7 @@ void cholesky_solver(raft::resources const& dev_resources,
   cudaStream_t stream          = raft::resource::get_cuda_stream(dev_resources);
   cusolverDnHandle_t cusolverH = raft::resource::get_cusolver_dn_handle(dev_resources);
   rmm::device_async_resource_ref device_memory =
-    raft::resource::get_workspace_resource(dev_resources);
+    raft::resource::get_workspace_resource_ref(dev_resources);
 
   // RAFT_CUSOLVER_TRY(cusolverDnSetStream(cusolverH, stream));
 
diff --git a/cpp/src/neighbors/scann/detail/scann_build.cuh b/cpp/src/neighbors/scann/detail/scann_build.cuh
index 41d0e2cc95..c01e50bc83 100644
--- a/cpp/src/neighbors/scann/detail/scann_build.cuh
+++ b/cpp/src/neighbors/scann/detail/scann_build.cuh
@@ -87,7 +87,7 @@ index<T, IdxT> build(
   raft::device_vector_view<uint32_t, int64_t> labels_view = idx.labels();
 
   // setup batching for kmeans prediction + quantization
-  auto device_memory = raft::resource::get_workspace_resource(res);
+  auto device_memory = raft::resource::get_workspace_resource_ref(res);
 
   constexpr size_t kReasonableMaxBatchSize = 65536;
   size_t max_batch_size = std::min<size_t>(dataset.extent(0), kReasonableMaxBatchSize);
diff --git a/cpp/src/preprocessing/quantize/detail/binary.cuh b/cpp/src/preprocessing/quantize/detail/binary.cuh
index fe8288a7cb..6695b8b126 100644
--- a/cpp/src/preprocessing/quantize/detail/binary.cuh
+++ b/cpp/src/preprocessing/quantize/detail/binary.cuh
@@ -146,7 +146,7 @@ void mean_f16_in_f32(raft::resources const& res,
                      const size_t dataset_size,
                      cudaStream_t cuda_stream)
 {
-  auto mr = raft::resource::get_workspace_resource(res);
+  auto mr = raft::resource::get_workspace_resource_ref(res);
   auto f32_result_vec =
     raft::make_device_mdarray<float, int64_t>(res, mr, raft::make_extents<int64_t>(dataset_dim));
   raft::matrix::fill(res, f32_result_vec.view(), float(0));
@@ -212,7 +212,7 @@ auto train(raft::resources const& res,
                static_cast<std::size_t>(dataset_dim));
 
     raft::random::RngState rng(29837lu);
-    auto mr                    = raft::resource::get_workspace_resource(res);
+    auto mr                    = raft::resource::get_workspace_resource_ref(res);
     auto sampled_dataset_chunk = raft::make_device_mdarray<T, int64_t>(
       res, mr, raft::make_extents<int64_t>(num_samples, max_dim_chunk));
     auto transposed_sampled_dataset_chunk = raft::make_device_mdarray<T, int64_t>(
@@ -331,7 +331,7 @@ auto train(raft::resources const& res,
       raft::make_host_vector_view<const compute_t>(host_threshold_vec.data(),
                                                    (int64_t)dataset_dim));
   } else {
-    auto mr         = raft::resource::get_workspace_resource(res);
+    auto mr         = raft::resource::get_workspace_resource_ref(res);
     auto casted_vec = raft::make_device_mdarray<compute_t, int64_t>(
       res, mr, raft::make_extents<int64_t>(dataset_dim));
     raft::copy(res,
@@ -425,7 +425,7 @@ void transform(raft::resources const& res,
                  raft::make_device_vector_view<const T>(quantizer.threshold.data_handle(),
                                                         (int64_t)dataset_dim));
     } else {
-      auto mr         = raft::resource::get_workspace_resource(res);
+      auto mr         = raft::resource::get_workspace_resource_ref(res);
       auto casted_vec = raft::make_device_mdarray<float, int64_t>(
         res, mr, raft::make_extents<int64_t>(dataset_dim));
       raft::linalg::map(res,
diff --git a/cpp/src/preprocessing/quantize/detail/pq.cuh b/cpp/src/preprocessing/quantize/detail/pq.cuh
index 2f417e6281..5d77e2dd44 100644
--- a/cpp/src/preprocessing/quantize/detail/pq.cuh
+++ b/cpp/src/preprocessing/quantize/detail/pq.cuh
@@ -119,7 +119,7 @@ auto train_pq_subspaces(
   auto trainset_ptr     = !vq_centers.empty() ? pq_trainset.data_handle() : dataset.data_handle();
   auto sub_labels       = raft::make_device_vector<uint32_t, ix_t>(res, 0);
   auto pq_cluster_sizes = raft::make_device_vector<uint32_t, ix_t>(res, 0);
-  auto device_memory    = raft::resource::get_workspace_resource(res);
+  auto device_memory    = raft::resource::get_workspace_resource_ref(res);
   if (is_balanced_kmeans(params)) {
     sub_labels = raft::make_device_mdarray<uint32_t>(
       res, device_memory, raft::make_extents<ix_t>(n_rows_train));
diff --git a/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu b/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu
index 0f78805b5e..c0df2b8826 100644
--- a/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu
+++ b/cpp/tests/neighbors/ann_cagra/test_batch_load_iterator.cu
@@ -85,7 +85,7 @@ class BatchLoadIteratorTest : public ::testing::Test {
     int64_t total_processed = 0;
 
     auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res);
-    auto workspace_mr                   = raft::resource::get_workspace_resource(res);
+    auto workspace_mr                   = raft::resource::get_workspace_resource_ref(res);
 
     {
       bli::batch_load_iterator<mdspan_t> iter(res,
@@ -154,7 +154,7 @@ TEST_F(BatchLoadIteratorTest, EmptyViewFromHost)
   auto host_view  = host_empty.view();
 
   auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res);
-  auto workspace_mr                   = raft::resource::get_workspace_resource(res);
+  auto workspace_mr                   = raft::resource::get_workspace_resource_ref(res);
 
   bli::batch_load_iterator<raft::host_matrix_view<IdxT, int64_t>> iter(
     res, host_view, /*batch_size=*/128, copy_stream, workspace_mr, enable_prefetch);
@@ -167,7 +167,7 @@ TEST_F(BatchLoadIteratorTest, EmptyViewFromDevice)
   auto device_view  = device_empty.view();
 
   auto [copy_stream, enable_prefetch] = bli::get_prefetch_stream(res);
-  auto workspace_mr                   = raft::resource::get_workspace_resource(res);
+  auto workspace_mr                   = raft::resource::get_workspace_resource_ref(res);
 
   bli::batch_load_iterator<raft::device_matrix_view<IdxT, int64_t>> iter(
     res, device_view, /*batch_size=*/128, copy_stream, workspace_mr, enable_prefetch);
@@ -290,7 +290,7 @@ TEST_F(BatchLoadIteratorTest, MakeBatchLoadIteratorHostPtr)
                                                   n_cols,
                                                   batch_size_rows,
                                                   copy_stream,
-                                                  raft::resource::get_workspace_resource(res),
+                                                  raft::resource::get_workspace_resource_ref(res),
                                                   enable_prefetch);
   EXPECT_TRUE(iter.does_copy());
 
@@ -329,7 +329,7 @@ TEST_F(BatchLoadIteratorTest, MakeBatchLoadIteratorDevicePtr)
                                                   n_cols,
                                                   batch_size_rows,
                                                   copy_stream,
-                                                  raft::resource::get_workspace_resource(res),
+                                                  raft::resource::get_workspace_resource_ref(res),
                                                   enable_prefetch);
   EXPECT_FALSE(iter.does_copy());