Skip to content

Commit c6e85ee

Browse files
authored
Migrate RMM usage to CCCL MR design (#7951)
## Summary - Migrate all raw RMM `allocate`/`deallocate` calls to the new CCCL 3-argument API that requires explicit alignment - Replace removed `rmm.librmm.per_device_resource` Cython import with `rmm.pylibrmm.memory_resource` and use `make_any_device_resource` to obtain the resource for `device_buffer` construction Depends on rapidsai/rmm#2361. Depends on rapidsai/ucxx#636. Depends on rapidsai/raft#2996. Depends on rapidsai/cuvs#1990. ## Changes - **`cpp/src/genetic/genetic.cu`**: Add explicit `alignof(node)` / `alignof(program)` to all `allocate` and `deallocate` calls in `parallel_evolve` and `symFit`; fix deallocation bug in `parallel_evolve` where `h_nextprogs[i].len` was incorrectly used instead of `tmp.len` to compute the buffer size being freed - **`cpp/examples/symreg/symreg_example.cpp`**: Use `params.population_size * sizeof(cg::program)` and `alignof(cg::program)` for `allocate`/`deallocate` calls, fixing incorrect byte-size computation; remove unused `<rmm/aligned.hpp>` include - **`cpp/tests/sg/genetic/evolution_test.cu`**: Add alignment arguments to allocate/deallocate in `SymReg` test - **`cpp/tests/sg/genetic/program_test.cu`**: Add alignment arguments to `SetUp`/`TearDown` allocate/deallocate calls - **`python/cuml/cuml/manifold/umap/umap.pyx`**: Replace `get_current_device_resource()` with `make_any_device_resource(get_current_device_resource().get_mr())` for `device_buffer` construction Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Simon Adorf (https://github.com/csadorf) - Divye Gala (https://github.com/divyegala) - Victor Lafargue (https://github.com/viclafargue) URL: #7951
1 parent 9f83155 commit c6e85ee

5 files changed

Lines changed: 26 additions & 18 deletions

File tree

cpp/examples/symreg/symreg_example.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,8 @@ int main(int argc, char* argv[])
235235

236236
// Initialize AST
237237
auto curr_mr = rmm::mr::get_current_device_resource_ref();
238-
d_finalprogs = static_cast<cg::program_t>(curr_mr.allocate(stream, params.population_size));
238+
d_finalprogs = static_cast<cg::program_t>(
239+
curr_mr.allocate(stream, params.population_size * sizeof(cg::program), alignof(cg::program)));
239240

240241
std::vector<std::vector<cg::program>> history;
241242
history.reserve(params.generations);
@@ -327,7 +328,8 @@ int main(int argc, char* argv[])
327328

328329
/* ======================= Reset data ======================= */
329330

330-
curr_mr.deallocate(stream, d_finalprogs, params.population_size);
331+
curr_mr.deallocate(
332+
stream, d_finalprogs, params.population_size * sizeof(cg::program), alignof(cg::program));
331333
CUDA_RT_CALL(cudaEventDestroy(start));
332334
CUDA_RT_CALL(cudaEventDestroy(stop));
333335
return 0;

cpp/src/genetic/genetic.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,15 +221,15 @@ void parallel_evolve(const raft::handle_t& h,
221221

222222
// Set current generation device nodes
223223
tmp.nodes = (node*)rmm::mr::get_current_device_resource_ref().allocate(
224-
stream, h_nextprogs[i].len * sizeof(node));
224+
stream, h_nextprogs[i].len * sizeof(node), alignof(node));
225225
raft::copy(tmp.nodes, h_nextprogs[i].nodes, h_nextprogs[i].len, stream);
226226
raft::copy(d_nextprogs + i, &tmp, 1, stream);
227227

228228
if (generation > 1) {
229229
// Free device memory allocated to program nodes in previous generation
230230
raft::copy(&tmp, d_oldprogs + i, 1, stream);
231231
rmm::mr::get_current_device_resource_ref().deallocate(
232-
stream, tmp.nodes, h_nextprogs[i].len * sizeof(node));
232+
stream, tmp.nodes, tmp.len * sizeof(node), alignof(node));
233233
}
234234

235235
tmp.nodes = nullptr;
@@ -399,7 +399,7 @@ void symFit(const raft::handle_t& handle,
399399

400400
program_t d_currprogs; // pointer to current programs
401401
d_currprogs = (program_t)rmm::mr::get_current_device_resource_ref().allocate(
402-
stream, params.population_size * sizeof(program));
402+
stream, params.population_size * sizeof(program), alignof(program));
403403
program_t d_nextprogs = final_progs; // Reuse memory already allocated for final_progs
404404
final_progs = nullptr;
405405

@@ -481,7 +481,7 @@ void symFit(const raft::handle_t& handle,
481481

482482
// Deallocate the previous generation device memory
483483
rmm::mr::get_current_device_resource_ref().deallocate(
484-
stream, d_nextprogs, params.population_size * sizeof(program));
484+
stream, d_nextprogs, params.population_size * sizeof(program), alignof(program));
485485
d_currprogs = nullptr;
486486
d_nextprogs = nullptr;
487487
}

cpp/tests/sg/genetic/evolution_test.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ TEST_F(GeneticEvolutionTest, SymReg)
259259
MLCommon::CompareApprox<float> compApprox(tolerance);
260260
program_t final_progs;
261261
final_progs = (program_t)rmm::mr::get_current_device_resource_ref().allocate(
262-
stream, hyper_params.population_size * sizeof(program));
262+
stream, hyper_params.population_size * sizeof(program), alignof(program));
263263
std::vector<std::vector<program>> history;
264264
history.reserve(hyper_params.generations);
265265

@@ -327,12 +327,12 @@ TEST_F(GeneticEvolutionTest, SymReg)
327327
program tmp = program();
328328
raft::copy(&tmp, final_progs + i, 1, stream);
329329
rmm::mr::get_current_device_resource_ref().deallocate(
330-
stream, tmp.nodes, tmp.len * sizeof(node));
330+
stream, tmp.nodes, tmp.len * sizeof(node), alignof(node));
331331
tmp.nodes = nullptr;
332332
}
333333
// deallocate the final programs from device memory
334334
rmm::mr::get_current_device_resource_ref().deallocate(
335-
stream, final_progs, hyper_params.population_size * sizeof(program));
335+
stream, final_progs, hyper_params.population_size * sizeof(program), alignof(program));
336336

337337
ASSERT_TRUE(compApprox(history[n_gen - 1][best_idx].raw_fitness_, 0.0036f));
338338
std::cout << "Some Predicted test values:" << std::endl;

cpp/tests/sg/genetic/program_test.cu

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,12 @@ class GeneticProgramTest : public ::testing::Test {
9090
d_lY.resize(250, stream);
9191
d_lunitW.resize(250, stream);
9292
d_lW.resize(250, stream);
93-
d_nodes1 = (node*)rmm::mr::get_current_device_resource_ref().allocate(stream, 7 * sizeof(node));
94-
d_nodes2 = (node*)rmm::mr::get_current_device_resource_ref().allocate(stream, 7 * sizeof(node));
95-
d_progs =
96-
(program_t)rmm::mr::get_current_device_resource_ref().allocate(stream, 2 * sizeof(program));
93+
d_nodes1 = (node*)rmm::mr::get_current_device_resource_ref().allocate(
94+
stream, 7 * sizeof(node), alignof(node));
95+
d_nodes2 = (node*)rmm::mr::get_current_device_resource_ref().allocate(
96+
stream, 7 * sizeof(node), alignof(node));
97+
d_progs = (program_t)rmm::mr::get_current_device_resource_ref().allocate(
98+
stream, 2 * sizeof(program), alignof(program));
9799

98100
RAFT_CUDA_TRY(cudaMemcpyAsync(
99101
d_lYpred.data(), h_lYpred.data(), 500 * sizeof(float), cudaMemcpyHostToDevice, stream));
@@ -146,9 +148,12 @@ class GeneticProgramTest : public ::testing::Test {
146148

147149
void TearDown() override
148150
{
149-
rmm::mr::get_current_device_resource_ref().deallocate(stream, d_nodes1, 7 * sizeof(node));
150-
rmm::mr::get_current_device_resource_ref().deallocate(stream, d_nodes2, 7 * sizeof(node));
151-
rmm::mr::get_current_device_resource_ref().deallocate(stream, d_progs, 2 * sizeof(program));
151+
rmm::mr::get_current_device_resource_ref().deallocate(
152+
stream, d_nodes1, 7 * sizeof(node), alignof(node));
153+
rmm::mr::get_current_device_resource_ref().deallocate(
154+
stream, d_nodes2, 7 * sizeof(node), alignof(node));
155+
rmm::mr::get_current_device_resource_ref().deallocate(
156+
stream, d_progs, 2 * sizeof(program), alignof(program));
152157
}
153158

154159
raft::handle_t handle;

python/cuml/cuml/manifold/umap/umap.pyx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ from libcpp.memory cimport unique_ptr
4242
from libcpp.utility cimport move
4343
from pylibraft.common.handle cimport handle_t
4444
from rmm.librmm.device_buffer cimport device_buffer
45-
from rmm.librmm.per_device_resource cimport get_current_device_resource
45+
from rmm.librmm.memory_resource cimport make_any_device_resource
4646
from rmm.pylibrmm.device_buffer cimport DeviceBuffer
47+
from rmm.pylibrmm.memory_resource cimport get_current_device_resource
4748

4849
cimport cuml.manifold.umap.lib as lib
4950
from cuml.metrics.distance_type cimport DistanceType
@@ -1286,7 +1287,7 @@ class UMAP(Base, InteropMixin, CMajorInputTagMixin, SparseInputTagMixin):
12861287
<const void*><uintptr_t>init_m.ptr,
12871288
init_m.size,
12881289
handle_.get_stream(),
1289-
get_current_device_resource()
1290+
make_any_device_resource(get_current_device_resource().get_mr())
12901291
)
12911292
)
12921293

0 commit comments

Comments
 (0)