@@ -22,7 +22,8 @@ void* MemoryManagerCUDA::Malloc(size_t byte_size, const Device& device) {
2222#if CUDART_VERSION >= 11020
2323 if (cuda::SupportsMemoryPools (device)) {
2424 OPEN3D_CUDA_CHECK (cudaMallocAsync (static_cast <void **>(&ptr),
25- byte_size, cuda::GetStream ()));
25+ byte_size,
26+ CUDAStream::GetInstance ().Get ()));
2627 } else {
2728 OPEN3D_CUDA_CHECK (cudaMalloc (static_cast <void **>(&ptr), byte_size));
2829 }
@@ -43,7 +44,8 @@ void MemoryManagerCUDA::Free(void* ptr, const Device& device) {
4344 if (ptr && IsCUDAPointer (ptr, device)) {
4445#if CUDART_VERSION >= 11020
4546 if (cuda::SupportsMemoryPools (device)) {
46- OPEN3D_CUDA_CHECK (cudaFreeAsync (ptr, cuda::GetStream ()));
47+ OPEN3D_CUDA_CHECK (
48+ cudaFreeAsync (ptr, CUDAStream::GetInstance ().Get ()));
4749 } else {
4850 OPEN3D_CUDA_CHECK (cudaFree (ptr));
4951 }
@@ -62,22 +64,26 @@ void MemoryManagerCUDA::Memcpy(void* dst_ptr,
6264 const void * src_ptr,
6365 const Device& src_device,
6466 size_t num_bytes) {
67+ const CUDAStream& current_stream = CUDAStream::GetInstance ();
6568 if (dst_device.IsCUDA () && src_device.IsCPU ()) {
6669 if (!IsCUDAPointer (dst_ptr, dst_device)) {
6770 utility::LogError (" dst_ptr is not a CUDA pointer." );
6871 }
6972 CUDAScopedDevice scoped_device (dst_device);
7073 OPEN3D_CUDA_CHECK (cudaMemcpyAsync (dst_ptr, src_ptr, num_bytes,
7174 cudaMemcpyHostToDevice,
72- cuda::GetStream ()));
75+ current_stream. Get ()));
7376 } else if (dst_device.IsCPU () && src_device.IsCUDA ()) {
7477 if (!IsCUDAPointer (src_ptr, src_device)) {
7578 utility::LogError (" src_ptr is not a CUDA pointer." );
7679 }
7780 CUDAScopedDevice scoped_device (src_device);
7881 OPEN3D_CUDA_CHECK (cudaMemcpyAsync (dst_ptr, src_ptr, num_bytes,
7982 cudaMemcpyDeviceToHost,
80- cuda::GetStream ()));
83+ current_stream.Get ()));
84+ if (current_stream.ShouldSyncMemcpyFromDeviceToHost ()) {
85+ OPEN3D_CUDA_CHECK (cudaStreamSynchronize (current_stream.Get ()));
86+ }
8187 } else if (dst_device.IsCUDA () && src_device.IsCUDA ()) {
8288 if (!IsCUDAPointer (dst_ptr, dst_device)) {
8389 utility::LogError (" dst_ptr is not a CUDA pointer." );
@@ -90,25 +96,25 @@ void MemoryManagerCUDA::Memcpy(void* dst_ptr,
9096 CUDAScopedDevice scoped_device (src_device);
9197 OPEN3D_CUDA_CHECK (cudaMemcpyAsync (dst_ptr, src_ptr, num_bytes,
9298 cudaMemcpyDeviceToDevice,
93- cuda::GetStream ()));
99+ current_stream. Get ()));
94100 } else if (CUDAState::GetInstance ().IsP2PEnabled (src_device.GetID (),
95101 dst_device.GetID ())) {
96102 OPEN3D_CUDA_CHECK (cudaMemcpyPeerAsync (
97103 dst_ptr, dst_device.GetID (), src_ptr, src_device.GetID (),
98- num_bytes, cuda::GetStream ()));
104+ num_bytes, current_stream. Get ()));
99105 } else {
100106 void * cpu_buf = MemoryManager::Malloc (num_bytes, Device (" CPU:0" ));
101107 {
102108 CUDAScopedDevice scoped_device (src_device);
103109 OPEN3D_CUDA_CHECK (cudaMemcpyAsync (cpu_buf, src_ptr, num_bytes,
104110 cudaMemcpyDeviceToHost,
105- cuda::GetStream ()));
111+ current_stream. Get ()));
106112 }
107113 {
108114 CUDAScopedDevice scoped_device (dst_device);
109115 OPEN3D_CUDA_CHECK (cudaMemcpyAsync (dst_ptr, cpu_buf, num_bytes,
110116 cudaMemcpyHostToDevice,
111- cuda::GetStream ()));
117+ current_stream. Get ()));
112118 }
113119 MemoryManager::Free (cpu_buf, Device (" CPU:0" ));
114120 }
0 commit comments