@@ -200,20 +200,21 @@ template <typename Key, typename Hash, typename Eq>
200200std::vector<int64_t > SlabHashBackend<Key, Hash, Eq>::BucketSizes() const {
201201 CUDAScopedDevice scoped_device (this ->device_ );
202202 thrust::device_vector<int64_t > elems_per_bucket (impl_.bucket_count_ );
203- thrust::fill (elems_per_bucket.begin (), elems_per_bucket.end (), 0 );
203+ thrust::fill (thrust::cuda::par. on ( CUDAStream::GetInstance (). Get ()), elems_per_bucket.begin (), elems_per_bucket.end (), 0 );
204204
205205 const int64_t num_blocks =
206206 (impl_.buffer_accessor_ .capacity_ + kThreadsPerBlock - 1 ) /
207207 kThreadsPerBlock ;
208208 CountElemsPerBucketKernel<<<num_blocks, kThreadsPerBlock , 0 ,
209- core:: CUDAStream::GetInstance ().Get ()>>>(
209+ CUDAStream::GetInstance ().Get ()>>>(
210210 impl_, thrust::raw_pointer_cast (elems_per_bucket.data ()));
211211 cuda::Synchronize (CUDAStream::GetInstance ());
212212 OPEN3D_CUDA_CHECK (cudaGetLastError ());
213213
214214 std::vector<int64_t > result (impl_.bucket_count_ );
215- thrust::copy (elems_per_bucket.begin (), elems_per_bucket.end (),
215+ thrust::copy (thrust::cuda::par. on ( CUDAStream::GetInstance (). Get ()), elems_per_bucket.begin (), elems_per_bucket.end (),
216216 result.begin ());
217+ cuda::Synchronize (CUDAStream::GetInstance ());
217218 return result;
218219}
219220
@@ -236,8 +237,8 @@ void SlabHashBackend<Key, Hash, Eq>::Insert(
236237 // / Increase heap_top to pre-allocate potential memory increment and
237238 // / avoid atomicAdd in kernel.
238239 int prev_heap_top = this ->buffer_ ->GetHeapTopIndex ();
239- *thrust::device_ptr< int >(impl_. buffer_accessor_ . heap_top_ ) =
240- prev_heap_top + count ;
240+ int new_value = prev_heap_top + count;
241+ thrust::fill_n (thrust::cuda::par. on ( CUDAStream::GetInstance (). Get ()), thrust::device_pointer_cast (impl_. buffer_accessor_ . heap_top_ ), 1 , new_value) ;
241242
242243 const int64_t num_blocks =
243244 (count + kThreadsPerBlock - 1 ) / kThreadsPerBlock ;
@@ -248,8 +249,9 @@ void SlabHashBackend<Key, Hash, Eq>::Insert(
248249 core::CUDAStream::GetInstance ().Get ()>>>(
249250 impl_, input_keys, output_buf_indices, output_masks, count);
250251
251- thrust::device_vector<const void *> input_values_soa_device (
252- input_values_soa.begin (), input_values_soa.end ());
252+ thrust::device_vector<const void *> input_values_soa_device (input_values_soa.size ());
253+ thrust::copy (thrust::cuda::par.on (CUDAStream::GetInstance ().Get ()),
254+ input_values_soa.begin (), input_values_soa.end (), input_values_soa_device.begin ());
253255
254256 int64_t n_values = input_values_soa.size ();
255257 const void * const * ptr_input_values_soa =
0 commit comments