Skip to content

Commit 1addcf9

Browse files
authored
Merge pull request #234 from sleeepyjack/bugfix/cudart-version
2 parents a767052 + d664744 commit 1addcf9

3 files changed

Lines changed: 18 additions & 11 deletions

File tree

include/cuco/detail/__config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
25 25   #define CUCO_HAS_CUDA_BARRIER
26 26   #endif
27 27
28    - #if defined(CUDART_VERSION) && (CUDART_VERSION >= 11100)
   28 + #if defined(CUDART_VERSION) && (CUDART_VERSION >= 11010)
29 29   #define CUCO_HAS_CG_MEMCPY_ASYNC
30 30   #endif
31 31

include/cuco/detail/static_multimap/device_view_impl.inl

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
22 22   #include <thrust/type_traits/is_contiguous_iterator.h>
23 23
24 24   #include <cooperative_groups.h>
   25 + #include <cooperative_groups/memcpy_async.h>
25 26
26 27   namespace cuco {
27 28   template <typename Key,
@@ -497,23 +498,31 @@ class static_multimap<Key, Value, Scope, Allocator, ProbeSequence>::device_view_
497 498   }
498 499   offset = g.shfl(offset, 0);
499 500
500     - if constexpr (thrust::is_contiguous_iterator_v<OutputIt>) {
501 501   #if defined(CUCO_HAS_CG_MEMCPY_ASYNC)
    502 + constexpr bool uses_memcpy_async = thrust::is_contiguous_iterator_v<OutputIt>;
    503 + #else
    504 + constexpr bool uses_memcpy_async = false;
    505 + #endif // end CUCO_HAS_CG_MEMCPY_ASYNC
    506 +
    507 + if constexpr (uses_memcpy_async) {
502 508   #if defined(CUCO_HAS_CUDA_BARRIER)
503 509   cooperative_groups::memcpy_async(
504 510   g,
505     - output_begin + offset,
    511 + &thrust::raw_reference_cast(*(output_begin + offset)),
506 512   output_buffer,
507 513   cuda::aligned_size_t<alignof(value_type)>(sizeof(value_type) * num_outputs));
508 514   #else
509     - cooperative_groups::memcpy_async(
510     - g, output_begin + offset, output_buffer, sizeof(value_type) * num_outputs);
    515 + cooperative_groups::memcpy_async(g,
    516 + &thrust::raw_reference_cast(*(output_begin + offset)),
    517 + output_buffer,
    518 + sizeof(value_type) * num_outputs);
511 519   #endif // end CUCO_HAS_CUDA_BARRIER
512     - return;
513     - #endif // end CUCO_HAS_CG_MEMCPY_ASYNC
514 520   }
515     - for (auto index = lane_id; index < num_outputs; index += g.size()) {
516     - *(output_begin + offset + index) = output_buffer[index];
    521 +
    522 + if constexpr (not uses_memcpy_async) {
    523 + for (auto index = lane_id; index < num_outputs; index += g.size()) {
    524 + *(output_begin + offset + index) = output_buffer[index];
    525 + }
517 526   }
518 527   }
519 528

include/cuco/detail/static_multimap/kernels.cuh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@
23 23
24 24   #include <cuda/std/atomic>
25 25
26    - #include <cooperative_groups/memcpy_async.h>
27    -
28 26   #include <iterator>
29 27
30 28   namespace cuco {

0 commit comments

Comments
 (0)