Reset CUDA error after cudaFuncSetAttribute (#504)

sleeepyjack · web-flow · commit f166d1869a4c · 2024-06-12T04:10:29.000+02:00
This PR fixes a bug which occurs when `cudaFuncSetAttribute` is used to conditionally check whether there is enough shmem available to fit the HLL sketch. If the sketch doesn't fit, the CUDA error returned by the function stays recorded as the runtime's last error (it is "sticky" until cleared) and may resurface in downstream calls, e.g., Thrust. The fix consists of flushing the CUDA error with `cudaGetLastError()` after the call to `cudaFuncSetAttribute`. Big thanks to @gevtushenko for tracking down this bug.
diff --git a/include/cuco/detail/hyperloglog/hyperloglog_ref.cuh b/include/cuco/detail/hyperloglog/hyperloglog_ref.cuh
@@ -550,9 +550,12 @@ class hyperloglog_ref {
   [[nodiscard]] __host__ constexpr bool try_reserve_shmem(Kernel kernel,
                                                           int shmem_bytes) const noexcept
   {
-    return cudaSuccess == cudaFuncSetAttribute(reinterpret_cast<void const*>(kernel),
-                                               cudaFuncAttributeMaxDynamicSharedMemorySize,
-                                               shmem_bytes);
+    bool const ret =
+      cudaSuccess == cudaFuncSetAttribute(reinterpret_cast<void const*>(kernel),
+                                          cudaFuncAttributeMaxDynamicSharedMemorySize,
+                                          shmem_bytes);
+    cudaGetLastError();  // flush CUDA error
+    return ret;
   }
 
   hasher hash_;                            ///< Hash function used to hash items