Skip to content

Commit 9188983

Browse files
committed
Align HyperLogLog add() bit handling with Spark
1 parent d3701ae commit 9188983

1 file changed

Lines changed: 11 additions & 18 deletions

File tree

include/cuco/detail/hyperloglog/hyperloglog_impl.cuh

Lines changed: 11 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,9 @@
3030
#include <cuda/std/__algorithm/max.h> // TODO #include <cuda/std/algorithm> once available
3131
#include <cuda/std/bit>
3232
#include <cuda/std/cstddef>
33+
#include <cuda/std/limits>
3334
#include <cuda/std/span>
35+
#include <cuda/std/type_traits>
3436
#include <cuda/std/utility>
3537
#include <cuda/stream_ref>
3638
#include <thrust/iterator/constant_iterator.h>
@@ -62,6 +64,9 @@ class hyperloglog_impl {
6264
using fp_type = double; ///< Floating point type used for reduction
6365
using hash_value_type = cuda::std::remove_cvref_t<decltype(cuda::std::declval<Hash>()(
6466
cuda::std::declval<T>()))>; ///< Hash value type
67+
static_assert(cuda::std::is_unsigned_v<hash_value_type>,
68+
"HyperLogLog requires an unsigned hash value type");
69+
6570
public:
6671
static constexpr auto thread_scope = Scope; ///< CUDA thread scope
6772

@@ -153,13 +158,11 @@ class hyperloglog_impl {
153158
*/
154159
__device__ constexpr void add(T const& item) noexcept
155160
{
156-
auto const h = this->hash_(item);
157-
auto const reg = h & this->register_mask();
158-
auto const zeroes = cuda::std::countl_zero(h | this->register_mask()) + 1; // __clz
159-
160-
// reversed order (same one as Spark uses)
161-
// auto const reg = h >> ((sizeof(hash_value_type) * 8) - this->precision_);
162-
// auto const zeroes = cuda::std::countl_zero(h << this->precision_) + 1;
161+
constexpr auto hash_bits = cuda::std::numeric_limits<hash_value_type>::digits;
162+
auto const h = this->hash_(item);
163+
auto const reg = static_cast<int>(h >> (hash_bits - this->precision_));
164+
auto const w_padding = hash_value_type{1} << static_cast<hash_value_type>(this->precision_ - 1);
165+
auto const zeroes = cuda::std::countl_zero((h << this->precision_) | w_padding) + 1; // __clz
163166

164167
this->update_max(reg, zeroes);
165168
}
@@ -405,7 +408,7 @@ class hyperloglog_impl {
405408
int thread_zeroes = 0;
406409
for (int i = group.thread_rank(); i < this->sketch_.size(); i += group.size()) {
407410
auto const reg = this->sketch_[i];
408-
thread_sum += fp_type{1} / static_cast<fp_type>(1 << reg);
411+
thread_sum += fp_type{1} / static_cast<fp_type>(1ull << reg);
409412
thread_zeroes += reg == 0;
410413
}
411414

@@ -626,16 +629,6 @@ class hyperloglog_impl {
626629
}
627630
}
628631

629-
/**
630-
* @brief Gets the register mask used to separate register index from count.
631-
*
632-
* @return The register mask
633-
*/
634-
__host__ __device__ constexpr hash_value_type register_mask() const noexcept
635-
{
636-
return (1ull << this->precision_) - 1;
637-
}
638-
639632
hasher hash_; ///< Hash function used to hash items
640633
int32_t precision_; ///< HLL precision parameter
641634
cuda::std::span<register_type> sketch_; ///< HLL sketch storage

0 commit comments

Comments
 (0)