Skip to content

Commit 20be4e3

Browse files
committed
Address Doxygen
1 parent 2a8de62 commit 20be4e3

1 file changed

Lines changed: 79 additions & 23 deletions

File tree

include/cuco/detail/bloom_filter/parametric_filter_policy.cuh

Lines changed: 79 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,12 @@ template <class Hash,
6363
uint32_t GroupsPerBlock = WordsPerBlock>
6464
class parametric_filter_policy {
6565
public:
66-
using hasher = Hash;
67-
using word_type = Word;
68-
using hash_argument_type = typename hasher::argument_type;
69-
using hash_result_type = decltype(std::declval<hasher>()(std::declval<hash_argument_type>()));
66+
using hasher = Hash; ///< 64-bit hash functor type
67+
using word_type = Word; ///< Underlying filter-block word type
68+
using hash_argument_type = typename hasher::argument_type; ///< Hash function input type
69+
using hash_result_type =
70+
decltype(std::declval<hasher>()(std::declval<hash_argument_type>())); ///< Hash function
71+
///< output type
7072

7173
private:
7274
static constexpr uint32_t max_salts = 64;
@@ -84,8 +86,8 @@ class parametric_filter_policy {
8486
static constexpr uint32_t word_bits = cuda::std::numeric_limits<word_type>::digits;
8587

8688
public:
87-
static constexpr uint32_t words_per_block = WordsPerBlock;
88-
static constexpr uint32_t pattern_bits = PatternBits;
89+
static constexpr uint32_t words_per_block = WordsPerBlock; ///< Number of words per filter block
90+
static constexpr uint32_t pattern_bits = PatternBits; ///< Fingerprint bits per key
8991

9092
static constexpr uint32_t add_horizontal_layout =
9193
AddHorizontalLayout; ///< horizontal vectorization layout for add operation
@@ -96,31 +98,39 @@ class parametric_filter_policy {
9698
static constexpr uint32_t contains_vertical_layout =
9799
ContainsVerticalLayout; ///< vertical vectorization layout for contains operation
98100

99-
static constexpr size_t max_filter_blocks = cuda::std::numeric_limits<uint32_t>::max();
100-
// This ensures each word in the block has at least one bit set; otherwise we would never
101-
// use some of the words
101+
static constexpr size_t max_filter_blocks =
102+
cuda::std::numeric_limits<uint32_t>::max(); ///< Upper bound on the number of filter blocks
103+
/// Lower bound on `pattern_bits`: at least one bit per word so every word contributes.
102104
static constexpr auto min_pattern_bits = words_per_block;
103-
// The maximum number of bits to be set for a key is capped by the total number of bits in
104-
// the filter block, as well as the number of available salts
105+
/// Upper bound on `pattern_bits`: the total number of bits in a filter block, capped by the
106+
/// number of available salts.
105107
static constexpr auto max_pattern_bits = cuda::std::min(word_bits * words_per_block, max_salts);
106108

107109
//===----------Cache-Sectorized----------===//
108-
static constexpr uint32_t groups_per_block = GroupsPerBlock;
109-
static constexpr bool is_cache_sectorized = groups_per_block != words_per_block ? true : false;
110-
static constexpr uint32_t words_per_group = words_per_block / groups_per_block;
110+
static constexpr uint32_t groups_per_block =
111+
GroupsPerBlock; ///< Cache-sectorization groups per block (paper's z)
112+
static constexpr bool is_cache_sectorized =
113+
groups_per_block != words_per_block ? true : false; ///< CSBF mode flag
114+
static constexpr uint32_t words_per_group =
115+
words_per_block / groups_per_block; ///< Words per cache-sectorization group
111116
// TODO: when `pattern_bits % groups_per_block != 0`, using a ceil packs all remainder bits into
112117
// the first `pattern_bits / max_bits_per_group` groups, leaving later groups with a zero
113118
// expected pattern. This wastes block capacity and inflates FPR. Distribute floor bits to every
114119
// group plus one extra bit to the first `pattern_bits % groups_per_block` groups, and update
115120
// the salt-to-group mapping in `set_bits` accordingly.
116-
static constexpr uint32_t max_bits_per_group =
117-
cuco::detail::int_div_ceil(pattern_bits, groups_per_block);
118-
static constexpr uint32_t add_groups_per_vertical_layout = add_vertical_layout / words_per_group;
121+
static constexpr uint32_t max_bits_per_group = cuco::detail::int_div_ceil(
122+
pattern_bits, groups_per_block); ///< CSBF: max fingerprint bits set per group per key
123+
static constexpr uint32_t add_groups_per_vertical_layout =
124+
add_vertical_layout / words_per_group; ///< CSBF: groups touched per add vertical step
119125
static constexpr uint32_t contains_groups_per_vertical_layout =
120-
contains_vertical_layout / words_per_group;
121-
static constexpr uint32_t group_index_salt = 0x5bd1e995U;
122-
static constexpr uint32_t group_index_width = cuda::std::bit_width(words_per_group - 1);
123-
static constexpr uint32_t group_index_mask = words_per_group - 1;
126+
contains_vertical_layout /
127+
words_per_group; ///< CSBF: groups touched per contains vertical step
128+
static constexpr uint32_t group_index_salt =
129+
0x5bd1e995U; ///< CSBF: salt for selecting one word per group
130+
static constexpr uint32_t group_index_width = cuda::std::bit_width(
131+
words_per_group - 1); ///< CSBF: bits needed to encode an in-group word index
132+
static constexpr uint32_t group_index_mask =
133+
words_per_group - 1; ///< CSBF: mask for selecting an in-group word index
124134

125135
private:
126136
static constexpr uint32_t bit_index_width = cuda::std::bit_width(word_bits - 1);
@@ -135,6 +145,11 @@ class parametric_filter_policy {
135145
cuco::detail::int_div_ceil(pattern_bits, words_per_block);
136146

137147
public:
148+
/**
149+
* @brief Constructs a parametric filter policy.
150+
*
151+
* @param hash Hash function used to generate fingerprints.
152+
*/
138153
__host__ __device__ constexpr parametric_filter_policy(Hash hash = {}) : hash_{hash}
139154
{
140155
static_assert(pattern_bits >= min_pattern_bits,
@@ -178,28 +193,69 @@ class parametric_filter_policy {
178193
"within 32 bits");
179194
}
180195

181-
// Return {upper 32b, lower 32b} of 64b hash
196+
/**
197+
* @brief Splits the 64-bit hash of a key into its upper and lower 32 bits.
198+
*
199+
* The upper half is used for block selection (via multiply-shift); the lower half drives the
200+
* per-word fingerprint pattern via salt-based multiplicative hashing.
201+
*
202+
* @param key Key to hash.
203+
*
204+
* @return `{upper 32 bits, lower 32 bits}` of the 64-bit hash.
205+
*/
182206
__device__ constexpr cuda::std::pair<uint32_t, uint32_t> split_hash(hash_argument_type key) const
183207
{
184208
auto const hash_value = hash_(key);
185209
return {static_cast<uint32_t>(hash_value >> 32), static_cast<uint32_t>(hash_value)};
186210
}
187211

212+
/**
213+
* @brief Determines the filter block a key maps to via fast multiply-shift modulo.
214+
*
215+
* @tparam Extent Size type used to determine the number of blocks in the filter.
216+
*
217+
* @param upper_hash_value Upper 32 bits of the key's hash.
218+
* @param num_blocks Number of blocks in the filter.
219+
*
220+
* @return Block index in `[0, num_blocks)`.
221+
*/
188222
template <class Extent>
189223
__device__ constexpr auto block_index(uint32_t upper_hash_value, Extent num_blocks) const
190224
{
191-
// return upper_hash_value % num_blocks;
192225
return static_cast<uint32_t>((static_cast<uint64_t>(upper_hash_value) *
193226
static_cast<typename Extent::value_type>(num_blocks)) >>
194227
32);
195228
}
196229

230+
/**
231+
* @brief Generates the per-word fingerprint pattern for a key when the horizontal layout is 1.
232+
*
233+
* @tparam LoopIndex Outer-loop iteration index when `words_per_block / VerticalLayout > 1`.
234+
* @tparam VerticalLayout Number of contiguous words this call produces.
235+
*
236+
* @param lower_hash_value Lower 32 bits of the key's hash.
237+
*
238+
* @return Array of `VerticalLayout` (or `groups_per_vertical_layout` in CSBF mode) words.
239+
*/
197240
template <uint32_t LoopIndex, uint32_t VerticalLayout>
198241
__device__ constexpr auto array_pattern(uint32_t lower_hash_value) const
199242
{
200243
return pattern_impl<LoopIndex, VerticalLayout>(lower_hash_value);
201244
}
202245

246+
/**
247+
* @brief Generates the per-word fingerprint pattern for a key when the horizontal layout is > 1.
248+
*
249+
* @tparam LoopIndex Outer-loop iteration index.
250+
* @tparam HorizontalLayout Cooperative-group size cooperating on a single key.
251+
* @tparam VerticalLayout Number of contiguous words this call produces.
252+
*
253+
* @param lower_hash_value Lower 32 bits of the key's hash.
254+
* @param thread_index Caller's rank within the cooperative group.
255+
*
256+
* @return Array of `VerticalLayout` (or `groups_per_vertical_layout` in CSBF mode) words owned
257+
* by the calling thread.
258+
*/
203259
template <uint32_t LoopIndex, uint32_t HorizontalLayout, uint32_t VerticalLayout>
204260
__device__ constexpr auto array_pattern(uint32_t lower_hash_value, uint32_t thread_index) const
205261
{

0 commit comments

Comments
 (0)