Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions include/cuco/detail/pair/helpers.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,22 @@ struct packed {
using type = void; ///< `void` type by default
};

/**
* @brief Denotes the packed type when the size of the object is 1.
*/
template <>
struct packed<sizeof(uint8_t)> {
using type = uint8_t; ///< Packed type as `uint8_t` if the size of the object is 1
};
Comment on lines +53 to +56
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We pack paired data (e.g., a 32-bit key and 32-bit value) into a single 64-bit unit for CAS, used only in map scenarios. Therefore, the smallest CAS we need is 8 bits (1B), and the smallest packed type is 2B, making the 1B overload unnecessary.

Suggested change
template <>
struct packed<sizeof(uint8_t)> {
using type = uint8_t; ///< Packed type as `uint8_t` if the size of the object is 1
};


/**
* @brief Denotes the packed type when the size of the object is 2.
*/
template <>
struct packed<sizeof(uint16_t)> {
using type = uint16_t; ///< Packed type as `uint16_t` if the size of the object is 2
};
Comment on lines +61 to +64
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could we add a 2B key-value pair in the map unit test to exercise this code path? One test case is sufficient.


/**
* @brief Denotes the packed type when the size of the object is 8.
*/
Expand Down
1 change: 1 addition & 0 deletions tests/static_set/atomic_storage_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ TEST_CASE("atomic_storage_test", "")
cuda::proclaim_return_type<Key>(build_fn{}));

set.insert_async(keys_begin, keys_begin + num_keys);
CUCO_CUDA_TRY(cudaDeviceSynchronize());
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: why is this needed?

Copy link
Copy Markdown
Collaborator

@sleeepyjack sleeepyjack May 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It shouldn't be required since it is running on the default stream IIUC. It's probably a red herring trying to fix open bug #804.

auto const count = set.size();

REQUIRE(count == num_keys);
Expand Down
27 changes: 21 additions & 6 deletions tests/static_set/for_each_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>

using size_type = std::size_t;

template <typename Set>
void test_for_each(Set& set, size_type num_keys)
void test_for_each(Set& set, size_type num_keys, size_type expected_evens, size_type expected_odds)
{
using Key = typename Set::key_type;

Expand All @@ -40,7 +42,7 @@ void test_for_each(Set& set, size_type num_keys)
// Insert keys
auto keys_begin = cuda::make_transform_iterator(
cuda::counting_iterator<size_type>{0}, cuda::proclaim_return_type<Key>([] __device__(auto i) {
// generates a sequence of 1, 2, 1, 2, ...
// generates a sequence of 0, 1, 2, ...
return static_cast<Key>(i);
}));
set.insert(keys_begin, keys_begin + num_keys, stream);
Expand All @@ -56,7 +58,7 @@ void test_for_each(Set& set, size_type num_keys)
if (slot % 2 == 0) { counter->fetch_add(slot, cuda::memory_order_relaxed); }
},
stream);
REQUIRE(counter_storage.load_to_host(stream) == 249'500);
REQUIRE(counter_storage.load_to_host(stream) == expected_evens);

counter_storage.reset(stream);

Expand All @@ -68,13 +70,19 @@ void test_for_each(Set& set, size_type num_keys)
if (!(slot % 2 == 0)) { counter->fetch_add(slot, cuda::memory_order_relaxed); }
},
stream);
REQUIRE(counter_storage.load_to_host(stream) == 250'000);
REQUIRE(counter_storage.load_to_host(stream) == expected_odds);
}

TEMPLATE_TEST_CASE_SIG(
"static_set for_each tests",
"",
((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize),
(uint8_t, cuco::test::probe_sequence::double_hashing, 1),
(uint8_t, cuco::test::probe_sequence::linear_probing, 1),
(uint16_t, cuco::test::probe_sequence::double_hashing, 1),
(uint16_t, cuco::test::probe_sequence::double_hashing, 2),
(uint16_t, cuco::test::probe_sequence::linear_probing, 1),
(uint16_t, cuco::test::probe_sequence::linear_probing, 2),
Comment on lines +80 to +85
Copy link
Copy Markdown
Collaborator

@sleeepyjack sleeepyjack May 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if using signed integers (as in the rest of the tests) would make sense for consistency. Plus, it would probably also catch some unintended overflow/wrap-around errors. Just a suggestion though - no hard requirement.

(int32_t, cuco::test::probe_sequence::double_hashing, 1),
(int32_t, cuco::test::probe_sequence::double_hashing, 2),
(int64_t, cuco::test::probe_sequence::double_hashing, 1),
Expand All @@ -92,7 +100,14 @@ TEMPLATE_TEST_CASE_SIG(
#endif
)
{
constexpr size_type num_keys{1'000};
// Limit key count for small types: leave room for the 0xFF/0xFFFF sentinel.
// Expected sums are pre-computed per type class:
// uint8_t (num_keys=100): sum of evens 0..98 = 2450, sum of odds 1..99 = 2500
// uint16_t+ (num_keys=1000): sum of evens 0..998 = 249'500, sum of odds 1..999 = 250'000
constexpr size_type num_keys = (sizeof(Key) == 1) ? 100 : 1'000;
constexpr size_type expected_evens = (sizeof(Key) == 1) ? 2'450 : 249'500;
constexpr size_type expected_odds = (sizeof(Key) == 1) ? 2'500 : 250'000;

using probe = std::conditional_t<
Probe == cuco::test::probe_sequence::linear_probing,
cuco::linear_probing<CGSize, cuco::murmurhash3_32<Key>>,
Expand All @@ -107,5 +122,5 @@ TEMPLATE_TEST_CASE_SIG(
cuco::storage<2>>;

auto set = set_t{num_keys, cuco::empty_key<Key>{static_cast<Key>(-1)}};
test_for_each(set, num_keys);
test_for_each(set, num_keys, expected_evens, expected_odds);
}
11 changes: 10 additions & 1 deletion tests/static_set/insert_and_find_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>

template <typename Set>
void test_insert_and_find(Set& set, std::size_t num_keys)
{
Expand Down Expand Up @@ -56,6 +58,12 @@ TEMPLATE_TEST_CASE_SIG(
"static_set Insert and find",
"",
((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize),
(uint8_t, cuco::test::probe_sequence::double_hashing, 1),
(uint8_t, cuco::test::probe_sequence::linear_probing, 1),
(uint16_t, cuco::test::probe_sequence::double_hashing, 1),
(uint16_t, cuco::test::probe_sequence::double_hashing, 2),
(uint16_t, cuco::test::probe_sequence::linear_probing, 1),
(uint16_t, cuco::test::probe_sequence::linear_probing, 2),
(int32_t, cuco::test::probe_sequence::double_hashing, 1),
(int32_t, cuco::test::probe_sequence::double_hashing, 2),
(int64_t, cuco::test::probe_sequence::double_hashing, 1),
Expand All @@ -73,7 +81,8 @@ TEMPLATE_TEST_CASE_SIG(
#endif
)
{
constexpr std::size_t num_keys{400};
// Limit key count for small types: leave room for the 0xFF/0xFFFF sentinel
constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 : 400;

using probe = std::conditional_t<Probe == cuco::test::probe_sequence::linear_probing,
cuco::linear_probing<CGSize, cuco::default_hash_function<Key>>,
Expand Down
22 changes: 18 additions & 4 deletions tests/static_set/retrieve_all_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>

template <typename Set>
void test_unique_sequence(Set& set, std::size_t num_keys)
{
Expand Down Expand Up @@ -61,6 +63,12 @@ TEMPLATE_TEST_CASE_SIG(
"static_set::retrieve_all tests",
"",
((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize),
(uint8_t, cuco::test::probe_sequence::double_hashing, 1),
(uint8_t, cuco::test::probe_sequence::linear_probing, 1),
(uint16_t, cuco::test::probe_sequence::double_hashing, 1),
(uint16_t, cuco::test::probe_sequence::double_hashing, 2),
(uint16_t, cuco::test::probe_sequence::linear_probing, 1),
(uint16_t, cuco::test::probe_sequence::linear_probing, 2),
(int32_t, cuco::test::probe_sequence::double_hashing, 1),
(int32_t, cuco::test::probe_sequence::double_hashing, 2),
(int64_t, cuco::test::probe_sequence::double_hashing, 1),
Expand All @@ -78,7 +86,8 @@ TEMPLATE_TEST_CASE_SIG(
#endif
)
{
constexpr std::size_t num_keys{400};
// Limit key count for small types: leave room for the 0xFF/0xFFFF sentinel
constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 : 400;
constexpr double desired_load_factor = 1.;

using probe = std::conditional_t<Probe == cuco::test::probe_sequence::linear_probing,
Expand All @@ -87,10 +96,15 @@ TEMPLATE_TEST_CASE_SIG(

constexpr std::size_t gold_capacity = [&]() {
if constexpr (cuco::is_double_hashing<probe>::value) {
return (CGSize == 1) ? 401 // 401 x 1 x 1
: 422; // 211 x 2 x 1
if constexpr (num_keys == 100) {
return (CGSize == 1) ? 101 // 101 x 1 x 1
: 106; // 53 x 2 x 1
} else {
return (CGSize == 1) ? 401 // 401 x 1 x 1
: 422; // 211 x 2 x 1
}
} else {
return 400;
return num_keys;
}
}();

Expand Down
7 changes: 6 additions & 1 deletion tests/static_set/retrieve_if_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>

using size_type = std::size_t;

template <class Container>
Expand Down Expand Up @@ -110,6 +112,8 @@ __global__ void test_retrieve_if_all_true_kernel(
TEMPLATE_TEST_CASE_SIG("static_set retrieve_if",
"",
((typename Key), Key),
(uint8_t),
(uint16_t),
(int32_t),
(int64_t)
#if defined(CUCO_HAS_128BIT_ATOMICS)
Expand All @@ -118,7 +122,8 @@ TEMPLATE_TEST_CASE_SIG("static_set retrieve_if",
#endif
)
{
constexpr size_type num_keys{400};
// Limit key count for small types: keys start at 1, sentinel is 0xFF/0xFFFF
constexpr size_type num_keys = (sizeof(Key) == 1) ? 100 : 400;

using container_type = cuco::static_set<Key>;

Expand Down
11 changes: 10 additions & 1 deletion tests/static_set/retrieve_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>

static constexpr int key_sentinel = -1;

template <typename Set>
Expand Down Expand Up @@ -75,6 +77,12 @@ TEMPLATE_TEST_CASE_SIG(
"static_set retrieve tests",
"",
((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize),
(uint8_t, cuco::test::probe_sequence::double_hashing, 1),
(uint8_t, cuco::test::probe_sequence::linear_probing, 1),
(uint16_t, cuco::test::probe_sequence::double_hashing, 1),
(uint16_t, cuco::test::probe_sequence::double_hashing, 2),
(uint16_t, cuco::test::probe_sequence::linear_probing, 1),
(uint16_t, cuco::test::probe_sequence::linear_probing, 2),
(int32_t, cuco::test::probe_sequence::double_hashing, 1),
(int32_t, cuco::test::probe_sequence::double_hashing, 2),
(int64_t, cuco::test::probe_sequence::double_hashing, 1),
Expand All @@ -92,7 +100,8 @@ TEMPLATE_TEST_CASE_SIG(
#endif
)
{
constexpr std::size_t num_keys{400};
// Limit key count for small types: leave room for the 0xFF/0xFFFF sentinel
constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 : 400;
constexpr double desired_load_factor = 1.;

using probe = std::conditional_t<Probe == cuco::test::probe_sequence::linear_probing,
Expand Down
16 changes: 13 additions & 3 deletions tests/static_set/shared_memory_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>
#include <limits>

template <std::size_t ValidSize, typename Ref>
Expand All @@ -47,6 +48,8 @@ __global__ void shared_memory_test_kernel(Ref* sets,
auto insert_ref = sets[set_id].make_copy(g, sm_buffer, cuco::thread_scope_block);
auto find_ref = insert_ref.rebind_operators(cuco::op::find);

__syncthreads();

for (int i = g.thread_rank(); i < number_of_elements; i += g.size()) {
auto found_it = find_ref.find(insterted_keys[offset + i]);

Expand All @@ -67,6 +70,8 @@ __global__ void shared_memory_test_kernel(Ref* sets,
TEMPLATE_TEST_CASE_SIG("static_set shared memory tests",
"",
((typename Key), Key),
(uint8_t),
(uint16_t),
(int32_t),
(int64_t)
#if defined(CUCO_HAS_128BIT_ATOMICS)
Expand All @@ -75,8 +80,12 @@ TEMPLATE_TEST_CASE_SIG("static_set shared memory tests",
#endif
)
{
constexpr std::size_t number_of_sets = 1000;
constexpr std::size_t elements_in_set = 500;
// For uint8_t: sentinel = 0xFF (255), so usable key range is 0..254.
// For uint16_t: sentinel = 0xFFFF (65535), so usable key range is 0..65534.
// thrust::sequence over number_of_sets*elements_in_set keys must not wrap.
// Use smaller set count and element count for smaller types.
constexpr std::size_t number_of_sets = (sizeof(Key) <= 2) ? (sizeof(Key) == 1 ? 2 : 100) : 1000;
constexpr std::size_t elements_in_set = (sizeof(Key) <= 2) ? (sizeof(Key) == 1 ? 100 : 600) : 500;
constexpr std::size_t set_capacity = 2 * elements_in_set;

using extent_type = cuco::extent<std::size_t, set_capacity>;
Expand All @@ -96,7 +105,8 @@ TEMPLATE_TEST_CASE_SIG("static_set shared memory tests",
// operator yet
std::vector<std::unique_ptr<set_type>> sets;
for (std::size_t set_id = 0; set_id < number_of_sets; ++set_id) {
sets.push_back(std::make_unique<set_type>(extent_type{}, cuco::empty_key<Key>{-1}));
sets.push_back(
std::make_unique<set_type>(extent_type{}, cuco::empty_key<Key>{static_cast<Key>(-1)}));
}

thrust::device_vector<bool> d_keys_exist(number_of_sets * elements_in_set);
Expand Down
19 changes: 13 additions & 6 deletions tests/static_set/stream_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,13 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>

TEMPLATE_TEST_CASE_SIG("static_set: operations on different stream than constructor",
"",
((typename Key), Key),
(uint8_t),
(uint16_t),
(int32_t),
(int64_t)
#if defined(CUCO_HAS_128BIT_ATOMICS)
Expand All @@ -47,14 +51,17 @@ TEMPLATE_TEST_CASE_SIG("static_set: operations on different stream than construc
CUCO_CUDA_TRY(cudaStreamCreate(&operation_stream));

{ // Scope ensures set is destroyed before streams
constexpr std::size_t num_keys{500'000};
auto set = cuco::static_set{num_keys * 2,
// Scale num_keys to fit in the key type's value range (sentinel = 0xFF/0xFFFF).
constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100
: (sizeof(Key) == 2) ? 1'000
: 500'000;
auto set = cuco::static_set{num_keys * 2,
cuco::empty_key<Key>{static_cast<Key>(-1)},
{},
{},
cuco::linear_probing<1, cuco::default_hash_function<Key>>{},
{},
{},
{},
{},
{},
{},
constructor_stream};

thrust::device_vector<Key> d_keys(num_keys);
Expand Down
22 changes: 18 additions & 4 deletions tests/static_set/unique_sequence_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

#include <catch2/catch_template_test_macros.hpp>

#include <cstdint>

using size_type = int32_t;

int32_t constexpr SENTINEL = -1;
Expand Down Expand Up @@ -141,6 +143,12 @@ TEMPLATE_TEST_CASE_SIG(
"static_set unique sequence tests",
"",
((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize),
(uint8_t, cuco::test::probe_sequence::double_hashing, 1),
(uint8_t, cuco::test::probe_sequence::linear_probing, 1),
(uint16_t, cuco::test::probe_sequence::double_hashing, 1),
(uint16_t, cuco::test::probe_sequence::double_hashing, 2),
(uint16_t, cuco::test::probe_sequence::linear_probing, 1),
(uint16_t, cuco::test::probe_sequence::linear_probing, 2),
(int32_t, cuco::test::probe_sequence::double_hashing, 1),
(int32_t, cuco::test::probe_sequence::double_hashing, 2),
(int64_t, cuco::test::probe_sequence::double_hashing, 1),
Expand All @@ -158,17 +166,23 @@ TEMPLATE_TEST_CASE_SIG(
#endif
)
{
constexpr size_type num_keys{400};
// Limit key count for small types: leave room for the 0xFF/0xFFFF sentinel
constexpr size_type num_keys = (sizeof(Key) == 1) ? 100 : 400;
using probe = std::conditional_t<Probe == cuco::test::probe_sequence::linear_probing,
cuco::linear_probing<CGSize, cuco::default_hash_function<Key>>,
cuco::double_hashing<CGSize, cuco::default_hash_function<Key>>>;

constexpr size_type gold_capacity = [&]() {
if constexpr (cuco::is_double_hashing<probe>::value) {
return (CGSize == 1) ? 422 // 211 x 1 x 2
: 404; // 101 x 2 x 2
if constexpr (num_keys == 100) {
return (CGSize == 1) ? 106 // 53 x 1 x 2
: 106; // 53 x 2 x 1... only CGSize=1 used for uint8_t
} else {
return (CGSize == 1) ? 422 // 211 x 1 x 2
: 404; // 101 x 2 x 2
}
} else {
return 400;
return num_keys;
}
}();

Expand Down
Loading