diff --git a/include/cuco/detail/pair/helpers.cuh b/include/cuco/detail/pair/helpers.cuh index c21cbdb86..b8f4d8f70 100644 --- a/include/cuco/detail/pair/helpers.cuh +++ b/include/cuco/detail/pair/helpers.cuh @@ -47,6 +47,22 @@ struct packed { using type = void; ///< `void` type by default }; +/** + * @brief Denotes the packed type when the size of the object is 1. + */ +template <> +struct packed { + using type = uint8_t; ///< Packed type as `uint8_t` if the size of the object is 1 +}; + +/** + * @brief Denotes the packed type when the size of the object is 2. + */ +template <> +struct packed { + using type = uint16_t; ///< Packed type as `uint16_t` if the size of the object is 2 +}; + /** * @brief Denotes the packed type when the size of the object is 8. */ diff --git a/tests/static_set/atomic_storage_test.cu b/tests/static_set/atomic_storage_test.cu index b611214bb..746608428 100644 --- a/tests/static_set/atomic_storage_test.cu +++ b/tests/static_set/atomic_storage_test.cu @@ -69,6 +69,7 @@ TEST_CASE("atomic_storage_test", "") cuda::proclaim_return_type(build_fn{})); set.insert_async(keys_begin, keys_begin + num_keys); + CUCO_CUDA_TRY(cudaDeviceSynchronize()); auto const count = set.size(); REQUIRE(count == num_keys); diff --git a/tests/static_set/for_each_test.cu b/tests/static_set/for_each_test.cu index 5374f8fac..814656f27 100644 --- a/tests/static_set/for_each_test.cu +++ b/tests/static_set/for_each_test.cu @@ -26,10 +26,12 @@ #include +#include + using size_type = std::size_t; template -void test_for_each(Set& set, size_type num_keys) +void test_for_each(Set& set, size_type num_keys, size_type expected_evens, size_type expected_odds) { using Key = typename Set::key_type; @@ -40,7 +42,7 @@ void test_for_each(Set& set, size_type num_keys) // Insert keys auto keys_begin = cuda::make_transform_iterator( cuda::counting_iterator{0}, cuda::proclaim_return_type([] __device__(auto i) { - // generates a sequence of 1, 2, 1, 2, ... + // generates a sequence of 0, 1, 2, ... return static_cast(i); })); set.insert(keys_begin, keys_begin + num_keys, stream); @@ -56,7 +58,7 @@ void test_for_each(Set& set, size_type num_keys) if (slot % 2 == 0) { counter->fetch_add(slot, cuda::memory_order_relaxed); } }, stream); - REQUIRE(counter_storage.load_to_host(stream) == 249'500); + REQUIRE(counter_storage.load_to_host(stream) == expected_evens); counter_storage.reset(stream); @@ -68,13 +70,19 @@ void test_for_each(Set& set, size_type num_keys) if (!(slot % 2 == 0)) { counter->fetch_add(slot, cuda::memory_order_relaxed); } }, stream); - REQUIRE(counter_storage.load_to_host(stream) == 250'000); + REQUIRE(counter_storage.load_to_host(stream) == expected_odds); } TEMPLATE_TEST_CASE_SIG( "static_set for_each tests", "", ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), + (uint8_t, cuco::test::probe_sequence::double_hashing, 1), + (uint8_t, cuco::test::probe_sequence::linear_probing, 1), + (uint16_t, cuco::test::probe_sequence::double_hashing, 1), + (uint16_t, cuco::test::probe_sequence::double_hashing, 2), + (uint16_t, cuco::test::probe_sequence::linear_probing, 1), + (uint16_t, cuco::test::probe_sequence::linear_probing, 2), (int32_t, cuco::test::probe_sequence::double_hashing, 1), (int32_t, cuco::test::probe_sequence::double_hashing, 2), (int64_t, cuco::test::probe_sequence::double_hashing, 1), @@ -92,7 +100,14 @@ TEMPLATE_TEST_CASE_SIG( #endif ) { - constexpr size_type num_keys{1'000}; + // Limit key count for small types: leave room for the 0xFF/0xFFFF sentinel. + // Expected sums are pre-computed per type class: + // uint8_t (num_keys=100): sum of evens 0..98 = 2450, sum of odds 1..99 = 2500 + // uint16_t+ (num_keys=1000): sum of evens 0..998 = 249'500, sum of odds 1..999 = 250'000 + constexpr size_type num_keys = (sizeof(Key) == 1) ? 100 : 1'000; + constexpr size_type expected_evens = (sizeof(Key) == 1) ? 2'450 : 249'500; + constexpr size_type expected_odds = (sizeof(Key) == 1) ? 2'500 : 250'000; + using probe = std::conditional_t< Probe == cuco::test::probe_sequence::linear_probing, cuco::linear_probing>, @@ -107,5 +122,5 @@ TEMPLATE_TEST_CASE_SIG( cuco::storage<2>>; auto set = set_t{num_keys, cuco::empty_key{static_cast(-1)}}; - test_for_each(set, num_keys); + test_for_each(set, num_keys, expected_evens, expected_odds); } diff --git a/tests/static_set/insert_and_find_test.cu b/tests/static_set/insert_and_find_test.cu index 7eefdfb6c..7ce771365 100644 --- a/tests/static_set/insert_and_find_test.cu +++ b/tests/static_set/insert_and_find_test.cu @@ -25,6 +25,8 @@ #include +#include + template void test_insert_and_find(Set& set, std::size_t num_keys) { @@ -56,6 +58,12 @@ TEMPLATE_TEST_CASE_SIG( "static_set Insert and find", "", ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), + (uint8_t, cuco::test::probe_sequence::double_hashing, 1), + (uint8_t, cuco::test::probe_sequence::linear_probing, 1), + (uint16_t, cuco::test::probe_sequence::double_hashing, 1), + (uint16_t, cuco::test::probe_sequence::double_hashing, 2), + (uint16_t, cuco::test::probe_sequence::linear_probing, 1), + (uint16_t, cuco::test::probe_sequence::linear_probing, 2), (int32_t, cuco::test::probe_sequence::double_hashing, 1), (int32_t, cuco::test::probe_sequence::double_hashing, 2), (int64_t, cuco::test::probe_sequence::double_hashing, 1), @@ -73,7 +81,8 @@ TEMPLATE_TEST_CASE_SIG( #endif ) { - constexpr std::size_t num_keys{400}; + // Limit key count for small types: leave room for the 0xFF/0xFFFF sentinel + constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 : 400; using probe = std::conditional_t>, diff --git a/tests/static_set/retrieve_all_test.cu b/tests/static_set/retrieve_all_test.cu index ed2e70abb..f3caa51e1 100644 --- a/tests/static_set/retrieve_all_test.cu +++ b/tests/static_set/retrieve_all_test.cu @@ -27,6 +27,8 @@ #include +#include + template void test_unique_sequence(Set& set, std::size_t num_keys) { @@ -61,6 +63,12 @@ TEMPLATE_TEST_CASE_SIG( "static_set::retrieve_all tests", "", ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), + (uint8_t, cuco::test::probe_sequence::double_hashing, 1), + (uint8_t, cuco::test::probe_sequence::linear_probing, 1), + (uint16_t, cuco::test::probe_sequence::double_hashing, 1), + (uint16_t, cuco::test::probe_sequence::double_hashing, 2), + (uint16_t, cuco::test::probe_sequence::linear_probing, 1), + (uint16_t, cuco::test::probe_sequence::linear_probing, 2), (int32_t, cuco::test::probe_sequence::double_hashing, 1), (int32_t, cuco::test::probe_sequence::double_hashing, 2), (int64_t, cuco::test::probe_sequence::double_hashing, 1), @@ -78,7 +86,8 @@ TEMPLATE_TEST_CASE_SIG( #endif ) { - constexpr std::size_t num_keys{400}; + // Limit key count for small types: leave room for the 0xFF/0xFFFF sentinel + constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 : 400; constexpr double desired_load_factor = 1.; using probe = std::conditional_t::value) { - return (CGSize == 1) ? 401 // 401 x 1 x 1 - : 422; // 211 x 2 x 1 + if constexpr (num_keys == 100) { + return (CGSize == 1) ? 101 // 101 x 1 x 1 + : 106; // 53 x 2 x 1 + } else { + return (CGSize == 1) ? 401 // 401 x 1 x 1 + : 422; // 211 x 2 x 1 + } } else { - return 400; + return num_keys; } }(); diff --git a/tests/static_set/retrieve_if_test.cu b/tests/static_set/retrieve_if_test.cu index b82969926..ec0a9f800 100644 --- a/tests/static_set/retrieve_if_test.cu +++ b/tests/static_set/retrieve_if_test.cu @@ -27,6 +27,8 @@ #include +#include + using size_type = std::size_t; template @@ -110,6 +112,8 @@ __global__ void test_retrieve_if_all_true_kernel( TEMPLATE_TEST_CASE_SIG("static_set retrieve_if", "", ((typename Key), Key), + (uint8_t), + (uint16_t), (int32_t), (int64_t) #if defined(CUCO_HAS_128BIT_ATOMICS) @@ -118,7 +122,8 @@ TEMPLATE_TEST_CASE_SIG("static_set retrieve_if", #endif ) { - constexpr size_type num_keys{400}; + // Limit key count for small types: keys start at 1, sentinel is 0xFF/0xFFFF + constexpr size_type num_keys = (sizeof(Key) == 1) ? 100 : 400; using container_type = cuco::static_set; diff --git a/tests/static_set/retrieve_test.cu b/tests/static_set/retrieve_test.cu index f5b7763dc..5032b2c32 100644 --- a/tests/static_set/retrieve_test.cu +++ b/tests/static_set/retrieve_test.cu @@ -28,6 +28,8 @@ #include +#include + static constexpr int key_sentinel = -1; template @@ -75,6 +77,12 @@ TEMPLATE_TEST_CASE_SIG( "static_set retrieve tests", "", ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), + (uint8_t, cuco::test::probe_sequence::double_hashing, 1), + (uint8_t, cuco::test::probe_sequence::linear_probing, 1), + (uint16_t, cuco::test::probe_sequence::double_hashing, 1), + (uint16_t, cuco::test::probe_sequence::double_hashing, 2), + (uint16_t, cuco::test::probe_sequence::linear_probing, 1), + (uint16_t, cuco::test::probe_sequence::linear_probing, 2), (int32_t, cuco::test::probe_sequence::double_hashing, 1), (int32_t, cuco::test::probe_sequence::double_hashing, 2), (int64_t, cuco::test::probe_sequence::double_hashing, 1), @@ -92,7 +100,8 @@ TEMPLATE_TEST_CASE_SIG( #endif ) { - constexpr std::size_t num_keys{400}; + // Limit key count for small types: leave room for the 0xFF/0xFFFF sentinel + constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 : 400; constexpr double desired_load_factor = 1.; using probe = std::conditional_t +#include #include template @@ -47,6 +48,8 @@ __global__ void shared_memory_test_kernel(Ref* sets, auto insert_ref = sets[set_id].make_copy(g, sm_buffer, cuco::thread_scope_block); auto find_ref = insert_ref.rebind_operators(cuco::op::find); + __syncthreads(); + for (int i = g.thread_rank(); i < number_of_elements; i += g.size()) { auto found_it = find_ref.find(insterted_keys[offset + i]); @@ -67,6 +70,8 @@ __global__ void shared_memory_test_kernel(Ref* sets, TEMPLATE_TEST_CASE_SIG("static_set shared memory tests", "", ((typename Key), Key), + (uint8_t), + (uint16_t), (int32_t), (int64_t) #if defined(CUCO_HAS_128BIT_ATOMICS) @@ -75,8 +80,12 @@ TEMPLATE_TEST_CASE_SIG("static_set shared memory tests", #endif ) { - constexpr std::size_t number_of_sets = 1000; - constexpr std::size_t elements_in_set = 500; + // For uint8_t: sentinel = 0xFF (255), so usable key range is 0..254. + // For uint16_t: sentinel = 0xFFFF (65535), so usable key range is 0..65534. + // thrust::sequence over number_of_sets*elements_in_set keys must not wrap. + // Use smaller set count and element count for smaller types. + constexpr std::size_t number_of_sets = (sizeof(Key) <= 2) ? (sizeof(Key) == 1 ? 2 : 100) : 1000; + constexpr std::size_t elements_in_set = (sizeof(Key) <= 2) ? (sizeof(Key) == 1 ? 100 : 600) : 500; constexpr std::size_t set_capacity = 2 * elements_in_set; using extent_type = cuco::extent; @@ -96,7 +105,8 @@ TEMPLATE_TEST_CASE_SIG("static_set shared memory tests", // operator yet std::vector> sets; for (std::size_t set_id = 0; set_id < number_of_sets; ++set_id) { - sets.push_back(std::make_unique(extent_type{}, cuco::empty_key{-1})); + sets.push_back( + std::make_unique(extent_type{}, cuco::empty_key{static_cast(-1)})); } thrust::device_vector d_keys_exist(number_of_sets * elements_in_set); diff --git a/tests/static_set/stream_test.cu b/tests/static_set/stream_test.cu index f9277ed8f..add087f90 100644 --- a/tests/static_set/stream_test.cu +++ b/tests/static_set/stream_test.cu @@ -30,9 +30,13 @@ #include +#include + TEMPLATE_TEST_CASE_SIG("static_set: operations on different stream than constructor", "", ((typename Key), Key), + (uint8_t), + (uint16_t), (int32_t), (int64_t) #if defined(CUCO_HAS_128BIT_ATOMICS) @@ -47,14 +51,17 @@ TEMPLATE_TEST_CASE_SIG("static_set: operations on different stream than construc CUCO_CUDA_TRY(cudaStreamCreate(&operation_stream)); { // Scope ensures set is destroyed before streams - constexpr std::size_t num_keys{500'000}; - auto set = cuco::static_set{num_keys * 2, + // Scale num_keys to fit in the key type's value range (sentinel = 0xFF/0xFFFF). + constexpr std::size_t num_keys = (sizeof(Key) == 1) ? 100 + : (sizeof(Key) == 2) ? 1'000 + : 500'000; + auto set = cuco::static_set{num_keys * 2, cuco::empty_key{static_cast(-1)}, - {}, + {}, cuco::linear_probing<1, cuco::default_hash_function>{}, - {}, - {}, - {}, + {}, + {}, + {}, constructor_stream}; thrust::device_vector d_keys(num_keys); diff --git a/tests/static_set/unique_sequence_test.cu b/tests/static_set/unique_sequence_test.cu index 2db91c2b6..9dc3c3513 100644 --- a/tests/static_set/unique_sequence_test.cu +++ b/tests/static_set/unique_sequence_test.cu @@ -29,6 +29,8 @@ #include +#include + using size_type = int32_t; int32_t constexpr SENTINEL = -1; @@ -141,6 +143,12 @@ TEMPLATE_TEST_CASE_SIG( "static_set unique sequence tests", "", ((typename Key, cuco::test::probe_sequence Probe, int CGSize), Key, Probe, CGSize), + (uint8_t, cuco::test::probe_sequence::double_hashing, 1), + (uint8_t, cuco::test::probe_sequence::linear_probing, 1), + (uint16_t, cuco::test::probe_sequence::double_hashing, 1), + (uint16_t, cuco::test::probe_sequence::double_hashing, 2), + (uint16_t, cuco::test::probe_sequence::linear_probing, 1), + (uint16_t, cuco::test::probe_sequence::linear_probing, 2), (int32_t, cuco::test::probe_sequence::double_hashing, 1), (int32_t, cuco::test::probe_sequence::double_hashing, 2), (int64_t, cuco::test::probe_sequence::double_hashing, 1), @@ -158,17 +166,23 @@ TEMPLATE_TEST_CASE_SIG( #endif ) { - constexpr size_type num_keys{400}; + // Limit key count for small types: leave room for the 0xFF/0xFFFF sentinel + constexpr size_type num_keys = (sizeof(Key) == 1) ? 100 : 400; using probe = std::conditional_t>, cuco::double_hashing>>; constexpr size_type gold_capacity = [&]() { if constexpr (cuco::is_double_hashing::value) { - return (CGSize == 1) ? 422 // 211 x 1 x 2 - : 404; // 101 x 2 x 2 + if constexpr (num_keys == 100) { + return (CGSize == 1) ? 106 // 53 x 1 x 2 + : 106; // 53 x 2 x 1... only CGSize=1 used for uint8_t + } else { + return (CGSize == 1) ? 422 // 211 x 1 x 2 + : 404; // 101 x 2 x 2 + } } else { - return 400; + return num_keys; } }();