Skip to content

Commit 8576506

Browse files
authored
Replace precomputed prime table with on-the-fly Miller-Rabin computation (#802)
This PR replaces the precomputed prime table lookup with an on-the-fly Miller-Rabin primality computation. The new approach is slightly slower, adding at most ~2 µs of overhead, but the performance impact is negligible and it eliminates the need to store a large precomputed prime table.
1 parent 6477be2 commit 8576506

8 files changed

Lines changed: 228 additions & 20158 deletions

File tree

include/cuco/detail/extent/extent.inl

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
2+
* Copyright (c) 2023-2026, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -17,18 +17,15 @@
1717
#pragma once
1818

1919
#include <cuco/detail/error.hpp>
20-
#include <cuco/detail/prime.hpp> // TODO move to detail/extent/
20+
#include <cuco/detail/prime.hpp>
2121
#include <cuco/detail/utility/math.cuh>
22-
#include <cuco/detail/utils.hpp>
2322
#include <cuco/probing_scheme.cuh>
2423
#include <cuco/storage.cuh>
2524
#include <cuco/utility/fast_int.cuh>
2625

2726
#include <cuda/std/type_traits>
2827

29-
#include <algorithm>
3028
#include <cstdint>
31-
#include <limits>
3229

3330
namespace cuco {
3431
template <typename SizeType, std::size_t N>
@@ -116,28 +113,17 @@ struct valid_extent<SizeType, dynamic_extent> : cuco::utility::fast_int<SizeType
116113
template <int32_t CGSize, int32_t BucketSize, typename SizeType, std::size_t N>
117114
[[nodiscard]] auto constexpr make_valid_extent(extent<SizeType, N> ext)
118115
{
119-
auto constexpr stride = CGSize * BucketSize;
120-
auto constexpr max_prime = cuco::detail::primes.back();
121-
auto constexpr max_value =
122-
(static_cast<uint64_t>(cuda::std::numeric_limits<SizeType>::max()) < max_prime)
123-
? cuda::std::numeric_limits<SizeType>::max()
124-
: static_cast<SizeType>(max_prime);
125-
auto const size = cuco::detail::int_div_ceil(
116+
auto constexpr stride = CGSize * BucketSize;
117+
auto const size = cuco::detail::int_div_ceil(
126118
cuda::std::max(static_cast<SizeType>(ext), static_cast<SizeType>(1)), stride);
127-
if (size > max_value) { CUCO_FAIL("Invalid input extent"); }
128119

129120
if constexpr (N == dynamic_extent) {
130-
return valid_extent<SizeType, dynamic_extent>{static_cast<SizeType>(
131-
*cuco::detail::lower_bound(
132-
cuco::detail::primes.begin(), cuco::detail::primes.end(), static_cast<uint64_t>(size)) *
133-
stride)};
121+
return valid_extent<SizeType, dynamic_extent>{
122+
static_cast<SizeType>(cuco::detail::next_prime(static_cast<std::uint64_t>(size)) * stride)};
134123
} else {
135124
return valid_extent<SizeType,
136125
static_cast<std::size_t>(
137-
*cuco::detail::lower_bound(cuco::detail::primes.begin(),
138-
cuco::detail::primes.end(),
139-
static_cast<uint64_t>(size)) *
140-
stride)>{};
126+
cuco::detail::next_prime(static_cast<std::uint64_t>(size)) * stride)>{};
141127
}
142128
}
143129

include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <cuco/detail/equal_wrapper.cuh>
2020
#include <cuco/detail/probing_scheme/probing_scheme_base.cuh>
2121
#include <cuco/detail/utility/cuda.cuh>
22+
#include <cuco/detail/utils.hpp>
2223
#include <cuco/extent.cuh>
2324
#include <cuco/pair.cuh>
2425
#include <cuco/probing_scheme.cuh>
@@ -1082,8 +1083,8 @@ class open_addressing_ref_impl {
10821083
OutputMatchIt output_match,
10831084
AtomicCounter& atomic_counter) const
10841085
{
1085-
auto constexpr is_outer = false;
1086-
auto const n = cuco::detail::distance(input_probe_begin, input_probe_end); // TODO include
1086+
auto constexpr is_outer = false;
1087+
auto const n = cuco::detail::distance(input_probe_begin, input_probe_end);
10871088
auto const always_true_stencil = cuda::constant_iterator<bool>(true);
10881089
auto const identity_predicate = cuda::std::identity{};
10891090
this->retrieve_impl<is_outer, BlockSize>(block,
@@ -1139,8 +1140,8 @@ class open_addressing_ref_impl {
11391140
OutputMatchIt output_match,
11401141
AtomicCounter& atomic_counter) const
11411142
{
1142-
auto constexpr is_outer = true;
1143-
auto const n = cuco::detail::distance(input_probe_begin, input_probe_end); // TODO include
1143+
auto constexpr is_outer = true;
1144+
auto const n = cuco::detail::distance(input_probe_begin, input_probe_end);
11441145
auto const always_true_stencil = cuda::constant_iterator<bool>(true);
11451146
auto const identity_predicate = cuda::std::identity{};
11461147
this->retrieve_impl<is_outer, BlockSize>(block,

include/cuco/detail/prime.hpp

Lines changed: 101 additions & 20129 deletions
Large diffs are not rendered by default.

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ endfunction(ConfigureTest)
5050
# - utility tests ---------------------------------------------------------------------------------
5151
ConfigureTest(UTILITY_TEST
5252
utility/extent_test.cu
53+
utility/next_prime_test.cu
5354
utility/storage_test.cu
5455
utility/fast_int_test.cu
5556
utility/hash_test.cu

tests/static_multiset/insert_test.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ TEMPLATE_TEST_CASE_SIG(
9696
constexpr size_type gold_capacity = [&]() {
9797
if constexpr (cuco::is_double_hashing<probe>::value) {
9898
return (CGSize == 1) ? 422 // 211 x 1 x 2
99-
: 412; // 103 x 2 x 2
99+
: 404; // 101 x 2 x 2
100100
} else {
101101
return 400;
102102
}

tests/static_set/retrieve_all_test.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ TEMPLATE_TEST_CASE_SIG(
7878

7979
constexpr std::size_t gold_capacity = [&]() {
8080
if constexpr (cuco::is_double_hashing<probe>::value) {
81-
return (CGSize == 1) ? 409 // 409 x 1 x 2
82-
: 422; // 211 x 2 x 2
81+
return (CGSize == 1) ? 401 // 401 x 1 x 1
82+
: 422; // 211 x 2 x 1
8383
} else {
8484
return 400;
8585
}

tests/static_set/unique_sequence_test.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ TEMPLATE_TEST_CASE_SIG(
157157
constexpr size_type gold_capacity = [&]() {
158158
if constexpr (cuco::is_double_hashing<probe>::value) {
159159
return (CGSize == 1) ? 422 // 211 x 1 x 2
160-
: 412; // 103 x 2 x 2
160+
: 404; // 101 x 2 x 2
161161
} else {
162162
return 400;
163163
}

tests/utility/next_prime_test.cu

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/*
2+
* Copyright (c) 2026, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <cuco/detail/prime.hpp>
18+
19+
#include <catch2/catch_test_macros.hpp>
20+
21+
#include <cstdint>
22+
23+
TEST_CASE("detail::is_prime", "")
24+
{
25+
using cuco::detail::is_prime;
26+
27+
SECTION("Values below 2 are not prime")
28+
{
29+
STATIC_REQUIRE(not is_prime(0));
30+
STATIC_REQUIRE(not is_prime(1));
31+
}
32+
33+
SECTION("Small primes and composites")
34+
{
35+
STATIC_REQUIRE(is_prime(2));
36+
STATIC_REQUIRE(is_prime(3));
37+
STATIC_REQUIRE(not is_prime(4));
38+
STATIC_REQUIRE(is_prime(5));
39+
STATIC_REQUIRE(not is_prime(9));
40+
STATIC_REQUIRE(is_prime(11));
41+
STATIC_REQUIRE(is_prime(97));
42+
STATIC_REQUIRE(not is_prime(100));
43+
}
44+
45+
SECTION("Carmichael numbers are correctly rejected")
46+
{
47+
// Carmichael numbers pass the Fermat test for every coprime base, so they fool weak primality tests
48+
REQUIRE(not is_prime(561)); // 3 * 11 * 17
49+
REQUIRE(not is_prime(1105)); // 5 * 13 * 17
50+
REQUIRE(not is_prime(1729)); // 7 * 13 * 19
51+
REQUIRE(not is_prime(2465)); // 5 * 17 * 29
52+
REQUIRE(not is_prime(41041)); // 7 * 11 * 13 * 41
53+
REQUIRE(not is_prime(825265)); // 5 * 7 * 17 * 19 * 73
54+
}
55+
56+
SECTION("Large primes")
57+
{
58+
// Mersenne prime 2^31 - 1
59+
REQUIRE(is_prime(2147483647ull));
60+
// Largest prime below 2^32 (2^32 - 5)
61+
REQUIRE(is_prime(4294967291ull));
62+
// Largest 64-bit prime (2^64 - 59)
63+
REQUIRE(is_prime(18446744073709551557ull));
64+
// Adjacent composite
65+
REQUIRE(not is_prime(18446744073709551556ull));
66+
}
67+
}
68+
69+
TEST_CASE("detail::next_prime", "")
70+
{
71+
using cuco::detail::next_prime;
72+
73+
SECTION("Values at or below 2 map to 2")
74+
{
75+
STATIC_REQUIRE(next_prime(0) == 2ull);
76+
STATIC_REQUIRE(next_prime(1) == 2ull);
77+
STATIC_REQUIRE(next_prime(2) == 2ull);
78+
}
79+
80+
SECTION("Already-prime inputs are returned unchanged")
81+
{
82+
STATIC_REQUIRE(next_prime(3) == 3ull);
83+
STATIC_REQUIRE(next_prime(13) == 13ull);
84+
STATIC_REQUIRE(next_prime(101) == 101ull);
85+
}
86+
87+
SECTION("Composite inputs advance to the next prime")
88+
{
89+
STATIC_REQUIRE(next_prime(4) == 5ull);
90+
STATIC_REQUIRE(next_prime(14) == 17ull);
91+
STATIC_REQUIRE(next_prime(100) == 101ull);
92+
STATIC_REQUIRE(next_prime(155) == 157ull); // used by extent_test
93+
}
94+
95+
SECTION("Large composite inputs")
96+
{
97+
REQUIRE(next_prime(1ull << 20) == 1048583ull);
98+
REQUIRE(next_prime(1ull << 32) == 4294967311ull);
99+
}
100+
101+
SECTION("Result is always >= input and prime")
102+
{
103+
using cuco::detail::is_prime;
104+
for (std::uint64_t n : {0ull, 1ull, 42ull, 1000ull, 999983ull, 1ull << 40}) {
105+
auto const p = next_prime(n);
106+
REQUIRE(p >= n);
107+
REQUIRE(is_prime(p));
108+
}
109+
}
110+
}

0 commit comments

Comments
 (0)