Skip to content

Commit 2ba499b

Browse files
committed
Merge remote-tracking branch 'upstream' into worktree-128bit-atomics
2 parents 92a8d3d + 8576506 commit 2ba499b

8 files changed

Lines changed: 228 additions & 20158 deletions

File tree

include/cuco/detail/extent/extent.inl

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
2+
* Copyright (c) 2023-2026, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -17,18 +17,15 @@
1717
#pragma once
1818

1919
#include <cuco/detail/error.hpp>
20-
#include <cuco/detail/prime.hpp> // TODO move to detail/extent/
20+
#include <cuco/detail/prime.hpp>
2121
#include <cuco/detail/utility/math.cuh>
22-
#include <cuco/detail/utils.hpp>
2322
#include <cuco/probing_scheme.cuh>
2423
#include <cuco/storage.cuh>
2524
#include <cuco/utility/fast_int.cuh>
2625

2726
#include <cuda/std/type_traits>
2827

29-
#include <algorithm>
3028
#include <cstdint>
31-
#include <limits>
3229

3330
namespace cuco {
3431
template <typename SizeType, std::size_t N>
@@ -116,28 +113,17 @@ struct valid_extent<SizeType, dynamic_extent> : cuco::utility::fast_int<SizeType
116113
template <int32_t CGSize, int32_t BucketSize, typename SizeType, std::size_t N>
117114
[[nodiscard]] auto constexpr make_valid_extent(extent<SizeType, N> ext)
118115
{
119-
auto constexpr stride = CGSize * BucketSize;
120-
auto constexpr max_prime = cuco::detail::primes.back();
121-
auto constexpr max_value =
122-
(static_cast<uint64_t>(cuda::std::numeric_limits<SizeType>::max()) < max_prime)
123-
? cuda::std::numeric_limits<SizeType>::max()
124-
: static_cast<SizeType>(max_prime);
125-
auto const size = cuco::detail::int_div_ceil(
116+
auto constexpr stride = CGSize * BucketSize;
117+
auto const size = cuco::detail::int_div_ceil(
126118
cuda::std::max(static_cast<SizeType>(ext), static_cast<SizeType>(1)), stride);
127-
if (size > max_value) { CUCO_FAIL("Invalid input extent"); }
128119

129120
if constexpr (N == dynamic_extent) {
130-
return valid_extent<SizeType, dynamic_extent>{static_cast<SizeType>(
131-
*cuco::detail::lower_bound(
132-
cuco::detail::primes.begin(), cuco::detail::primes.end(), static_cast<uint64_t>(size)) *
133-
stride)};
121+
return valid_extent<SizeType, dynamic_extent>{
122+
static_cast<SizeType>(cuco::detail::next_prime(static_cast<std::uint64_t>(size)) * stride)};
134123
} else {
135124
return valid_extent<SizeType,
136125
static_cast<std::size_t>(
137-
*cuco::detail::lower_bound(cuco::detail::primes.begin(),
138-
cuco::detail::primes.end(),
139-
static_cast<uint64_t>(size)) *
140-
stride)>{};
126+
cuco::detail::next_prime(static_cast<std::uint64_t>(size)) * stride)>{};
141127
}
142128
}
143129

include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <cuco/detail/equal_wrapper.cuh>
2020
#include <cuco/detail/probing_scheme/probing_scheme_base.cuh>
2121
#include <cuco/detail/utility/cuda.cuh>
22+
#include <cuco/detail/utils.hpp>
2223
#include <cuco/extent.cuh>
2324
#include <cuco/pair.cuh>
2425
#include <cuco/probing_scheme.cuh>
@@ -1062,8 +1063,8 @@ class open_addressing_ref_impl {
10621063
OutputMatchIt output_match,
10631064
AtomicCounter& atomic_counter) const
10641065
{
1065-
auto constexpr is_outer = false;
1066-
auto const n = cuco::detail::distance(input_probe_begin, input_probe_end); // TODO include
1066+
auto constexpr is_outer = false;
1067+
auto const n = cuco::detail::distance(input_probe_begin, input_probe_end);
10671068
auto const always_true_stencil = cuda::constant_iterator<bool>(true);
10681069
auto const identity_predicate = cuda::std::identity{};
10691070
this->retrieve_impl<is_outer, BlockSize>(block,
@@ -1119,8 +1120,8 @@ class open_addressing_ref_impl {
11191120
OutputMatchIt output_match,
11201121
AtomicCounter& atomic_counter) const
11211122
{
1122-
auto constexpr is_outer = true;
1123-
auto const n = cuco::detail::distance(input_probe_begin, input_probe_end); // TODO include
1123+
auto constexpr is_outer = true;
1124+
auto const n = cuco::detail::distance(input_probe_begin, input_probe_end);
11241125
auto const always_true_stencil = cuda::constant_iterator<bool>(true);
11251126
auto const identity_predicate = cuda::std::identity{};
11261127
this->retrieve_impl<is_outer, BlockSize>(block,

include/cuco/detail/prime.hpp

Lines changed: 101 additions & 20129 deletions
Large diffs are not rendered by default.

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ endfunction(ConfigureTest)
5050
# - utility tests ---------------------------------------------------------------------------------
5151
ConfigureTest(UTILITY_TEST
5252
utility/extent_test.cu
53+
utility/next_prime_test.cu
5354
utility/storage_test.cu
5455
utility/fast_int_test.cu
5556
utility/hash_test.cu

tests/static_multiset/insert_test.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ TEMPLATE_TEST_CASE_SIG(
105105
constexpr size_type gold_capacity = [&]() {
106106
if constexpr (cuco::is_double_hashing<probe>::value) {
107107
return (CGSize == 1) ? 422 // 211 x 1 x 2
108-
: 412; // 103 x 2 x 2
108+
: 404; // 101 x 2 x 2
109109
} else {
110110
return 400;
111111
}

tests/static_set/retrieve_all_test.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ TEMPLATE_TEST_CASE_SIG(
8787

8888
constexpr std::size_t gold_capacity = [&]() {
8989
if constexpr (cuco::is_double_hashing<probe>::value) {
90-
return (CGSize == 1) ? 409 // 409 x 1 x 2
91-
: 422; // 211 x 2 x 2
90+
return (CGSize == 1) ? 401 // 401 x 1 x 1
91+
: 422; // 211 x 2 x 1
9292
} else {
9393
return 400;
9494
}

tests/static_set/unique_sequence_test.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ TEMPLATE_TEST_CASE_SIG(
166166
constexpr size_type gold_capacity = [&]() {
167167
if constexpr (cuco::is_double_hashing<probe>::value) {
168168
return (CGSize == 1) ? 422 // 211 x 1 x 2
169-
: 412; // 103 x 2 x 2
169+
: 404; // 101 x 2 x 2
170170
} else {
171171
return 400;
172172
}

tests/utility/next_prime_test.cu

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/*
2+
* Copyright (c) 2026, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <cuco/detail/prime.hpp>
18+
19+
#include <catch2/catch_test_macros.hpp>
20+
21+
#include <cstdint>
22+
23+
TEST_CASE("detail::is_prime", "")
24+
{
25+
using cuco::detail::is_prime;
26+
27+
SECTION("Values below 2 are not prime")
28+
{
29+
STATIC_REQUIRE(not is_prime(0));
30+
STATIC_REQUIRE(not is_prime(1));
31+
}
32+
33+
SECTION("Small primes and composites")
34+
{
35+
STATIC_REQUIRE(is_prime(2));
36+
STATIC_REQUIRE(is_prime(3));
37+
STATIC_REQUIRE(not is_prime(4));
38+
STATIC_REQUIRE(is_prime(5));
39+
STATIC_REQUIRE(not is_prime(9));
40+
STATIC_REQUIRE(is_prime(11));
41+
STATIC_REQUIRE(is_prime(97));
42+
STATIC_REQUIRE(not is_prime(100));
43+
}
44+
45+
SECTION("Carmichael numbers are correctly rejected")
46+
{
47+
// Carmichael numbers: composites that pass the Fermat test for every coprime base
48+
REQUIRE(not is_prime(561)); // 3 * 11 * 17
49+
REQUIRE(not is_prime(1105)); // 5 * 13 * 17
50+
REQUIRE(not is_prime(1729)); // 7 * 13 * 19
51+
REQUIRE(not is_prime(2465)); // 5 * 17 * 29
52+
REQUIRE(not is_prime(41041)); // 7 * 11 * 13 * 41
53+
REQUIRE(not is_prime(825265)); // 5 * 7 * 17 * 19 * 73
54+
}
55+
56+
SECTION("Large primes")
57+
{
58+
// Mersenne prime 2^31 - 1
59+
REQUIRE(is_prime(2147483647ull));
60+
// Largest prime below 2^32 (2^32 - 5)
61+
REQUIRE(is_prime(4294967291ull));
62+
// Largest prime representable in 64 bits (2^64 - 59)
63+
REQUIRE(is_prime(18446744073709551557ull));
64+
// Adjacent composite
65+
REQUIRE(not is_prime(18446744073709551556ull));
66+
}
67+
}
68+
69+
TEST_CASE("detail::next_prime", "")
70+
{
71+
using cuco::detail::next_prime;
72+
73+
SECTION("Values at or below 2 map to 2")
74+
{
75+
STATIC_REQUIRE(next_prime(0) == 2ull);
76+
STATIC_REQUIRE(next_prime(1) == 2ull);
77+
STATIC_REQUIRE(next_prime(2) == 2ull);
78+
}
79+
80+
SECTION("Already-prime inputs are returned unchanged")
81+
{
82+
STATIC_REQUIRE(next_prime(3) == 3ull);
83+
STATIC_REQUIRE(next_prime(13) == 13ull);
84+
STATIC_REQUIRE(next_prime(101) == 101ull);
85+
}
86+
87+
SECTION("Composite inputs advance to the next prime")
88+
{
89+
STATIC_REQUIRE(next_prime(4) == 5ull);
90+
STATIC_REQUIRE(next_prime(14) == 17ull);
91+
STATIC_REQUIRE(next_prime(100) == 101ull);
92+
STATIC_REQUIRE(next_prime(155) == 157ull); // used by extent_test
93+
}
94+
95+
SECTION("Large composite inputs")
96+
{
97+
REQUIRE(next_prime(1ull << 20) == 1048583ull);
98+
REQUIRE(next_prime(1ull << 32) == 4294967311ull);
99+
}
100+
101+
SECTION("Result is always >= input and prime")
102+
{
103+
using cuco::detail::is_prime;
104+
for (std::uint64_t n : {0ull, 1ull, 42ull, 1000ull, 999983ull, 1ull << 40}) {
105+
auto const p = next_prime(n);
106+
REQUIRE(p >= n);
107+
REQUIRE(is_prime(p));
108+
}
109+
}
110+
}

0 commit comments

Comments
 (0)