|
1 | 1 |
|
2 | 2 | #include "barretenberg/commitment_schemes/commitment_key.hpp" |
| 3 | +#include "barretenberg/common/thread.hpp" |
3 | 4 | #include "barretenberg/srs/global_crs.hpp" |
4 | 5 |
|
5 | 6 | #include <gtest/gtest.h> |
@@ -125,6 +126,89 @@ template <typename Curve> class CommitmentKeyTest : public ::testing::Test { |
125 | 126 | Commitment expected = commit_naive(ck, poly); |
126 | 127 | EXPECT_EQ(expected, commitment); |
127 | 128 | } |
| 129 | + |
| 130 | + // Regression test for a zero-counting bug in Pippenger's MSD radix sort |
| 131 | + // (sort_point_schedule_and_count_zero_buckets in process_buckets.cpp). |
| 132 | + // |
| 133 | + // The bug: the recursive radix sort passed `keys` instead of `top_level_keys` when recursing, |
| 134 | + // causing the zero-entry counter to be overwritten by non-zero-bucket counts when the sort |
| 135 | + // uses 3+ recursion levels. The inflated count makes the MSM skip valid point contributions. |
| 136 | + // |
| 137 | + // When does 3-level recursion occur? |
| 138 | + // - Pippenger chooses bits_per_slice via a cost model (get_optimal_log_num_buckets). |
| 139 | + // - bits_per_slice > 16 pads to 24 bits -> initial_shift=16 -> 3 levels (shift 16->8->0). |
| 140 | + // - For BN254 (254-bit scalars), bits_per_slice=17 at ~4.6M+ points per work unit. |
| 141 | + // - Multi-threading splits MSM across cores, so each work unit is total_points/num_threads. |
| 142 | + // On a 32-core machine, a single work unit reaches 4.6M at ~150M total points. |
| 143 | + // - Single-threaded execution (WASM, resource-constrained environments) hits the threshold |
| 144 | + // at 4.6M points directly. |
| 145 | + // |
| 146 | + // Polynomial design (deterministic, all coefficients non-zero): |
| 147 | + // get_scalar_slice extracts bits MSB-first. With bits_per_slice=17 and 15 rounds for BN254, |
| 148 | + // round 13 extracts bits [16:33) of each scalar. We choose scalar values so that round 13 |
| 149 | + // has the bucket distribution needed to trigger the overwrite: |
| 150 | + // |
| 151 | + // 100 coefficients = Fr(1) -> bits [16:33) = 0 -> bucket_index = 0 |
| 152 | + // 10 coefficients = Fr(2^16) -> bits [16:33) = 1 -> bucket_index = 1 [DROPPED] |
| 153 | + // ~5M coefficients = Fr(2^32) -> bits [16:33) = 2^16 -> bucket_index = 65536 [OVERWRITES] |
| 154 | + // |
| 155 | + // Fr(1) entries must be non-zero (zero scalars are filtered before the MSM) but still |
| 156 | + // land in bucket 0 for round 13. They ensure point_schedule[0] has bucket_index=0 after |
| 157 | + // sorting, bypassing the post-sort safety check in sort_point_schedule_and_count_zero_buckets. |
| 158 | + // |
| 159 | + // The bug overwrites num_zero_entries from 100 (correct) to ~5M (count at bucket 65536). |
| 160 | + // The MSM span then starts ~5M entries into the sorted schedule, skipping all 10 target |
| 161 | + // entries with bucket_index=1 and silently dropping their contributions. |
| 162 | + // |
| 163 | + // This layout is chosen for efficiency (~1.5s) and full determinism (no random scalars). |
| 164 | + // The reference commitment is computed by chunking into 1M-point sub-MSMs, each using |
| 165 | + // bits_per_slice <= 15 (2-level sort, bug-free). |
| 166 | + void test_pippenger_zero_count_regression() |
| 167 | + { |
| 168 | + constexpr size_t n = 5000000; |
| 169 | + CK ck(n); |
| 170 | + |
| 171 | + Polynomial poly(n); |
| 172 | + |
| 173 | + constexpr size_t num_fake_zeros = 100; |
| 174 | + for (size_t i = 0; i < num_fake_zeros; ++i) { |
| 175 | + poly.at(i) = Fr(1); |
| 176 | + } |
| 177 | + |
| 178 | + constexpr size_t num_targets = 10; |
| 179 | + for (size_t i = num_fake_zeros; i < num_fake_zeros + num_targets; ++i) { |
| 180 | + poly.at(i) = Fr(65536); |
| 181 | + } |
| 182 | + |
| 183 | + for (size_t i = num_fake_zeros + num_targets; i < n; ++i) { |
| 184 | + poly.at(i) = Fr(uint256_t(1) << 32); |
| 185 | + } |
| 186 | + |
| 187 | + // Commit single-threaded to keep the full point set in one work unit |
| 188 | + size_t original_concurrency = get_num_cpus(); |
| 189 | + set_parallel_for_concurrency(1); |
| 190 | + Commitment actual_commitment = ck.commit(poly); |
| 191 | + set_parallel_for_concurrency(original_concurrency); |
| 192 | + |
| 193 | + // Reference: sum of chunked sub-MSMs (each chunk uses bits_per_slice <= 15, bug-free) |
| 194 | + constexpr size_t chunk_size = 1UL << 20; |
| 195 | + auto srs_points = ck.get_monomial_points(); |
| 196 | + GroupElement correct_sum; |
| 197 | + correct_sum.self_set_infinity(); |
| 198 | + |
| 199 | + for (size_t offset = 0; offset < n; offset += chunk_size) { |
| 200 | + size_t this_chunk = std::min(chunk_size, n - offset); |
| 201 | + std::span<const Fr> chunk_coeffs(&poly[offset], this_chunk); |
| 202 | + PolynomialSpan<const Fr> chunk_span(0, chunk_coeffs); |
| 203 | + std::span<const Commitment> chunk_points = srs_points.subspan(offset, this_chunk); |
| 204 | + |
| 205 | + auto chunk_result = scalar_multiplication::pippenger_unsafe<Curve>(chunk_span, chunk_points); |
| 206 | + correct_sum += chunk_result; |
| 207 | + } |
| 208 | + Commitment correct_commitment(correct_sum); |
| 209 | + |
| 210 | + EXPECT_EQ(actual_commitment, correct_commitment); |
| 211 | + } |
128 | 212 | }; |
129 | 213 |
|
130 | 214 | using Curves = ::testing::Types<curve::BN254, curve::Grumpkin>; |
@@ -154,5 +238,16 @@ TYPED_TEST(CommitmentKeyTest, CommitWithStartIndex) |
154 | 238 | { |
155 | 239 | TestFixture::test_commit_with_start_index(); |
156 | 240 | } |
| 241 | +TYPED_TEST(CommitmentKeyTest, DISABLED_PippengerZeroCountRegression) |
| 242 | +{ |
| 243 | + if constexpr (!std::is_same_v<TypeParam, curve::BN254>) { |
| 244 | + GTEST_SKIP() << "BN254 only: Grumpkin CRS has insufficient points for the 5M threshold"; |
| 245 | + } |
| 246 | +#ifndef NDEBUG |
| 247 | + GTEST_SKIP() << "Too slow in debug builds"; |
| 248 | +#else |
| 249 | + TestFixture::test_pippenger_zero_count_regression(); |
| 250 | +#endif |
| 251 | +} |
157 | 252 |
|
158 | 253 | } // namespace bb |
0 commit comments