Skip to content

Commit 7bfae55

Browse files
authored
feat: merge-train/barretenberg (#22147)
BEGIN_COMMIT_OVERRIDE chore: bench phase breakdown + thread sweep for MSM reduction (#21885) chore: minor fixes pt. 2 (#22138) chore: minor fixes pt. 3 (#22181) fix: satisfy Chonk num_circuits >= 4 assertion in mock IVC creation (#22188) END_COMMIT_OVERRIDE
2 parents 8b3beee + daf1360 commit 7bfae55

24 files changed

Lines changed: 303 additions & 196 deletions

File tree

barretenberg/cpp/src/barretenberg/bbapi/bbapi.test.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#include "barretenberg/bbapi/bbapi.hpp"
2+
#include "barretenberg/api/file_io.hpp"
3+
#include "barretenberg/bbapi/bbapi_shared.hpp"
24
#include "barretenberg/common/serialize.hpp"
35
#include "barretenberg/common/utils.hpp"
46
#include "barretenberg/serialize/test_helper.hpp"
@@ -42,3 +44,72 @@ TYPED_TEST(BBApiMsgpack, DefaultConstructorRoundtrip)
4244
EXPECT_EQ(actual_response, expected_response);
4345
std::cout << msgpack_schema_to_string(command) << " " << msgpack_schema_to_string(response) << std::endl;
4446
}
47+
48+
// Regression tests for input validation at API boundaries.
49+
// These ensure non-canonical field encodings and trailing bytes are rejected.
50+
51+
TEST(BBApiInputValidation, NonCanonicalPublicInputRejected)
52+
{
53+
using Flavor = UltraFlavor;
54+
// A value >= BN254 scalar field modulus should be rejected
55+
uint256_t non_canonical = fr::modulus + 1;
56+
std::vector<uint256_t> bad_public_inputs = { non_canonical };
57+
std::vector<uint256_t> proof = { uint256_t(0) };
58+
59+
EXPECT_THROW(bbapi::concatenate_proof<Flavor>(bad_public_inputs, proof), std::runtime_error);
60+
}
61+
62+
TEST(BBApiInputValidation, NonCanonicalProofElementRejected)
63+
{
64+
using Flavor = UltraFlavor;
65+
// The modulus itself is non-canonical (valid range is [0, modulus))
66+
uint256_t non_canonical = fr::modulus;
67+
std::vector<uint256_t> public_inputs = { uint256_t(42) };
68+
std::vector<uint256_t> bad_proof = { non_canonical };
69+
70+
EXPECT_THROW(bbapi::concatenate_proof<Flavor>(public_inputs, bad_proof), std::runtime_error);
71+
}
72+
73+
TEST(BBApiInputValidation, CanonicalValuesAccepted)
74+
{
75+
using Flavor = UltraFlavor;
76+
// modulus - 1 is the largest canonical value
77+
uint256_t max_canonical = fr::modulus - 1;
78+
std::vector<uint256_t> public_inputs = { uint256_t(0), max_canonical };
79+
std::vector<uint256_t> proof = { uint256_t(1) };
80+
81+
EXPECT_NO_THROW(bbapi::concatenate_proof<Flavor>(public_inputs, proof));
82+
}
83+
84+
TEST(BBApiInputValidation, TrailingBytesInBinaryInputRejected)
85+
{
86+
// A buffer that is not a multiple of 32 bytes should be rejected
87+
std::vector<uint8_t> buf(32 + 1, 0); // 33 bytes = 1 field element + 1 trailing byte
88+
EXPECT_THROW(many_from_buffer_exact<uint256_t>(buf, "test input"), std::runtime_error);
89+
}
90+
91+
TEST(BBApiInputValidation, ExactBinaryInputAccepted)
92+
{
93+
// A buffer that is exactly 2 field elements should parse fine
94+
std::vector<uint8_t> buf(64, 0);
95+
EXPECT_NO_THROW(many_from_buffer_exact<uint256_t>(buf, "test input"));
96+
auto result = many_from_buffer_exact<uint256_t>(buf, "test input");
97+
EXPECT_EQ(result.size(), 2UL);
98+
}
99+
100+
TEST(BBApiInputValidation, VkWithTrailingBytesRejectedOnProveSide)
101+
{
102+
using VK = UltraFlavor::VerificationKey;
103+
const size_t expected_size = VK::calc_num_data_types() * sizeof(bb::fr);
104+
// One extra byte beyond the expected VK size
105+
std::vector<uint8_t> bad_vk(expected_size + 1, 0);
106+
EXPECT_THROW(bbapi::validate_vk_size<VK>(bad_vk), std::runtime_error);
107+
}
108+
109+
TEST(BBApiInputValidation, VkWithCorrectSizeAccepted)
110+
{
111+
using VK = UltraFlavor::VerificationKey;
112+
const size_t expected_size = VK::calc_num_data_types() * sizeof(bb::fr);
113+
std::vector<uint8_t> good_vk(expected_size, 0);
114+
EXPECT_NO_THROW(bbapi::validate_vk_size<VK>(good_vk));
115+
}

barretenberg/cpp/src/barretenberg/bbapi/bbapi_ultra_honk.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ CircuitProve::Response _prove(std::vector<uint8_t>&& bytecode,
7272
info("WARNING: computing verification key while proving. Pass in a precomputed vk for better performance.");
7373
vk = std::make_shared<VerificationKey>(prover_instance->get_precomputed());
7474
} else {
75+
validate_vk_size<VerificationKey>(vk_bytes);
7576
vk = std::make_shared<VerificationKey>(from_buffer<VerificationKey>(vk_bytes));
7677
}
7778

barretenberg/cpp/src/barretenberg/benchmark/pippenger_bench/pippenger.bench.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
* Batch config modeled after AVM: ~2618 wire polys of size 2^21, committed in batches of 32.
66
*/
77
#include "barretenberg/common/assert.hpp"
8+
#include "barretenberg/common/thread.hpp"
89
#include "barretenberg/ecc/curves/bn254/bn254.hpp"
910
#include "barretenberg/ecc/scalar_multiplication/scalar_multiplication.hpp"
1011
#include "barretenberg/polynomials/polynomial_arithmetic.hpp"
@@ -82,6 +83,43 @@ BENCHMARK_DEFINE_F(PippengerBench, BatchMSM)(benchmark::State& state)
8283
}
8384
}
8485

86+
/**
87+
* @brief Batch MSM benchmark variant added to investigate `AztecProtocol/barretenberg#1656`.
88+
*
89+
* The issue is concerned about the single-threaded final reduction step in
90+
* `MSM::batch_multi_scalar_mul(...)` when the work is split across a large number of threads,
91+
* e.g. `2^16` points with `256` threads.
92+
*
93+
* We run a single MSM (num_polys = 1) and sweep thread counts and MSM sizes to make the
94+
* final reduction overhead visible in the phase breakdown reported via `BB_BENCH` counters.
95+
*/
96+
BENCHMARK_DEFINE_F(PippengerBench, BatchMSM_1656)(benchmark::State& state)
97+
{
98+
const size_t num_threads = static_cast<size_t>(state.range(0));
99+
const size_t msm_size = static_cast<size_t>(state.range(1));
100+
101+
std::vector<Fr> msm_scalars(msm_size);
102+
for (auto& s : msm_scalars) {
103+
s = Fr::random_element(&engine);
104+
}
105+
106+
std::vector<std::span<Fr>> scalar_spans;
107+
std::vector<std::span<const G1>> point_spans;
108+
scalar_spans.emplace_back(msm_scalars);
109+
point_spans.emplace_back(srs->get_monomial_points().subspan(0, msm_size));
110+
111+
// This is thread-local: restore after the benchmark so other cases in this binary are unaffected.
112+
const size_t original_concurrency = bb::get_num_cpus();
113+
bb::set_parallel_for_concurrency(num_threads);
114+
115+
for (auto _ : state) {
116+
GOOGLE_BB_BENCH_REPORTER(state);
117+
bb::scalar_multiplication::MSM<Curve>::batch_multi_scalar_mul(point_spans, scalar_spans, false);
118+
}
119+
120+
bb::set_parallel_for_concurrency(original_concurrency);
121+
}
122+
85123
// ===================== Registration =====================
86124

87125
// Single MSM: 2^14 to 2^20
@@ -97,6 +135,12 @@ BENCHMARK_REGISTER_F(PippengerBench, BatchMSM)
97135
->Args({ 32, 1 << 19 })
98136
->Args({ 32, 1 << 21 });
99137

138+
// Issue #1656 target: {threads=256, msm_size}
139+
BENCHMARK_REGISTER_F(PippengerBench, BatchMSM_1656)
140+
->Unit(benchmark::kMillisecond)
141+
->Args({ 256, 1 << 16 })
142+
->Args({ 256, 1 << 20 });
143+
100144
} // namespace
101145

102146
BENCHMARK_MAIN();

barretenberg/cpp/src/barretenberg/chonk/chonk.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ namespace bb {
2525
Chonk::Chonk(size_t num_circuits)
2626
: num_circuits(num_circuits)
2727
{
28-
BB_ASSERT_GT(num_circuits, 0UL, "Number of circuits must be specified and greater than 0.");
28+
BB_ASSERT_GTE(num_circuits, 4UL, "Number of circuits must be at least 4 (get_queue_type uses num_circuits - 3).");
2929
}
3030

3131
/**

barretenberg/cpp/src/barretenberg/chonk/chonk_proof.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ ChonkProof_<IsRecursive> ChonkProof_<IsRecursive>::from_field_elements(const std
4343

4444
// MegaZK Oink proof size = total - all other fixed-size components.
4545
// This correctly accounts for any ACIR public inputs prepended to the oink portion.
46-
const size_t mega_zk_oink_length = fields.size() - merge_size - eccvm_size - ipa_size - joint_size;
46+
constexpr size_t fixed_total = merge_size + eccvm_size + ipa_size + joint_size;
47+
BB_ASSERT_GTE(fields.size(), fixed_total, "ChonkProof::from_field_elements: proof too short");
48+
const size_t mega_zk_oink_length = fields.size() - fixed_total;
4749

4850
auto it = fields.begin();
4951

barretenberg/cpp/src/barretenberg/circuit_checker/ultra_circuit_builder_elliptic.test.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,3 +276,30 @@ TEST_F(UltraCircuitBuilderElliptic, ChainedOperationsDoubleFailure)
276276
// Should fail because the middle operation (doubling) has an invalid result
277277
EXPECT_FALSE(CircuitChecker::check(builder));
278278
}
279+
280+
// Demonstrates that a doubling gate with off-curve input (0,0) does not constrain the output.
281+
// The elliptic relation substitutes x1^3 = y1^2 - b, which is invalid for off-curve points.
282+
// When (x1,y1) = (0,0), both the x- and y-coordinate subrelations become 0 = 0 for any output.
283+
// Production code (cycle_group::dbl) prevents this by substituting y1 = 1 for points at infinity.
284+
// See CycleGroupTest.TestDblWitnessPoints (cycle_group.test.cpp) for the complementary test
285+
// that verifies the mitigation works correctly.
286+
TEST_F(UltraCircuitBuilderElliptic, DoubleOffCurveOriginUnconstrainedOutput)
287+
{
288+
// (0,0) is not on grumpkin (y^2 = x^3 - 17), so this is an off-curve input
289+
auto x1_val = bb::fr(0);
290+
auto y1_val = bb::fr(0);
291+
// Claim an arbitrary output point
292+
auto x3_val = bb::fr::random_element();
293+
auto y3_val = bb::fr::random_element();
294+
295+
UltraCircuitBuilder builder;
296+
auto x1 = builder.add_variable(x1_val);
297+
auto y1 = builder.add_variable(y1_val);
298+
auto x3 = builder.add_variable(x3_val);
299+
auto y3 = builder.add_variable(y3_val);
300+
builder.create_ecc_dbl_gate({ x1, y1, x3, y3 });
301+
302+
// The circuit checker passes because the relation is trivially satisfied for (0,0) input.
303+
// This is NOT a bug — production code never lets (0,0) reach a doubling gate.
304+
EXPECT_TRUE(CircuitChecker::check(builder));
305+
}

barretenberg/cpp/src/barretenberg/circuit_checker/ultra_circuit_checker.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ MegaCircuitBuilder_<bb::fr> UltraCircuitChecker::prepare_circuit<MegaCircuitBuil
4848

4949
template <typename Builder> bool UltraCircuitChecker::check(const Builder& builder_in)
5050
{
51+
if (builder_in.failed()) {
52+
info("CircuitChecker: circuit contains invalid witnesses: ", builder_in.err());
53+
}
54+
5155
Builder builder = UltraCircuitChecker::prepare_circuit(builder_in);
5256

5357
// Construct a hash table for lookup table entries to efficiently determine if a lookup gate is valid

barretenberg/cpp/src/barretenberg/commitment_schemes/gemini/gemini.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,7 @@ template <typename Curve> class GeminiProver_ {
267267

268268
static std::vector<Polynomial> compute_fold_polynomials(const size_t log_n,
269269
std::span<const Fr> multilinear_challenge,
270-
const Polynomial& A_0,
271-
const bool& has_zk = false);
270+
const Polynomial& A_0);
272271

273272
static std::vector<Claim> construct_univariate_opening_claims(const size_t log_n,
274273
Polynomial&& A_0_pos,

barretenberg/cpp/src/barretenberg/commitment_schemes/gemini/gemini_impl.hpp

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,12 @@ std::vector<typename GeminiProver_<Curve>::Claim> GeminiProver_<Curve>::prove(
6666
Polynomial A_0 = polynomial_batcher.compute_batched(rho);
6767

6868
// Construct the d-1 Gemini foldings of A₀(X)
69-
std::vector<Polynomial> fold_polynomials = compute_fold_polynomials(log_n, multilinear_challenge, A_0, has_zk);
69+
std::vector<Polynomial> fold_polynomials = compute_fold_polynomials(log_n, multilinear_challenge, A_0);
7070

7171
// If virtual_log_n >= log_n, pad the fold commitments with dummy group elements [1]_1.
7272
for (size_t l = 0; l < virtual_log_n - 1; l++) {
7373
std::string label = "Gemini:FOLD_" + std::to_string(l + 1);
74-
// When has_zk is true, we are sending commitments to 0. Seems to work, but maybe brittle.
74+
// Virtual-round fold polynomials are constant; their commitments are zeroed by the verifier.
7575
transcript->send_to_verifier(label, commitment_key.commit(fold_polynomials[l]));
7676
}
7777
const Fr r_challenge = transcript->template get_challenge<Fr>("Gemini:r");
@@ -108,7 +108,7 @@ std::vector<typename GeminiProver_<Curve>::Claim> GeminiProver_<Curve>::prove(
108108
*/
109109
template <typename Curve>
110110
std::vector<typename GeminiProver_<Curve>::Polynomial> GeminiProver_<Curve>::compute_fold_polynomials(
111-
const size_t log_n, std::span<const Fr> multilinear_challenge, const Polynomial& A_0, const bool& has_zk)
111+
const size_t log_n, std::span<const Fr> multilinear_challenge, const Polynomial& A_0)
112112
{
113113
BB_BENCH_NAME("Gemini::compute_fold_polynomials");
114114
BB_ASSERT_GTE(log_n, size_t(2), "Gemini folding requires at least 4-element polynomials");
@@ -156,25 +156,23 @@ std::vector<typename GeminiProver_<Curve>::Polynomial> GeminiProver_<Curve>::com
156156
A_l = A_l_fold;
157157
}
158158

159-
// Perform virtual rounds.
160-
// After the first `log_n - 1` rounds, the prover's `fold` univariates stabilize. With ZK, the verifier multiplies
161-
// the evaluations by 0, otherwise, when `virtual_log_n > log_n`, the prover honestly computes and sends the
162-
// constant folds.
159+
// Virtual rounds (indices log_n .. virtual_log_n - 1).
160+
// After real folding, the fold polynomials are constant. Since each constant polynomial evaluates to its own
161+
// value at every point, (f(X) - f(x)) / (X - x) = 0, so these contribute nothing to the Shplonk quotient Q(X).
162+
// On the verifier side, padding_indicator_array zeros their contributions independently.
163163
const auto& last = fold_polynomials.back();
164164
const Fr u_last = multilinear_challenge[log_n - 1];
165165
const Fr final_eval = last.at(0) + u_last * (last.at(1) - last.at(0));
166166
Polynomial const_fold(1);
167-
// Temporary fix: when we're running a zk proof, the verifier uses a `padding_indicator_array`. So the evals in
168-
// rounds past `log_n - 1` will be ignored. Hence the prover also needs to ignore them, otherwise Shplonk will fail.
169-
const_fold.at(0) = final_eval * Fr(static_cast<int>(!has_zk));
167+
const_fold.at(0) = final_eval;
170168
fold_polynomials.emplace_back(const_fold);
171169

172170
// FOLD_{log_n+1}, ..., FOLD_{d_v-1}
173171
Fr tail = Fr(1);
174172
for (size_t k = log_n; k < virtual_log_n - 1; ++k) {
175173
tail *= (Fr(1) - multilinear_challenge[k]); // multiply by (1 - u_k)
176174
Polynomial next_const(1);
177-
next_const.at(0) = final_eval * tail * Fr(static_cast<int>(!has_zk));
175+
next_const.at(0) = final_eval * tail;
178176
fold_polynomials.emplace_back(next_const);
179177
}
180178

barretenberg/cpp/src/barretenberg/commitment_schemes/shplonk/shplemini.hpp

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -368,8 +368,10 @@ template <typename Curve, bool HasZK = false> class ShpleminiVerifier_ {
368368
libra_evaluations, gemini_evaluation_challenge, multivariate_challenge, libra_univariate_evaluation);
369369
}
370370

371-
// Currently, only used in ECCVM
371+
// Used in ECCVM and BatchedHonkTranslator. The nu power offset in batch_sumcheck_round_claims
372+
// assumes ZK claims (NUM_SMALL_IPA_EVALUATIONS) precede sumcheck round claims in the batching order.
372373
if (committed_sumcheck) {
374+
BB_ASSERT(HasZK, "committed sumcheck requires ZK for correct nu power indexing");
373375
batch_sumcheck_round_claims(commitments,
374376
scalars,
375377
constant_term_accumulator,
@@ -513,18 +515,28 @@ template <typename Curve, bool HasZK = false> class ShpleminiVerifier_ {
513515
}
514516

515517
// Erase the duplicate entries (higher-index range first to preserve lower indices)
516-
auto erase_range = [&](size_t start, size_t count) {
518+
auto erase_range = [&](size_t duplicate_start, size_t original_start, size_t count) {
517519
for (size_t i = 0; i < count; ++i) {
518-
scalars.erase(scalars.begin() + static_cast<std::ptrdiff_t>(start));
519-
commitments.erase(commitments.begin() + static_cast<std::ptrdiff_t>(start));
520+
// Verify the commitment being erased matches its original (native only).
521+
// Each erase shifts elements down, so duplicate_start always points to the
522+
// next duplicate; the original at original_start + i is unaffected since
523+
// we erase higher-index ranges first.
524+
if constexpr (!Curve::is_stdlib_type) {
525+
BB_ASSERT(commitments[duplicate_start] == commitments[original_start + i],
526+
"remove_repeated_commitments: commitment mismatch at duplicate index " +
527+
std::to_string(duplicate_start) + " vs original index " +
528+
std::to_string(original_start + i));
529+
}
530+
scalars.erase(scalars.begin() + static_cast<std::ptrdiff_t>(duplicate_start));
531+
commitments.erase(commitments.begin() + static_cast<std::ptrdiff_t>(duplicate_start));
520532
}
521533
};
522534
if (second_duplicate_start > first_duplicate_start) {
523-
erase_range(second_duplicate_start, r2.count);
524-
erase_range(first_duplicate_start, r1.count);
535+
erase_range(second_duplicate_start, second_original_start, r2.count);
536+
erase_range(first_duplicate_start, first_original_start, r1.count);
525537
} else {
526-
erase_range(first_duplicate_start, r1.count);
527-
erase_range(second_duplicate_start, r2.count);
538+
erase_range(first_duplicate_start, first_original_start, r1.count);
539+
erase_range(second_duplicate_start, second_original_start, r2.count);
528540
}
529541
}
530542

0 commit comments

Comments
 (0)