Skip to content

Commit a622eb5

Browse files
committed
unify and clean up
1 parent 2b68b8c commit a622eb5

4 files changed

Lines changed: 72 additions & 143 deletions

File tree

barretenberg/cpp/src/barretenberg/dsl/acir_format/acir_format.cpp

Lines changed: 26 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -379,28 +379,26 @@ void build_constraints_parallel(UltraCircuitBuilder& builder,
379379
std::vector<TaskBlockSizes> task_sizes;
380380
std::vector<size_t> task_profile_indices;
381381

382-
// Helper: given a constraint vector, a handler, and a key function, profile unique keys
383-
// and return a map from key to profile index. Does NOT add tasks (that happens in order below).
384-
auto profile_grouped = [&](auto& items, auto handler, auto key_fn) -> std::map<decltype(key_fn(items[0])), size_t> {
382+
// Helper: profile unique keys in a constraint vector, then add tasks in vector order.
383+
// Combines profiling and task collection in a single call per constraint type.
384+
auto profile_and_collect = [&](auto& items, auto handler, auto key_fn) {
385+
if (items.empty()) {
386+
return;
387+
}
385388
using Key = decltype(key_fn(items[0]));
386389
std::map<Key, size_t> key_to_profile;
390+
// Phase 1: profile unique keys
387391
for (size_t i = 0; i < items.size(); i++) {
388392
Key k = key_fn(items[i]);
389393
if (key_to_profile.count(k) == 0) {
390394
auto profile = profile_constraint_type(items[i], handler, num_witnesses);
391-
size_t profile_idx = profiles.size();
395+
key_to_profile[k] = profiles.size();
392396
profiles.push_back(profile);
393-
key_to_profile[k] = profile_idx;
394397
}
395398
}
396-
return key_to_profile;
397-
};
398-
399-
// Helper: add tasks for a constraint vector in vector order, looking up each instance's profile.
400-
auto collect_tasks = [&](auto& items, auto handler, const auto& key_to_profile, auto key_fn) {
399+
// Phase 1b: add tasks in vector order
401400
for (size_t i = 0; i < items.size(); i++) {
402-
auto k = key_fn(items[i]);
403-
size_t profile_idx = key_to_profile.at(k);
401+
size_t profile_idx = key_to_profile.at(key_fn(items[i]));
404402
const auto& profile = profiles[profile_idx];
405403
auto sizes = profile.block_sizes;
406404
sizes.num_rom_arrays = profile.num_rom_arrays_per_instance;
@@ -466,72 +464,22 @@ void build_constraints_parallel(UltraCircuitBuilder& builder,
466464
};
467465
auto ec_add_handler = [](UltraCircuitBuilder& b, const EcAdd& c) { create_ec_add_constraint(b, c); };
468466

469-
// Profile all types (order doesn't matter here — just building the key→profile maps)
470-
auto quad_profiles = !constraints.quad_constraints.empty()
471-
? profile_grouped(constraints.quad_constraints, quad_handler, const_key)
472-
: decltype(profile_grouped(constraints.quad_constraints, quad_handler, const_key)){};
473-
auto big_quad_profiles =
474-
!constraints.big_quad_constraints.empty()
475-
? profile_grouped(constraints.big_quad_constraints, big_quad_handler, big_quad_key)
476-
: decltype(profile_grouped(constraints.big_quad_constraints, big_quad_handler, big_quad_key)){};
477-
auto logic_profiles = !constraints.logic_constraints.empty()
478-
? profile_grouped(constraints.logic_constraints, logic_handler, logic_key)
479-
: decltype(profile_grouped(constraints.logic_constraints, logic_handler, logic_key)){};
480-
auto range_profiles = !constraints.range_constraints.empty()
481-
? profile_grouped(constraints.range_constraints, range_handler, range_key)
482-
: decltype(profile_grouped(constraints.range_constraints, range_handler, range_key)){};
483-
auto aes_profiles = !constraints.aes128_constraints.empty()
484-
? profile_grouped(constraints.aes128_constraints, aes_handler, aes_key)
485-
: decltype(profile_grouped(constraints.aes128_constraints, aes_handler, aes_key)){};
486-
auto sha_profiles = !constraints.sha256_compression.empty()
487-
? profile_grouped(constraints.sha256_compression, sha_handler, const_key)
488-
: decltype(profile_grouped(constraints.sha256_compression, sha_handler, const_key)){};
489-
auto ecdsa_k1_profiles =
490-
!constraints.ecdsa_k1_constraints.empty()
491-
? profile_grouped(constraints.ecdsa_k1_constraints, ecdsa_k1_handler, const_key)
492-
: decltype(profile_grouped(constraints.ecdsa_k1_constraints, ecdsa_k1_handler, const_key)){};
493-
auto ecdsa_r1_profiles =
494-
!constraints.ecdsa_r1_constraints.empty()
495-
? profile_grouped(constraints.ecdsa_r1_constraints, ecdsa_r1_handler, const_key)
496-
: decltype(profile_grouped(constraints.ecdsa_r1_constraints, ecdsa_r1_handler, const_key)){};
497-
auto blake2s_profiles =
498-
!constraints.blake2s_constraints.empty()
499-
? profile_grouped(constraints.blake2s_constraints, blake2s_handler, blake2s_key)
500-
: decltype(profile_grouped(constraints.blake2s_constraints, blake2s_handler, blake2s_key)){};
501-
auto blake3_profiles =
502-
!constraints.blake3_constraints.empty()
503-
? profile_grouped(constraints.blake3_constraints, blake3_handler, blake3_key)
504-
: decltype(profile_grouped(constraints.blake3_constraints, blake3_handler, blake3_key)){};
505-
auto keccak_profiles =
506-
!constraints.keccak_permutations.empty()
507-
? profile_grouped(constraints.keccak_permutations, keccak_handler, const_key)
508-
: decltype(profile_grouped(constraints.keccak_permutations, keccak_handler, const_key)){};
509-
auto pos2_profiles = !constraints.poseidon2_constraints.empty()
510-
? profile_grouped(constraints.poseidon2_constraints, pos2_handler, pos2_key)
511-
: decltype(profile_grouped(constraints.poseidon2_constraints, pos2_handler, pos2_key)){};
512-
auto msm_profiles =
513-
!constraints.multi_scalar_mul_constraints.empty()
514-
? profile_grouped(constraints.multi_scalar_mul_constraints, msm_handler, msm_key)
515-
: decltype(profile_grouped(constraints.multi_scalar_mul_constraints, msm_handler, msm_key)){};
516-
auto ec_add_profiles = !constraints.ec_add_constraints.empty()
517-
? profile_grouped(constraints.ec_add_constraints, ec_add_handler, const_key)
518-
: decltype(profile_grouped(constraints.ec_add_constraints, ec_add_handler, const_key)){};
519-
520-
// Collect tasks in the same order as sequential build_constraints
521-
collect_tasks(constraints.quad_constraints, quad_handler, quad_profiles, const_key);
522-
collect_tasks(constraints.big_quad_constraints, big_quad_handler, big_quad_profiles, big_quad_key);
523-
collect_tasks(constraints.logic_constraints, logic_handler, logic_profiles, logic_key);
524-
collect_tasks(constraints.range_constraints, range_handler, range_profiles, range_key);
525-
collect_tasks(constraints.aes128_constraints, aes_handler, aes_profiles, aes_key);
526-
collect_tasks(constraints.sha256_compression, sha_handler, sha_profiles, const_key);
527-
collect_tasks(constraints.ecdsa_k1_constraints, ecdsa_k1_handler, ecdsa_k1_profiles, const_key);
528-
collect_tasks(constraints.ecdsa_r1_constraints, ecdsa_r1_handler, ecdsa_r1_profiles, const_key);
529-
collect_tasks(constraints.blake2s_constraints, blake2s_handler, blake2s_profiles, blake2s_key);
530-
collect_tasks(constraints.blake3_constraints, blake3_handler, blake3_profiles, blake3_key);
531-
collect_tasks(constraints.keccak_permutations, keccak_handler, keccak_profiles, const_key);
532-
collect_tasks(constraints.poseidon2_constraints, pos2_handler, pos2_profiles, pos2_key);
533-
collect_tasks(constraints.multi_scalar_mul_constraints, msm_handler, msm_profiles, msm_key);
534-
collect_tasks(constraints.ec_add_constraints, ec_add_handler, ec_add_profiles, const_key);
467+
// Profile and collect tasks in the same order as sequential build_constraints.
468+
// Each call profiles unique keys, then adds tasks in constraint vector order.
469+
profile_and_collect(constraints.quad_constraints, quad_handler, const_key);
470+
profile_and_collect(constraints.big_quad_constraints, big_quad_handler, big_quad_key);
471+
profile_and_collect(constraints.logic_constraints, logic_handler, logic_key);
472+
profile_and_collect(constraints.range_constraints, range_handler, range_key);
473+
profile_and_collect(constraints.aes128_constraints, aes_handler, aes_key);
474+
profile_and_collect(constraints.sha256_compression, sha_handler, const_key);
475+
profile_and_collect(constraints.ecdsa_k1_constraints, ecdsa_k1_handler, const_key);
476+
profile_and_collect(constraints.ecdsa_r1_constraints, ecdsa_r1_handler, const_key);
477+
profile_and_collect(constraints.blake2s_constraints, blake2s_handler, blake2s_key);
478+
profile_and_collect(constraints.blake3_constraints, blake3_handler, blake3_key);
479+
profile_and_collect(constraints.keccak_permutations, keccak_handler, const_key);
480+
profile_and_collect(constraints.poseidon2_constraints, pos2_handler, pos2_key);
481+
profile_and_collect(constraints.multi_scalar_mul_constraints, msm_handler, msm_key);
482+
profile_and_collect(constraints.ec_add_constraints, ec_add_handler, const_key);
535483

536484
// Phase 2: Prepare the builder's caches from profiles (no constraint execution).
537485
prepare_builder_from_profiles(builder, profiles);

barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/circuit_builder_base.hpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ template <typename FF_> class CircuitBuilderBase {
5959
static constexpr uint32_t VARIABLE_CURSOR_DISABLED = UINT32_MAX;
6060

6161
// Deferred assert_equal entries for parallel construction. In cursor mode, assert_equal calls
62-
// are recorded per-thread and replayed in deterministic task order after all threads join.
62+
// are recorded per-task and replayed in deterministic task order after all threads join.
6363
// This prevents nondeterministic union-find results when multiple threads assert_equal on
6464
// the same shared ACIR witness.
6565
struct DeferredAssertEqual {
@@ -80,8 +80,8 @@ template <typename FF_> class CircuitBuilderBase {
8080
{
8181
// Replay in task order (0, 1, 2, ...) for deterministic union-find results
8282
for (auto& task_buf : deferred_assert_equals_) {
83-
for (auto& [a, b, msg] : task_buf) {
84-
assert_equal(a, b, msg);
83+
for (auto& entry : task_buf) {
84+
assert_equal(entry.a_variable_idx, entry.b_variable_idx, entry.msg);
8585
}
8686
task_buf.clear();
8787
}

barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_circuit_builder.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -584,7 +584,7 @@ plookup::ReadData<uint32_t> UltraCircuitBuilder_<ExecutionTrace>::create_gates_f
584584
// In cursor mode, defer the lookup gate entry to avoid races on table.lookup_gates
585585
if (this->get_variable_cursor() != this->VARIABLE_CURSOR_DISABLED) {
586586
auto tidx = get_parallel_thread_index();
587-
deferred_lookup_gates_[tidx].push_back({ multi_table.basic_table_ids[i], read_values.lookup_entries[i] });
587+
deferred_lookup_gates_.defer(tidx, { multi_table.basic_table_ids[i], read_values.lookup_entries[i] });
588588
} else {
589589
table.lookup_gates.emplace_back(read_values.lookup_entries[i]);
590590
}
@@ -787,7 +787,7 @@ void UltraCircuitBuilder_<ExecutionTrace>::create_small_range_constraint(const u
787787
// In cursor mode, defer range constraint to avoid races on range_lists and real_variable_tags
788788
if (this->get_variable_cursor() != this->VARIABLE_CURSOR_DISABLED) {
789789
auto tidx = get_parallel_thread_index();
790-
deferred_range_constraints_[tidx].push_back({ variable_index, target_range });
790+
deferred_range_constraints_.defer(tidx, { variable_index, target_range });
791791
return;
792792
}
793793

@@ -1567,7 +1567,7 @@ std::array<uint32_t, 2> UltraCircuitBuilder_<ExecutionTrace>::queue_partial_non_
15671567
.hi_1 = hi_1_idx,
15681568
};
15691569
if (this->get_variable_cursor() != this->VARIABLE_CURSOR_DISABLED) {
1570-
deferred_non_native_field_muls_[get_parallel_thread_index()].emplace_back(cache_entry);
1570+
deferred_non_native_field_muls_.defer(get_parallel_thread_index(), cache_entry);
15711571
} else {
15721572
cached_partial_non_native_field_multiplications.emplace_back(cache_entry);
15731573
}

barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_circuit_builder.hpp

Lines changed: 40 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -202,74 +202,50 @@ class UltraCircuitBuilder_ : public CircuitBuilderBase<typename ExecutionTrace_:
202202
// The set of variables which have been constrained to a particular value via an arithmetic gate
203203
std::unordered_map<FF, uint32_t> constant_variable_indices;
204204

205-
// Deferred lookup gate entries for parallel construction. In cursor mode, plookup reads append here
206-
// instead of to table.lookup_gates. Replayed after parallel phase via apply_deferred_lookup_gates().
205+
/**
206+
* @brief Per-thread buffer for deferring operations during parallel construction.
207+
* @details Operations that modify shared builder state are buffered per-thread during
208+
* execute_parallel and replayed sequentially after all threads join. The replay callback
209+
* receives each entry and applies it to the builder.
*
* Replay (apply) walks the buffers in ascending thread index and, within one buffer, in
* insertion order; each buffer is cleared as soon as its entries have been replayed.
*
* @tparam Entry Type of the deferred record stored in each per-thread buffer.
210+
*/
211+
template <typename Entry> struct DeferredBuffer {
212+
// One inner vector per thread, indexed by the thread index handed to defer().
std::vector<std::vector<Entry>> thread_buffers;
213+
214+
// Resizes the outer vector to num_threads buffers. Must run before any defer() call that
// uses an index below num_threads, since defer() does no bounds checking of its own.
void init(size_t num_threads) { thread_buffers.resize(num_threads); }
215+
216+
// Rvalue overload: moves the entry onto the end of the buffer at thread_idx.
// thread_idx is used as a raw index (operator[]), so it is not range-checked here.
void defer(size_t thread_idx, Entry&& entry) { thread_buffers[thread_idx].emplace_back(std::move(entry)); }
217+
218+
// Lvalue overload: copies the entry onto the end of the buffer at thread_idx.
// thread_idx is used as a raw index (operator[]), so it is not range-checked here.
void defer(size_t thread_idx, const Entry& entry) { thread_buffers[thread_idx].push_back(entry); }
219+
220+
// Invokes callback(entry) for every buffered entry, buffer by buffer, then empties each buffer.
// The callback receives a non-const lvalue reference to the stored entry.
// NOTE(review): the callback must not call defer() on this same DeferredBuffer while apply()
// is running — appending to the buffer being iterated would invalidate the range-for
// iterators. Presumably cursor mode is already disabled when replay happens; confirm at the call site.
template <typename Callback> void apply(Callback&& callback)
221+
{
222+
for (auto& buf : thread_buffers) {
223+
for (auto& entry : buf) {
224+
callback(entry);
225+
}
226+
// Drop the replayed entries so a later apply() does not replay them again.
buf.clear();
227+
}
228+
}
229+
};
230+
207231
// Deferred lookup gate entry for parallel construction. In cursor mode a plookup read records
// one of these instead of appending directly to the table's lookup_gates; on replay the entry
// is appended to the lookup_gates of the table identified by table_id.
struct DeferredLookupEntry {
208232
// Identifies the basic table whose lookup_gates receives the entry on replay.
plookup::BasicTableId table_id;
209233
// The lookup entry that would otherwise have been appended immediately.
plookup::BasicTable::LookupEntry entry;
210234
};
211-
std::vector<std::vector<DeferredLookupEntry>> deferred_lookup_gates_; // per-thread
212-
213-
// Deferred range constraint entries for parallel construction. In cursor mode, range constraints
214-
// buffer here instead of modifying range_lists. Replayed via apply_deferred_range_constraints().
215235
// Deferred small range constraint for parallel construction. In cursor mode
// create_small_range_constraint records its two arguments here and returns early; on replay
// the same function is called again with the recorded values.
struct DeferredRangeConstraint {
216236
// Index of the variable to be range constrained.
uint32_t variable_index;
217237
// The target_range argument originally passed to create_small_range_constraint.
uint64_t target_range;
218238
};
219-
std::vector<std::vector<DeferredRangeConstraint>> deferred_range_constraints_; // per-thread
220239

221-
// Deferred non-native field multiplications for parallel construction. In cursor mode,
222-
// these are buffered per-thread to avoid races on the shared vector.
223-
std::vector<std::vector<cached_partial_non_native_field_multiplication>> deferred_non_native_field_muls_;
240+
DeferredBuffer<DeferredLookupEntry> deferred_lookup_gates_;
241+
DeferredBuffer<DeferredRangeConstraint> deferred_range_constraints_;
242+
DeferredBuffer<cached_partial_non_native_field_multiplication> deferred_non_native_field_muls_;
224243

225-
/**
226-
* @brief Initialize deferred buffers for N threads.
227-
*/
228244
void init_deferred_buffers(size_t num_threads)
229245
{
230-
deferred_lookup_gates_.resize(num_threads);
231-
deferred_range_constraints_.resize(num_threads);
232-
deferred_non_native_field_muls_.resize(num_threads);
233-
}
234-
235-
/**
236-
* @brief Replay all deferred non-native field multiplications into the shared cache.
237-
*/
238-
void apply_deferred_non_native_field_muls()
239-
{
240-
for (auto& thread_buf : deferred_non_native_field_muls_) {
241-
for (auto& entry : thread_buf) {
242-
cached_partial_non_native_field_multiplications.emplace_back(entry);
243-
}
244-
thread_buf.clear();
245-
}
246-
}
247-
248-
/**
249-
* @brief Replay all deferred lookup gate entries into the appropriate tables.
250-
*/
251-
void apply_deferred_lookup_gates()
252-
{
253-
for (auto& thread_buf : deferred_lookup_gates_) {
254-
for (auto& [table_id, entry] : thread_buf) {
255-
auto& table = get_table(table_id);
256-
table.lookup_gates.emplace_back(entry);
257-
}
258-
thread_buf.clear();
259-
}
260-
}
261-
262-
/**
263-
* @brief Replay all deferred range constraints.
264-
*/
265-
void apply_deferred_range_constraints()
266-
{
267-
for (auto& thread_buf : deferred_range_constraints_) {
268-
for (auto& [variable_index, target_range] : thread_buf) {
269-
create_small_range_constraint(variable_index, target_range);
270-
}
271-
thread_buf.clear();
272-
}
246+
deferred_lookup_gates_.init(num_threads);
247+
deferred_range_constraints_.init(num_threads);
248+
deferred_non_native_field_muls_.init(num_threads);
273249
}
274250

275251
/**
@@ -427,9 +403,14 @@ class UltraCircuitBuilder_ : public CircuitBuilderBase<typename ExecutionTrace_:
427403
rom_ram_logic.ram_id_cursors_.clear();
428404

429405
// Replay deferred operations
430-
apply_deferred_lookup_gates();
431-
apply_deferred_range_constraints();
432-
apply_deferred_non_native_field_muls();
406+
deferred_lookup_gates_.apply([this](auto& e) {
407+
auto& table = get_table(e.table_id);
408+
table.lookup_gates.emplace_back(e.entry);
409+
});
410+
deferred_range_constraints_.apply(
411+
[this](auto& e) { create_small_range_constraint(e.variable_index, e.target_range); });
412+
deferred_non_native_field_muls_.apply(
413+
[this](auto& e) { cached_partial_non_native_field_multiplications.emplace_back(e); });
433414
this->apply_deferred_assert_equals();
434415
}
435416

0 commit comments

Comments
 (0)