@@ -423,9 +423,33 @@ void build_constraints_parallel(UltraCircuitBuilder& builder,
423423 }
424424 }
425425 }
426- profile_and_collect (constraints.logic_constraints , [](UltraCircuitBuilder& b, const LogicConstraint& c) {
427- create_logic_gate (b, c.a , c.b , c.result , c.num_bits , c.is_xor_gate );
428- });
426+ // Logic constraints must be grouped by (num_bits, is_xor_gate) since both affect gate count.
427+ {
428+ std::map<std::pair<uint32_t , bool >, std::vector<size_t >> logic_groups;
429+ for (size_t i = 0 ; i < constraints.logic_constraints .size (); i++) {
430+ const auto & c = constraints.logic_constraints [i];
431+ logic_groups[{ c.num_bits , c.is_xor_gate }].push_back (i);
432+ }
433+ auto handler = [](UltraCircuitBuilder& b, const LogicConstraint& c) {
434+ create_logic_gate (b, c.a , c.b , c.result , c.num_bits , c.is_xor_gate );
435+ };
436+ for (auto & [key, indices] : logic_groups) {
437+ auto & representative = constraints.logic_constraints [indices[0 ]];
438+ auto profile = profile_constraint_type (representative, handler, num_witnesses);
439+ size_t profile_idx = profiles.size ();
440+ profiles.push_back (profile);
441+ auto sizes = profile.block_sizes ;
442+ sizes.num_rom_arrays = profile.num_rom_arrays_per_instance ;
443+ sizes.num_ram_arrays = profile.num_ram_arrays_per_instance ;
444+ for (size_t idx : indices) {
445+ tasks.emplace_back ([handler, &constraints, idx](UltraCircuitBuilder& b) {
446+ handler (b, constraints.logic_constraints [idx]);
447+ });
448+ task_sizes.push_back (sizes);
449+ task_profile_indices.push_back (profile_idx);
450+ }
451+ }
452+ }
429453 // Range constraints must be grouped by num_bits since different bit widths produce different gate counts.
430454 {
431455 // Group range constraints by num_bits
@@ -453,8 +477,30 @@ void build_constraints_parallel(UltraCircuitBuilder& builder,
453477 }
454478 }
455479 }
456- profile_and_collect (constraints.aes128_constraints ,
457- [](UltraCircuitBuilder& b, const AES128Constraint& c) { create_aes128_constraints (b, c); });
480+ // AES128 constraints must be grouped by inputs.size() since different input lengths produce different gate counts.
481+ {
482+ std::map<size_t , std::vector<size_t >> aes_groups;
483+ for (size_t i = 0 ; i < constraints.aes128_constraints .size (); i++) {
484+ aes_groups[constraints.aes128_constraints [i].inputs .size ()].push_back (i);
485+ }
486+ auto handler = [](UltraCircuitBuilder& b, const AES128Constraint& c) { create_aes128_constraints (b, c); };
487+ for (auto & [sz, indices] : aes_groups) {
488+ auto & representative = constraints.aes128_constraints [indices[0 ]];
489+ auto profile = profile_constraint_type (representative, handler, num_witnesses);
490+ size_t profile_idx = profiles.size ();
491+ profiles.push_back (profile);
492+ auto sizes = profile.block_sizes ;
493+ sizes.num_rom_arrays = profile.num_rom_arrays_per_instance ;
494+ sizes.num_ram_arrays = profile.num_ram_arrays_per_instance ;
495+ for (size_t idx : indices) {
496+ tasks.emplace_back ([handler, &constraints, idx](UltraCircuitBuilder& b) {
497+ handler (b, constraints.aes128_constraints [idx]);
498+ });
499+ task_sizes.push_back (sizes);
500+ task_profile_indices.push_back (profile_idx);
501+ }
502+ }
503+ }
458504 profile_and_collect (constraints.sha256_compression , [](UltraCircuitBuilder& b, const Sha256Compression& c) {
459505 create_sha256_compression_constraints (b, c);
460506 });
0 commit comments