From 3e00820ea0d2cd9b9d6df87c7545f287f44f688b Mon Sep 17 00:00:00 2001 From: zhenfei Date: Tue, 1 Mar 2022 10:48:52 -0500 Subject: [PATCH 1/6] micro benchmarks --- CHANGELOG.md | 1 + README.md | 2 +- plonk/Cargo.toml | 4 +- plonk/benches/bench.rs | 72 +++++++++++- plonk/src/bencher.rs | 184 ++++++++++++++++++++++++++++++ plonk/src/circuit/basic.rs | 49 ++++++-- plonk/src/lib.rs | 1 + plonk/src/proof_system/prover.rs | 23 +++- plonk/src/proof_system/structs.rs | 6 +- 9 files changed, 323 insertions(+), 19 deletions(-) create mode 100644 plonk/src/bencher.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index d378b2068..6b26526d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - Introducing an example for proving knowledge of exponent - Add api to get SRS size. +- Adding micro benchmarks for MSM, FFT and Poly Evaluation. ### Improvements diff --git a/README.md b/README.md index 24880fc7a..e15fff082 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ The additional flags allow using assembly implementation of `square_in_place` an For benchmark, run: ``` -RAYON_NUM_THREADS=N cargo bench +RAYON_NUM_THREADS=N cargo bench --features bench ``` where N is the number of threads you want to use (N = 1 for single-thread). diff --git a/plonk/Cargo.toml b/plonk/Cargo.toml index 018ee5267..f0ae37267 100644 --- a/plonk/Cargo.toml +++ b/plonk/Cargo.toml @@ -51,6 +51,8 @@ path = "benches/bench.rs" harness = false [features] -std = [] +std = [ ] # exposing apis for testing purpose test_apis = [] +# enabling microbench +bench = [] \ No newline at end of file diff --git a/plonk/benches/bench.rs b/plonk/benches/bench.rs index 19004b3fe..20e2f65e2 100644 --- a/plonk/benches/bench.rs +++ b/plonk/benches/bench.rs @@ -5,7 +5,7 @@ // along with the Jellyfish library. If not, see . // For benchmark, run: -// RAYON_NUM_THREADS=N cargo bench +// RAYON_NUM_THREADS=N cargo bench --features bench // where N is the number of threads you want to use (N = 1 for single-thread). 
use ark_bls12_377::{Bls12_377, Fr as Fr377}; @@ -14,6 +14,7 @@ use ark_bn254::{Bn254, Fr as Fr254}; use ark_bw6_761::{Fr as Fr761, BW6_761}; use ark_ff::PrimeField; use jf_plonk::{ + bencher::{init_timers, total_fft_time, total_msm_time, total_poly_eval_time}, circuit::{Circuit, PlonkCircuit}, errors::PlonkError, proof_system::{PlonkKzgSnark, Snark}, @@ -46,6 +47,8 @@ fn gen_circuit_for_bench( macro_rules! plonk_prove_bench { ($bench_curve:ty, $bench_field:ty, $bench_plonk_type:expr, $num_gates:expr) => { + init_timers(); + let rng = &mut ark_std::test_rng(); let cs = gen_circuit_for_bench::<$bench_field>($num_gates, $bench_plonk_type).unwrap(); @@ -62,13 +65,31 @@ macro_rules! plonk_prove_bench { ) .unwrap(); } - + println!("====================================="); println!( - "proving time for {}, {}: {} ns/gate", + "proving time for {}, {} with dim {}: {} ns/gate", stringify!($bench_curve), stringify!($bench_plonk_type), + $num_gates, start.elapsed().as_nanos() / NUM_REPETITIONS as u128 / $num_gates as u128 ); + println!( + "total proving time: {} ns", + start.elapsed().as_nanos() / NUM_REPETITIONS as u128 + ); + println!( + "time spend on FFT: {} ns", + total_fft_time().as_nanos() / NUM_REPETITIONS as u128 + ); + println!( + "time spend on MSM: {} ns", + total_msm_time().as_nanos() / NUM_REPETITIONS as u128 + ); + println!( + "time spend on poly evaluation: {} ns", + total_poly_eval_time().as_nanos() / NUM_REPETITIONS as u128 + ); + println!("====================================="); }; } @@ -85,6 +106,8 @@ fn bench_prove() { macro_rules! plonk_verify_bench { ($bench_curve:ty, $bench_field:ty, $bench_plonk_type:expr, $num_gates:expr) => { + init_timers(); + let rng = &mut ark_std::test_rng(); let cs = gen_circuit_for_bench::<$bench_field>($num_gates, $bench_plonk_type).unwrap(); @@ -104,13 +127,28 @@ macro_rules! 
plonk_verify_bench { PlonkKzgSnark::<$bench_curve>::verify::(&vk, &[], &proof, None) .unwrap(); } - + println!("====================================="); println!( - "verifying time for {}, {}: {} ns", + "verifying time for {}, {} with dim {}: {} ns", stringify!($bench_curve), stringify!($bench_plonk_type), + $num_gates, start.elapsed().as_nanos() / NUM_REPETITIONS as u128 ); + println!( + "total verify time: {} ns", + start.elapsed().as_nanos() / NUM_REPETITIONS as u128 + ); + println!( + "time spend on FFT: {} ns", + total_fft_time().as_nanos() / NUM_REPETITIONS as u128 + ); + println!( + "time spend on MSM: {} ns", + total_msm_time().as_nanos() / NUM_REPETITIONS as u128 + ); + + println!("====================================="); }; } @@ -127,6 +165,8 @@ fn bench_verify() { macro_rules! plonk_batch_verify_bench { ($bench_curve:ty, $bench_field:ty, $bench_plonk_type:expr, $num_proofs:expr) => { + init_timers(); + let rng = &mut ark_std::test_rng(); let cs = gen_circuit_for_bench::<$bench_field>(1024, $bench_plonk_type).unwrap(); @@ -163,6 +203,19 @@ macro_rules! 
plonk_batch_verify_bench { stringify!($num_proofs), start.elapsed().as_nanos() / NUM_REPETITIONS as u128 / $num_proofs as u128 ); + + println!( + "total batch verify time: {} ns", + start.elapsed().as_nanos() / NUM_REPETITIONS as u128 + ); + println!( + "time spend on FFT: {} ns", + total_fft_time().as_nanos() / NUM_REPETITIONS as u128 + ); + println!( + "time spend on MSM: {} ns", + total_msm_time().as_nanos() / NUM_REPETITIONS as u128 + ); }; } @@ -177,8 +230,17 @@ fn bench_batch_verify() { plonk_batch_verify_bench!(BW6_761, Fr761, PlonkType::UltraPlonk, 1000); } +fn bench_intense() { + for i in 10..=30 { + let dim = 1 << i; + plonk_prove_bench!(Bls12_377, Fr377, PlonkType::TurboPlonk, dim); + plonk_verify_bench!(Bls12_377, Fr377, PlonkType::TurboPlonk, dim); + } +} + fn main() { bench_prove(); bench_verify(); bench_batch_verify(); + bench_intense(); } diff --git a/plonk/src/bencher.rs b/plonk/src/bencher.rs new file mode 100644 index 000000000..46db6c165 --- /dev/null +++ b/plonk/src/bencher.rs @@ -0,0 +1,184 @@ +//! 
Helper functions for micro-benchmarks + +use ark_std::{thread_local, time::Instant}; +use core::{cell::RefCell, time::Duration}; + +thread_local!(static FFT_START_TIME: RefCell = RefCell::new(Instant::now())); +thread_local!(static FFT_TIMER_LOCK: RefCell = RefCell::new(false)); +thread_local!(static FFT_TOTAL_TIME: RefCell = RefCell::new(Duration::ZERO)); + +thread_local!(static MSM_START_TIME: RefCell = RefCell::new(Instant::now())); +thread_local!(static MSM_TIMER_LOCK: RefCell = RefCell::new(false)); +thread_local!(static MSM_TOTAL_TIME: RefCell = RefCell::new(Duration::ZERO)); + +thread_local!(static POLY_EVAL_START_TIME: RefCell = RefCell::new(Instant::now())); +thread_local!(static POLY_EVAL_TIMER_LOCK: RefCell = RefCell::new(false)); +thread_local!(static POLY_EVAL_TOTAL_TIME: RefCell = RefCell::new(Duration::ZERO)); + +/// Initialize the timers +#[inline] +pub fn init_timers() { + #[cfg(feature = "bench")] + { + FFT_TOTAL_TIME.with(|timer| { + *timer.borrow_mut() = Duration::ZERO; + }); + FFT_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = false; + }); + MSM_TOTAL_TIME.with(|timer| { + *timer.borrow_mut() = Duration::ZERO; + }); + MSM_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = false; + }); + POLY_EVAL_TOTAL_TIME.with(|timer| { + *timer.borrow_mut() = Duration::ZERO; + }); + POLY_EVAL_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = false; + }); + } +} + +/// Get the total time that we have spent on FFT related computations +#[inline] +pub fn total_fft_time() -> Duration { + #[cfg(feature = "bench")] + { + FFT_TOTAL_TIME.with(|duration| *duration.borrow()) + } + #[cfg(not(feature = "bench"))] + Duration::ZERO +} + +/// Get the total time that we have spent on MSM related computations +#[inline] +pub fn total_msm_time() -> Duration { + #[cfg(feature = "bench")] + { + MSM_TOTAL_TIME.with(|duration| *duration.borrow()) + } + #[cfg(not(feature = "bench"))] + Duration::ZERO +} + +/// Get the total time that we have spent on polynomial evaluations 
+#[inline] +pub fn total_poly_eval_time() -> Duration { + #[cfg(feature = "bench")] + { + POLY_EVAL_TOTAL_TIME.with(|duration| *duration.borrow()) + } + #[cfg(not(feature = "bench"))] + Duration::ZERO +} + +#[inline] +pub(crate) fn fft_start() { + #[cfg(feature = "bench")] + { + if FFT_TIMER_LOCK.with(|lock| *lock.borrow()) { + panic!("another FFT timer has already started somewhere else"); + } + + FFT_START_TIME.with(|timer| { + *timer.borrow_mut() = Instant::now(); + }); + + FFT_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = true; + }) + } +} + +#[inline] +pub(crate) fn fft_end() { + #[cfg(feature = "bench")] + { + if !FFT_TIMER_LOCK.with(|lock| *lock.borrow()) { + panic!("FFT timer has not started yet"); + } + + let start_time = FFT_START_TIME.with(|timer| *timer.borrow()); + let end_time = Instant::now(); + FFT_TOTAL_TIME.with(|duration| { + *duration.borrow_mut() += end_time - start_time; + }); + FFT_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = false; + }) + } +} + +#[inline] +pub(crate) fn msm_start() { + #[cfg(feature = "bench")] + { + if MSM_TIMER_LOCK.with(|lock| *lock.borrow()) { + panic!("another MSM timer has already started somewhere else"); + } + + MSM_START_TIME.with(|timer| { + *timer.borrow_mut() = Instant::now(); + }); + + MSM_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = true; + }) + } +} + +#[inline] +pub(crate) fn msm_end() { + #[cfg(feature = "bench")] + { + if !MSM_TIMER_LOCK.with(|lock| *lock.borrow()) { + panic!("MSM timer has not started yet"); + } + let start_time = MSM_START_TIME.with(|timer| *timer.borrow()); + let end_time = Instant::now(); + MSM_TOTAL_TIME.with(|duration| { + *duration.borrow_mut() += end_time - start_time; + }); + MSM_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = false; + }) + } +} + +#[inline] +pub(crate) fn poly_eval_start() { + #[cfg(feature = "bench")] + { + if POLY_EVAL_TIMER_LOCK.with(|lock| *lock.borrow()) { + panic!("another poly eval timer has already started somewhere else"); + } + + 
POLY_EVAL_START_TIME.with(|timer| { + *timer.borrow_mut() = Instant::now(); + }); + + POLY_EVAL_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = true; + }) + } +} + +#[inline] +pub(crate) fn poly_eval_end() { + #[cfg(feature = "bench")] + { + if !POLY_EVAL_TIMER_LOCK.with(|lock| *lock.borrow()) { + panic!("poly eval timer has not started yet"); + } + let start_time = POLY_EVAL_START_TIME.with(|timer| *timer.borrow()); + let end_time = Instant::now(); + POLY_EVAL_TOTAL_TIME.with(|duration| { + *duration.borrow_mut() += end_time - start_time; + }); + POLY_EVAL_TIMER_LOCK.with(|lock| { + *lock.borrow_mut() = false; + }) + } +} diff --git a/plonk/src/circuit/basic.rs b/plonk/src/circuit/basic.rs index 8c8cc3c16..1d8100893 100644 --- a/plonk/src/circuit/basic.rs +++ b/plonk/src/circuit/basic.rs @@ -7,6 +7,7 @@ //! Basic instantiations of Plonk-based constraint systems use super::{Arithmetization, Circuit, GateId, Variable, WireId}; use crate::{ + bencher::{fft_end, fft_start}, circuit::{gates::*, SortedLookupVecAndPolys}, constants::{compute_coset_representatives, GATE_WIDTH, N_MUL_SELECTORS}, errors::{CircuitError::*, PlonkError}, @@ -1059,6 +1060,8 @@ where } fn compute_selector_polynomials(&self) -> Result>, PlonkError> { + fft_start(); + self.check_finalize_flag(true)?; let domain = &self.eval_domain; if domain.size() < self.num_gates() { @@ -1074,12 +1077,16 @@ where .map(|selector| DensePolynomial::from_coefficients_vec(domain.ifft(selector))) .collect(); + fft_end(); + Ok(selector_polys) } fn compute_extended_permutation_polynomials( &self, ) -> Result>, PlonkError> { + fft_start(); + self.check_finalize_flag(true)?; let domain = &self.eval_domain; let n = domain.size(); @@ -1092,6 +1099,8 @@ where ) }) .collect(); + fft_end(); + Ok(extended_perm_polys) } @@ -1100,6 +1109,8 @@ where beta: &F, gamma: &F, ) -> Result, PlonkError> { + fft_start(); + self.check_finalize_flag(true)?; let mut product_vec = vec![F::one()]; let domain = &self.eval_domain; @@ -1119,10 
+1130,14 @@ where product_vec.push(prev_prod * a / b); } domain.ifft_in_place(&mut product_vec); - Ok(DensePolynomial::from_coefficients_vec(product_vec)) + + let res = DensePolynomial::from_coefficients_vec(product_vec); + fft_end(); + Ok(res) } fn compute_wire_polynomials(&self) -> Result>, PlonkError> { + fft_start(); self.check_finalize_flag(true)?; let domain = &self.eval_domain; if domain.size() < self.num_gates() { @@ -1145,10 +1160,13 @@ where }) .collect(); assert_eq!(wire_polys.len(), self.num_wire_types()); + fft_end(); Ok(wire_polys) } fn compute_pub_input_polynomial(&self) -> Result, PlonkError> { + fft_start(); + self.check_finalize_flag(true)?; let domain = &self.eval_domain; let mut pub_input_vec = vec![F::zero(); domain.size()]; @@ -1157,25 +1175,31 @@ where pub_input_vec[io_gate_id] = self.witness[var]; }); domain.ifft_in_place(&mut pub_input_vec); - Ok(DensePolynomial::from_coefficients_vec(pub_input_vec)) + let res = DensePolynomial::from_coefficients_vec(pub_input_vec); + fft_end(); + Ok(res) } // Plookup-related methods // fn compute_range_table_polynomial(&self) -> Result, PlonkError> { + fft_start(); let range_table = self.compute_range_table()?; let domain = &self.eval_domain; - Ok(DensePolynomial::from_coefficients_vec( - domain.ifft(&range_table), - )) + + let res = DensePolynomial::from_coefficients_vec(domain.ifft(&range_table)); + fft_end(); + Ok(res) } fn compute_key_table_polynomial(&self) -> Result, PlonkError> { + fft_start(); let key_table = self.compute_key_table()?; let domain = &self.eval_domain; - Ok(DensePolynomial::from_coefficients_vec( - domain.ifft(&key_table), - )) + + let res = DensePolynomial::from_coefficients_vec(domain.ifft(&key_table)); + fft_end(); + Ok(res) } fn compute_merged_lookup_table(&self, tau: F) -> Result, PlonkError> { @@ -1252,8 +1276,12 @@ where product_vec.push(prev_prod * a / b); } product_vec.push(F::one()); + + fft_start(); domain.ifft_in_place(&mut product_vec); - 
Ok(DensePolynomial::from_coefficients_vec(product_vec)) + let res = DensePolynomial::from_coefficients_vec(product_vec); + fft_end(); + Ok(res) } fn compute_lookup_sorted_vec_polynomials( @@ -1301,8 +1329,11 @@ where if sorted_vec.len() != 2 * n - 1 { return Err(ParameterError("The sorted vector has wrong length, some lookup variables might be outside the table".to_string()).into()); } + + fft_start(); let h1_poly = DensePolynomial::from_coefficients_vec(domain.ifft(&sorted_vec[..n])); let h2_poly = DensePolynomial::from_coefficients_vec(domain.ifft(&sorted_vec[n - 1..])); + fft_end(); Ok((sorted_vec, h1_poly, h2_poly)) } } diff --git a/plonk/src/lib.rs b/plonk/src/lib.rs index 4020ea392..25b6139ac 100644 --- a/plonk/src/lib.rs +++ b/plonk/src/lib.rs @@ -18,6 +18,7 @@ extern crate downcast_rs; #[macro_use] extern crate derivative; +pub mod bencher; pub mod circuit; pub mod constants; pub mod errors; diff --git a/plonk/src/proof_system/prover.rs b/plonk/src/proof_system/prover.rs index fbc0ea40d..ed997bb26 100644 --- a/plonk/src/proof_system/prover.rs +++ b/plonk/src/proof_system/prover.rs @@ -11,6 +11,7 @@ use super::structs::{ PlookupOracles, ProofEvaluations, ProvingKey, }; use crate::{ + bencher::{fft_end, fft_start, msm_end, msm_start, poly_eval_end, poly_eval_start}, circuit::Arithmetization, constants::{domain_size_ratio, GATE_WIDTH}, errors::{PlonkError, SnarkError::*}, @@ -190,6 +191,9 @@ impl Prover { online_oracles: &Oracles, num_wire_types: usize, ) -> ProofEvaluations { + // TODO: a potential optimization -- dense polynomial evaluations re-compute + // powers of zeta; consider pre-computing them and passing them in + poly_eval_start(); let wires_evals: Vec = online_oracles .wire_polys .par_iter() @@ -205,6 +209,7 @@ impl Prover { .prod_perm_poly .evaluate(&(challenges.zeta * self.domain.group_gen)); + poly_eval_end(); ProofEvaluations { wires_evals, wire_sigma_evals, @@ -220,6 +225,8 @@ impl Prover { challenges: &Challenges, online_oracles: &Oracles, ) -> 
Result, PlonkError> { + poly_eval_start(); + if pk.plookup_pk.is_none() { return Err(ParameterError( "Evaluate Plookup polynomials without supporting lookup".to_string(), @@ -241,6 +248,8 @@ impl Prover { let h_1_eval = online_oracles.plookup_oracles.h_polys[0].evaluate(&challenges.zeta); let q_lookup_eval = pk.q_lookup_poly()?.evaluate(&challenges.zeta); + // TODO: a potential optimization -- dense polynomial evaluations re-compute + // powers-of-gs; consider pre-computing them and passing them in let zeta_mul_g = challenges.zeta * self.domain.group_gen; let prod_next_eval = online_oracles .plookup_oracles @@ -254,6 +263,7 @@ impl Prover { let w_3_next_eval = online_oracles.wire_polys[3].evaluate(&zeta_mul_g); let w_4_next_eval = online_oracles.wire_polys[4].evaluate(&zeta_mul_g); + poly_eval_end(); Ok(PlookupEvaluations { range_table_eval, key_table_eval, @@ -458,7 +468,9 @@ impl Prover { ck: &CommitKey, poly: &DensePolynomial, ) -> Result, PlonkError> { + msm_start(); let (poly_comm, _) = KZG10::commit(ck, poly, None, None).map_err(PlonkError::PcsError)?; + msm_end(); Ok(poly_comm) } @@ -521,10 +533,12 @@ impl Prover { let alpha_3 = challenges.alpha.square() * challenges.alpha; let alpha_7 = alpha_3.square() * challenges.alpha; // enumerate proving instances + for (oracles, pk) in online_oracles.iter().zip(pks.iter()) { // lookup_flag = 1 if support Plookup argument. let lookup_flag = pk.plookup_pk.is_some(); + fft_start(); // Compute coset evaluations. let selectors_coset_fft: Vec> = pk .selectors @@ -581,6 +595,8 @@ impl Prover { (None, None, None, None) }; + fft_end(); + // Compute coset evaluations of the quotient polynomial. 
let quot_poly_coset_evals: Vec = (0..m) .into_par_iter() @@ -646,9 +662,12 @@ impl Prover { } } // Compute the coefficient form of the quotient polynomial - Ok(DensePolynomial::from_coefficients_vec( + fft_start(); + let res = DensePolynomial::from_coefficients_vec( self.quot_domain.coset_ifft("_poly_coset_evals_sum), - )) + ); + fft_end(); + Ok(res) } // Compute the i-th coset evaluation of the circuit part of the quotient diff --git a/plonk/src/proof_system/structs.rs b/plonk/src/proof_system/structs.rs index 9a6f2d33a..1ac67f8f4 100644 --- a/plonk/src/proof_system/structs.rs +++ b/plonk/src/proof_system/structs.rs @@ -6,6 +6,7 @@ //! Data structures used in Plonk proof systems use crate::{ + bencher::{msm_end, msm_start}, circuit::{ customized::{ ecc::{Point, SWToTEConParam}, @@ -866,13 +867,16 @@ impl ScalarsAndBases { } /// Compute the multi-scalar multiplication. pub(crate) fn multi_scalar_mul(&self) -> E::G1Projective { + msm_start(); let mut bases = vec![]; let mut scalars = vec![]; for (&base, scalar) in &self.base_scalar_map { bases.push(base); scalars.push(scalar.into_repr()); } - VariableBaseMSM::multi_scalar_mul(&bases, &scalars) + let res = VariableBaseMSM::multi_scalar_mul(&bases, &scalars); + msm_end(); + res } } From b68b1f3cee481882d9cab9b7696d7cd6ebaa1232 Mon Sep 17 00:00:00 2001 From: zhenfei Date: Tue, 1 Mar 2022 12:21:52 -0500 Subject: [PATCH 2/6] add quotient poly eval to fft bench --- plonk/src/proof_system/prover.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/plonk/src/proof_system/prover.rs b/plonk/src/proof_system/prover.rs index ed997bb26..8e43e54b7 100644 --- a/plonk/src/proof_system/prover.rs +++ b/plonk/src/proof_system/prover.rs @@ -533,12 +533,12 @@ impl Prover { let alpha_3 = challenges.alpha.square() * challenges.alpha; let alpha_7 = alpha_3.square() * challenges.alpha; // enumerate proving instances - + fft_start(); for (oracles, pk) in online_oracles.iter().zip(pks.iter()) { // lookup_flag = 1 if 
support Plookup argument. let lookup_flag = pk.plookup_pk.is_some(); - fft_start(); + // fft_start(); // Compute coset evaluations. let selectors_coset_fft: Vec> = pk .selectors @@ -595,7 +595,7 @@ impl Prover { (None, None, None, None) }; - fft_end(); + // fft_end(); // Compute coset evaluations of the quotient polynomial. let quot_poly_coset_evals: Vec = (0..m) @@ -662,7 +662,7 @@ impl Prover { } } // Compute the coefficient form of the quotient polynomial - fft_start(); + // fft_start(); let res = DensePolynomial::from_coefficients_vec( self.quot_domain.coset_ifft("_poly_coset_evals_sum), ); From 7921af5a138de0c91165d6b00c73a4b099c95c72 Mon Sep 17 00:00:00 2001 From: zhenfei Date: Tue, 1 Mar 2022 16:02:28 -0500 Subject: [PATCH 3/6] refine msm bench --- plonk/benches/bench.rs | 11 +++++++---- plonk/src/proof_system/prover.rs | 19 ++++++++++++++----- scripts/run_mt_bench.sh | 10 ++++++++++ 3 files changed, 31 insertions(+), 9 deletions(-) create mode 100755 scripts/run_mt_bench.sh diff --git a/plonk/benches/bench.rs b/plonk/benches/bench.rs index 20e2f65e2..a876bafcb 100644 --- a/plonk/benches/bench.rs +++ b/plonk/benches/bench.rs @@ -209,12 +209,14 @@ macro_rules! 
plonk_batch_verify_bench { start.elapsed().as_nanos() / NUM_REPETITIONS as u128 ); println!( - "time spend on FFT: {} ns", - total_fft_time().as_nanos() / NUM_REPETITIONS as u128 + "time spend on FFT: {} ns, or {}%", + total_fft_time().as_nanos() / NUM_REPETITIONS as u128, + 100f64 * total_fft_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 ); println!( - "time spend on MSM: {} ns", - total_msm_time().as_nanos() / NUM_REPETITIONS as u128 + "time spend on MSM: {} ns, or {}%", + total_msm_time().as_nanos() / NUM_REPETITIONS as u128, + 100f64 * total_fft_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 ); }; } @@ -233,6 +235,7 @@ fn bench_batch_verify() { fn bench_intense() { for i in 10..=30 { let dim = 1 << i; + println!("bench with log(dim) = {}", i); plonk_prove_bench!(Bls12_377, Fr377, PlonkType::TurboPlonk, dim); plonk_verify_bench!(Bls12_377, Fr377, PlonkType::TurboPlonk, dim); } diff --git a/plonk/src/proof_system/prover.rs b/plonk/src/proof_system/prover.rs index 8e43e54b7..5bbe2592d 100644 --- a/plonk/src/proof_system/prover.rs +++ b/plonk/src/proof_system/prover.rs @@ -81,7 +81,9 @@ impl Prover { .into_iter() .map(|poly| self.mask_polynomial(prng, poly, 1)) .collect(); + msm_start(); let wires_poly_comms = Self::commit_polynomials(ck, &wire_polys)?; + msm_end(); let pub_input_poly = cs.compute_pub_input_polynomial()?; Ok(((wires_poly_comms, wire_polys), pub_input_poly)) } @@ -105,7 +107,9 @@ impl Prover { let h_1_poly = self.mask_polynomial(prng, h_1_poly, 2); let h_2_poly = self.mask_polynomial(prng, h_2_poly, 2); let h_polys = vec![h_1_poly, h_2_poly]; + msm_start(); let h_poly_comms = Self::commit_polynomials(ck, &h_polys)?; + msm_end(); Ok(((h_poly_comms, h_polys), sorted_vec, merged_lookup_table)) } @@ -123,7 +127,9 @@ impl Prover { cs.compute_prod_permutation_polynomial(&challenges.beta, &challenges.gamma)?, 2, ); + msm_start(); let prod_perm_comm = Self::commit_polynomial(ck, &prod_perm_poly)?; + msm_end(); 
Ok((prod_perm_comm, prod_perm_poly)) } @@ -156,7 +162,9 @@ impl Prover { )?, 2, ); + msm_start(); let prod_lookup_comm = Self::commit_polynomial(ck, &prod_lookup_poly)?; + msm_end(); Ok((prod_lookup_comm, prod_lookup_poly)) } @@ -174,8 +182,9 @@ impl Prover { let quot_poly = self.compute_quotient_polynomial(challenges, pks, online_oracles, num_wire_types)?; let split_quot_polys = self.split_quotient_polynomial("_poly, num_wire_types)?; + msm_start(); let split_quot_poly_comms = Self::commit_polynomials(ck, &split_quot_polys)?; - + msm_end(); Ok((split_quot_poly_comms, split_quot_polys)) } @@ -468,9 +477,7 @@ impl Prover { ck: &CommitKey, poly: &DensePolynomial, ) -> Result, PlonkError> { - msm_start(); let (poly_comm, _) = KZG10::commit(ck, poly, None, None).map_err(PlonkError::PcsError)?; - msm_end(); Ok(poly_comm) } @@ -495,8 +502,10 @@ impl Prover { *eval_point, &empty_rand, )?; - - Self::commit_polynomial(ck, &witness_poly) + msm_start(); + let res = Self::commit_polynomial(ck, &witness_poly); + msm_end(); + res } /// Compute the quotient polynomial via (i)FFTs. 
diff --git a/scripts/run_mt_bench.sh b/scripts/run_mt_bench.sh new file mode 100755 index 000000000..c3178c137 --- /dev/null +++ b/scripts/run_mt_bench.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +RAYON_NUM_THREADS=64 cargo bench --features=bench > 64core.txt +RAYON_NUM_THREADS=32 cargo bench --features=bench > 32core.txt +RAYON_NUM_THREADS=16 cargo bench --features=bench > 16core.txt +RAYON_NUM_THREADS=8 cargo bench --features=bench > 8core.txt +RAYON_NUM_THREADS=4 cargo bench --features=bench > 4core.txt + + + From a727877eaa04a2db81badca5a430e0d52a745f05 Mon Sep 17 00:00:00 2001 From: zhenfei Date: Tue, 1 Mar 2022 16:23:24 -0500 Subject: [PATCH 4/6] optimize tests --- plonk/benches/bench.rs | 62 ++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/plonk/benches/bench.rs b/plonk/benches/bench.rs index a876bafcb..0582190f3 100644 --- a/plonk/benches/bench.rs +++ b/plonk/benches/bench.rs @@ -74,20 +74,23 @@ macro_rules! plonk_prove_bench { start.elapsed().as_nanos() / NUM_REPETITIONS as u128 / $num_gates as u128 ); println!( - "total proving time: {} ns", - start.elapsed().as_nanos() / NUM_REPETITIONS as u128 + "total batch verify time: {:.2} ms", + start.elapsed().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64 ); println!( - "time spend on FFT: {} ns", - total_fft_time().as_nanos() / NUM_REPETITIONS as u128 + "time spend on FFT: {:.2} ms, or {:.2}%", + total_fft_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_fft_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 ); println!( - "time spend on MSM: {} ns", - total_msm_time().as_nanos() / NUM_REPETITIONS as u128 + "time spend on MSM: {:.2} ms, or {:.2}%", + total_msm_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_msm_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 ); println!( - "time spend on poly evaluation: {} ns", - total_poly_eval_time().as_nanos() / 
NUM_REPETITIONS as u128 + "time spend on poly evaluation: {:.2} ms, or {:.2}%", + total_poly_eval_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_poly_eval_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 ); println!("====================================="); }; @@ -106,8 +109,6 @@ fn bench_prove() { macro_rules! plonk_verify_bench { ($bench_curve:ty, $bench_field:ty, $bench_plonk_type:expr, $num_gates:expr) => { - init_timers(); - let rng = &mut ark_std::test_rng(); let cs = gen_circuit_for_bench::<$bench_field>($num_gates, $bench_plonk_type).unwrap(); @@ -120,6 +121,7 @@ macro_rules! plonk_verify_bench { PlonkKzgSnark::<$bench_curve>::prove::<_, _, StandardTranscript>(rng, &cs, &pk, None) .unwrap(); + init_timers(); let start = ark_std::time::Instant::now(); for _ in 0..NUM_REPETITIONS { @@ -136,16 +138,18 @@ macro_rules! plonk_verify_bench { start.elapsed().as_nanos() / NUM_REPETITIONS as u128 ); println!( - "total verify time: {} ns", - start.elapsed().as_nanos() / NUM_REPETITIONS as u128 + "total batch verify time: {:.2} ms", + start.elapsed().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64 ); println!( - "time spend on FFT: {} ns", - total_fft_time().as_nanos() / NUM_REPETITIONS as u128 + "time spend on FFT: {:.2} ms, or {:.2}%", + total_fft_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_fft_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 ); println!( - "time spend on MSM: {} ns", - total_msm_time().as_nanos() / NUM_REPETITIONS as u128 + "time spend on MSM: {:.2} ms, or {:.2}%", + total_msm_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_msm_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 ); println!("====================================="); @@ -205,18 +209,18 @@ macro_rules! 
plonk_batch_verify_bench { ); println!( - "total batch verify time: {} ns", - start.elapsed().as_nanos() / NUM_REPETITIONS as u128 + "total batch verify time: {:.2} ms", + start.elapsed().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64 ); println!( - "time spend on FFT: {} ns, or {}%", - total_fft_time().as_nanos() / NUM_REPETITIONS as u128, + "time spend on FFT: {:.2} ms, or {:.2}%", + total_fft_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, 100f64 * total_fft_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 ); println!( - "time spend on MSM: {} ns, or {}%", - total_msm_time().as_nanos() / NUM_REPETITIONS as u128, - 100f64 * total_fft_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + "time spend on MSM: {:.2} ms, or {:.2}%", + total_msm_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_msm_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 ); }; } @@ -237,13 +241,19 @@ fn bench_intense() { let dim = 1 << i; println!("bench with log(dim) = {}", i); plonk_prove_bench!(Bls12_377, Fr377, PlonkType::TurboPlonk, dim); + } + + for i in 10..=30 { + let dim = 1 << i; + println!("bench with log(dim) = {}", i); plonk_verify_bench!(Bls12_377, Fr377, PlonkType::TurboPlonk, dim); } } fn main() { - bench_prove(); - bench_verify(); - bench_batch_verify(); + // temporarily disable the first three benches for cloud bench + // bench_prove(); + // bench_verify(); + // bench_batch_verify(); bench_intense(); } From a004078b2c32f75fdc8a17050dda66ad61d5f08b Mon Sep 17 00:00:00 2001 From: zhenfei Date: Tue, 1 Mar 2022 17:08:09 -0500 Subject: [PATCH 5/6] optimize bench and add logs --- plonk/benches/bench.rs | 78 ++++++++++++++++++++++++++++++++++++++--- scripts/run_mt_bench.sh | 12 ++++--- 2 files changed, 80 insertions(+), 10 deletions(-) diff --git a/plonk/benches/bench.rs b/plonk/benches/bench.rs index 0582190f3..929ba79fa 100644 --- a/plonk/benches/bench.rs +++ b/plonk/benches/bench.rs @@ 
-13,6 +13,7 @@ use ark_bls12_381::{Bls12_381, Fr as Fr381}; use ark_bn254::{Bn254, Fr as Fr254}; use ark_bw6_761::{Fr as Fr761, BW6_761}; use ark_ff::PrimeField; +use ark_std::{fs::File, io::Write}; use jf_plonk::{ bencher::{init_timers, total_fft_time, total_msm_time, total_poly_eval_time}, circuit::{Circuit, PlonkCircuit}, @@ -47,8 +48,56 @@ fn gen_circuit_for_bench( macro_rules! plonk_prove_bench { ($bench_curve:ty, $bench_field:ty, $bench_plonk_type:expr, $num_gates:expr) => { + let rng = &mut ark_std::test_rng(); + let cs = gen_circuit_for_bench::<$bench_field>($num_gates, $bench_plonk_type).unwrap(); + + let max_degree = $num_gates + 2; + let srs = PlonkKzgSnark::<$bench_curve>::universal_setup(max_degree, rng).unwrap(); + + let (pk, _) = PlonkKzgSnark::<$bench_curve>::preprocess(&srs, &cs).unwrap(); + init_timers(); + let start = ark_std::time::Instant::now(); + + for _ in 0..NUM_REPETITIONS { + let _ = PlonkKzgSnark::<$bench_curve>::prove::<_, _, StandardTranscript>( + rng, &cs, &pk, None, + ) + .unwrap(); + } + println!("====================================="); + println!( + "proving time for {}, {} with dim {}: {} ns/gate", + stringify!($bench_curve), + stringify!($bench_plonk_type), + $num_gates, + start.elapsed().as_nanos() / NUM_REPETITIONS as u128 / $num_gates as u128 + ); + println!( + "total batch verify time: {:.2} ms", + start.elapsed().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64 + ); + println!( + "time spend on FFT: {:.2} ms, or {:.2}%", + total_fft_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_fft_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + ); + println!( + "time spend on MSM: {:.2} ms, or {:.2}%", + total_msm_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_msm_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + ); + println!( + "time spend on poly evaluation: {:.2} ms, or {:.2}%", + total_poly_eval_time().as_nanos() as f64 / 
NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_poly_eval_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 + ); + println!("====================================="); + }; +} +macro_rules! plonk_prove_mt_bench { + ($bench_curve:ty, $bench_field:ty, $bench_plonk_type:expr, $num_gates:expr, $file:expr) => { let rng = &mut ark_std::test_rng(); let cs = gen_circuit_for_bench::<$bench_field>($num_gates, $bench_plonk_type).unwrap(); @@ -57,6 +106,7 @@ macro_rules! plonk_prove_bench { let (pk, _) = PlonkKzgSnark::<$bench_curve>::preprocess(&srs, &cs).unwrap(); + init_timers(); let start = ark_std::time::Instant::now(); for _ in 0..NUM_REPETITIONS { @@ -93,6 +143,20 @@ macro_rules! plonk_prove_bench { 100f64 * total_poly_eval_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64 ); println!("====================================="); + $file + .write_all( + format!( + "{} {:.2} {:.2} {:.2} {:.2} {:.2}\n", + $num_gates, + start.elapsed().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + total_fft_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_fft_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64, + total_msm_time().as_nanos() as f64 / NUM_REPETITIONS as f64 / 1_000_000f64, + 100f64 * total_msm_time().as_nanos() as f64 / start.elapsed().as_nanos() as f64, + ) + .as_ref(), + ) + .expect("Unable to write data"); }; } @@ -169,8 +233,6 @@ fn bench_verify() { macro_rules! plonk_batch_verify_bench { ($bench_curve:ty, $bench_field:ty, $bench_plonk_type:expr, $num_proofs:expr) => { - init_timers(); - let rng = &mut ark_std::test_rng(); let cs = gen_circuit_for_bench::<$bench_field>(1024, $bench_plonk_type).unwrap(); @@ -188,6 +250,7 @@ macro_rules! 
plonk_batch_verify_bench { let public_inputs_ref = vec![&pub_input[..]; $num_proofs]; let proofs_ref = vec![&proof; $num_proofs]; + init_timers(); let start = ark_std::time::Instant::now(); for _ in 0..NUM_REPETITIONS { @@ -237,10 +300,16 @@ fn bench_batch_verify() { } fn bench_intense() { + let mut f = File::create(format!( + "../target/{}-threads.txt", + rayon::current_num_threads() + )) + .expect("Unable to create file"); + for i in 10..=30 { let dim = 1 << i; println!("bench with log(dim) = {}", i); - plonk_prove_bench!(Bls12_377, Fr377, PlonkType::TurboPlonk, dim); + plonk_prove_mt_bench!(Bls12_377, Fr377, PlonkType::TurboPlonk, dim, f); } for i in 10..=30 { @@ -251,9 +320,8 @@ fn bench_intense() { } fn main() { - // temporarily disable the first three benches for cloud bench + bench_intense(); // bench_prove(); // bench_verify(); // bench_batch_verify(); - bench_intense(); } diff --git a/scripts/run_mt_bench.sh b/scripts/run_mt_bench.sh index c3178c137..f6bdcaa64 100755 --- a/scripts/run_mt_bench.sh +++ b/scripts/run_mt_bench.sh @@ -1,10 +1,12 @@ #!/usr/bin/env bash -RAYON_NUM_THREADS=64 cargo bench --features=bench > 64core.txt -RAYON_NUM_THREADS=32 cargo bench --features=bench > 32core.txt -RAYON_NUM_THREADS=16 cargo bench --features=bench > 16core.txt -RAYON_NUM_THREADS=8 cargo bench --features=bench > 8core.txt -RAYON_NUM_THREADS=4 cargo bench --features=bench > 4core.txt +rm target/*.txt +rm target/*.log +RAYON_NUM_THREADS=64 cargo bench --features=bench > target/64core.log +RAYON_NUM_THREADS=32 cargo bench --features=bench > target/32core.log +RAYON_NUM_THREADS=16 cargo bench --features=bench > target/16core.log +RAYON_NUM_THREADS=8 cargo bench --features=bench > target/8core.log +RAYON_NUM_THREADS=4 cargo bench --features=bench > target/4core.log From 8f9f115e2546519b5623d15a00b5b52e6229fdde Mon Sep 17 00:00:00 2001 From: zhenfei Date: Thu, 3 Mar 2022 15:56:18 -0500 Subject: [PATCH 6/6] clean up --- plonk/benches/bench.rs | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/plonk/benches/bench.rs b/plonk/benches/bench.rs index 929ba79fa..ecee43343 100644 --- a/plonk/benches/bench.rs +++ b/plonk/benches/bench.rs @@ -321,7 +321,7 @@ fn bench_intense() { fn main() { bench_intense(); - // bench_prove(); - // bench_verify(); - // bench_batch_verify(); + bench_prove(); + bench_verify(); + bench_batch_verify(); }