Skip to content

Commit 6974938

Browse files
avi-starkware and claude
committed
starknet_transaction_prover: proving-job duration + outcome metrics
Adds Prometheus counters / histograms recorded by `VirtualSnosProver` for each proving job: total count by outcome (`success`, `failure_validation`, `failure_blocked`, `failure_runner`, `failure_output_parse`, `failure_proving`), end-to-end duration, and the OS-run and Stwo-prove sub-step durations. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 3790635 commit 6974938

3 files changed

Lines changed: 77 additions & 15 deletions

File tree

crates/starknet_transaction_prover/src/errors.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,27 @@ pub enum VirtualSnosProverError {
131131
TransactionBlocked,
132132
}
133133

134+
impl VirtualSnosProverError {
135+
/// Maps the variant to one of the bounded label values declared in
136+
/// `crate::server::metrics::outcomes`. The single match keeps the
137+
/// `prover_prove_transaction_outcome_total{outcome}` cardinality fixed —
138+
/// adding a variant requires a dashboard update at the same time.
139+
pub fn metric_outcome(&self) -> &'static str {
140+
use crate::server::metrics::outcomes;
141+
match self {
142+
VirtualSnosProverError::InvalidTransactionType(_)
143+
| VirtualSnosProverError::InvalidTransactionInput(_)
144+
| VirtualSnosProverError::ValidationError(_) => outcomes::VALIDATION,
145+
VirtualSnosProverError::TransactionBlocked => outcomes::BLOCKED,
146+
VirtualSnosProverError::RunnerError(_) => outcomes::RUNNER,
147+
VirtualSnosProverError::OutputParseError(_)
148+
| VirtualSnosProverError::ProgramOutputError(_) => outcomes::OUTPUT_PARSE,
149+
#[cfg(feature = "stwo_proving")]
150+
VirtualSnosProverError::ProvingError(_) => outcomes::PROVING,
151+
}
152+
}
153+
}
154+
134155
/// Errors that can occur during configuration.
135156
#[derive(Debug, Error)]
136157
pub enum ConfigError {

crates/starknet_transaction_prover/src/proving/virtual_snos_prover.rs

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use crate::blocking_check::{BlockingCheckClient, BlockingCheckResult};
2525
use crate::config::ProverConfig;
2626
use crate::errors::VirtualSnosProverError;
2727
use crate::running::runner::{RpcRunnerFactory, RunnerOutput, VirtualSnosRunner};
28+
use crate::server::metrics::{names as metric_names, outcomes};
2829

2930
/// Result of a successful prove transaction operation.
3031
///
@@ -149,7 +150,28 @@ impl<R: VirtualSnosRunner + 'static> VirtualSnosProver<R> {
149150
transaction: RpcTransaction,
150151
) -> Result<ProveTransactionResult, VirtualSnosProverError> {
151152
let start_time = Instant::now();
153+
let result = self.prove_transaction_inner(block_id, transaction).await;
154+
let total = start_time.elapsed();
155+
// One histogram observation per request. Latency is bucketed and is
156+
// recorded regardless of outcome. The histogram carries no `outcome` label,
157+
// so per-outcome latency (e.g. success-only p99) cannot be derived from it.
158+
metrics::histogram!(metric_names::PROVE_TRANSACTION_DURATION_SECONDS)
159+
.record(total.as_secs_f64());
160+
let outcome = match &result {
161+
Ok(_) => outcomes::SUCCESS,
162+
Err(err) => err.metric_outcome(),
163+
};
164+
metrics::counter!(metric_names::PROVE_TRANSACTION_OUTCOME_TOTAL, "outcome" => outcome)
165+
.increment(1);
166+
info!(total_duration_ms = %total.as_millis(), outcome, "prove_transaction completed");
167+
result
168+
}
152169

170+
async fn prove_transaction_inner(
171+
&self,
172+
block_id: BlockId,
173+
transaction: RpcTransaction,
174+
) -> Result<ProveTransactionResult, VirtualSnosProverError> {
153175
// Validate block_id is not pending.
154176
if matches!(block_id, BlockId::Pending) {
155177
return Err(VirtualSnosProverError::ValidationError(
@@ -162,15 +184,12 @@ impl<R: VirtualSnosRunner + 'static> VirtualSnosProver<R> {
162184
validate_transaction_input(&invoke_v3, self.validate_zero_fee_fields)?;
163185
let invoke_tx = InvokeTransaction::V3(invoke_v3.into());
164186

165-
let result = match &self.blocking_check_client {
166-
None => self.run_and_prove(block_id, vec![invoke_tx]).await?,
187+
match &self.blocking_check_client {
188+
None => self.run_and_prove(block_id, vec![invoke_tx]).await,
167189
Some(client) => {
168-
self.prove_with_blocking_check(client, block_id, transaction, invoke_tx).await?
190+
self.prove_with_blocking_check(client, block_id, transaction, invoke_tx).await
169191
}
170-
};
171-
172-
info!(total_duration_ms = %start_time.elapsed().as_millis(), "prove_transaction completed");
173-
Ok(result)
192+
}
174193
}
175194

176195
/// Runs the OS and generates a proof. This is the core proving pipeline.
@@ -186,18 +205,18 @@ impl<R: VirtualSnosRunner + 'static> VirtualSnosProver<R> {
186205
.await
187206
.map_err(|err| VirtualSnosProverError::RunnerError(Box::new(err)))?;
188207

189-
info!(
190-
os_duration_ms = %os_start.elapsed().as_millis(),
191-
"OS execution completed"
192-
);
208+
let os_duration = os_start.elapsed();
209+
metrics::histogram!(metric_names::OS_RUN_DURATION_SECONDS)
210+
.record(os_duration.as_secs_f64());
211+
info!(os_duration_ms = %os_duration.as_millis(), "OS execution completed");
193212

194213
let prove_start = Instant::now();
195214
let result = self.prove_virtual_snos_run(runner_output).await?;
196215

197-
info!(
198-
prove_duration_ms = %prove_start.elapsed().as_millis(),
199-
"Proving completed"
200-
);
216+
let prove_duration = prove_start.elapsed();
217+
metrics::histogram!(metric_names::STWO_PROVE_DURATION_SECONDS)
218+
.record(prove_duration.as_secs_f64());
219+
info!(prove_duration_ms = %prove_duration.as_millis(), "Proving completed");
201220

202221
Ok(result)
203222
}

crates/starknet_transaction_prover/src/server/metrics.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,28 @@ pub mod names {
2828
pub const BUILD_INFO: &str = "prover_build_info";
2929
/// Requests rejected because the concurrency semaphore was full.
3030
pub const CONCURRENCY_REJECTED_TOTAL: &str = "prover_concurrency_rejected_total";
31+
/// Wall-clock duration of `prove_transaction` end-to-end. Bucketed.
32+
pub const PROVE_TRANSACTION_DURATION_SECONDS: &str =
33+
"prover_prove_transaction_duration_seconds";
34+
/// `prove_transaction` outcomes by category. See [`super::outcomes`] for
35+
/// the fixed set of label values.
36+
pub const PROVE_TRANSACTION_OUTCOME_TOTAL: &str = "prover_prove_transaction_outcome_total";
37+
/// Virtual SNOS run sub-step duration. Bucketed.
38+
pub const OS_RUN_DURATION_SECONDS: &str = "prover_os_run_duration_seconds";
39+
/// Stwo proving sub-step duration. Bucketed.
40+
pub const STWO_PROVE_DURATION_SECONDS: &str = "prover_stwo_prove_duration_seconds";
41+
}
42+
43+
/// Fixed, bounded set of values for the `outcome` label on
44+
/// [`names::PROVE_TRANSACTION_OUTCOME_TOTAL`]. Adding a variant requires a
45+
/// dashboard update.
46+
pub mod outcomes {
47+
pub const SUCCESS: &str = "success";
48+
pub const VALIDATION: &str = "failure_validation";
49+
pub const BLOCKED: &str = "failure_blocked";
50+
pub const RUNNER: &str = "failure_runner";
51+
pub const OUTPUT_PARSE: &str = "failure_output_parse";
52+
pub const PROVING: &str = "failure_proving";
3153
}
3254

3355
/// Initializes the global Prometheus exporter and emits the `build_info`

0 commit comments

Comments
 (0)