diff --git a/crates/apollo_consensus_orchestrator/src/metrics.rs b/crates/apollo_consensus_orchestrator/src/metrics.rs index 156d426f566..3e379349f9b 100644 --- a/crates/apollo_consensus_orchestrator/src/metrics.rs +++ b/crates/apollo_consensus_orchestrator/src/metrics.rs @@ -33,11 +33,11 @@ define_metrics!( // Proposal validation failure metrics LabeledMetricCounter { CONSENSUS_VALIDATE_PROPOSAL_FAILURE , "consensus_validate_proposal_failure", "Number of failures while validating a proposal", init = 0, labels = VALIDATE_PROPOSAL_FAILURE_REASON }, - // SNIP-35 dynamic gas pricing metrics + // SNIP-35 dynamic gas pricing metrics. + // STRK/USD rate metrics are in `apollo_l1_gas_price`. MetricGauge { SNIP35_FEE_ACTUAL, "snip35_fee_actual", "The current fee_actual (median of recent fee_proposals sliding window)" }, MetricGauge { SNIP35_FEE_PROPOSAL, "snip35_fee_proposal", "The fee_proposal this node published in the latest block" }, MetricGauge { SNIP35_FEE_TARGET, "snip35_fee_target", "The fee_target computed from the STRK/USD oracle" }, - MetricGauge { SNIP35_STRK_USD_RATE, "snip35_strk_usd_rate", "The STRK/USD rate from the oracle" }, } ); @@ -109,5 +109,4 @@ pub(crate) fn register_metrics() { SNIP35_FEE_ACTUAL.register(); SNIP35_FEE_PROPOSAL.register(); SNIP35_FEE_TARGET.register(); - SNIP35_STRK_USD_RATE.register(); } diff --git a/crates/apollo_consensus_orchestrator/src/sequencer_consensus_context.rs b/crates/apollo_consensus_orchestrator/src/sequencer_consensus_context.rs index d52616ee0b9..8f98bae0730 100644 --- a/crates/apollo_consensus_orchestrator/src/sequencer_consensus_context.rs +++ b/crates/apollo_consensus_orchestrator/src/sequencer_consensus_context.rs @@ -97,7 +97,6 @@ use crate::metrics::{ SNIP35_FEE_ACTUAL, SNIP35_FEE_PROPOSAL, SNIP35_FEE_TARGET, - SNIP35_STRK_USD_RATE, }; use crate::snip35::{ compute_fee_actual, @@ -461,7 +460,6 @@ impl SequencerConsensusContext { let fee_target = match &self.deps.strk_to_usd_oracle { Some(oracle) => match 
oracle.fetch_rate(timestamp).await { Ok(rate) => { - SNIP35_STRK_USD_RATE.set_lossy(rate); let target = compute_fee_target(TARGET_ATTO_USD_PER_L2_GAS, rate); match target { Some(t) => SNIP35_FEE_TARGET.set_lossy(t.0), diff --git a/crates/apollo_dashboard/resources/dev_grafana.json b/crates/apollo_dashboard/resources/dev_grafana.json index d386dfb0bdd..8b56dc9d0e5 100644 --- a/crates/apollo_dashboard/resources/dev_grafana.json +++ b/crates/apollo_dashboard/resources/dev_grafana.json @@ -432,6 +432,56 @@ "snip35_strk_usd_rate{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"} / 1e18" ], "extra_params": {} + }, + { + "title": "SNIP-35 STRK/USD Rate Query Success (binary)", + "description": "Indicates whether the STRK→USD rate query succeeded (1m window)", + "type": "timeseries", + "exprs": [ + "changes(snip35_strk_usd_success_count{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[1m])" + ], + "extra_params": { + "log_query": "\"Caching conversion rate for timestamp\"" + } + }, + { + "title": "SNIP-35 STRK/USD Rate Query Error Count", + "description": "The number of times the STRK→USD rate query failed (10m window)", + "type": "timeseries", + "exprs": [ + "increase(snip35_strk_usd_error_count{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[10m])" + ], + "extra_params": { + "log_query": "\"Failed to resolve query to\" OR \"Timeout when resolving query to\" OR \"Query failed to join handle for timestamp\"" + } + }, + { + "title": "Seconds Since Last Successful STRK→USD Rate Update", + "description": "The number of seconds since the last successful STRK→USD rate update (assuming there was an update in the last 12 hours).", + "type": "timeseries", + "exprs": [ + "time() - max(last_over_time(snip35_strk_usd_last_success_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[12h]))" + ], + "extra_params": { + "unit": "s", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + 
"value": null + }, + { + "color": "yellow", + "value": 1200.0 + }, + { + "color": "red", + "value": 1800.0 + } + ] + } + } } ], "collapsed": true diff --git a/crates/apollo_dashboard/src/panels/consensus.rs b/crates/apollo_dashboard/src/panels/consensus.rs index 6718bdce0a5..cf421b551de 100644 --- a/crates/apollo_dashboard/src/panels/consensus.rs +++ b/crates/apollo_dashboard/src/panels/consensus.rs @@ -59,7 +59,12 @@ use apollo_consensus_orchestrator::metrics::{ SNIP35_FEE_ACTUAL, SNIP35_FEE_PROPOSAL, SNIP35_FEE_TARGET, +}; +use apollo_l1_gas_price::metrics::{ + SNIP35_STRK_USD_ERROR_COUNT, + SNIP35_STRK_USD_LAST_SUCCESS_TIMESTAMP_SECONDS, SNIP35_STRK_USD_RATE, + SNIP35_STRK_USD_SUCCESS_COUNT, }; use apollo_metrics::metrics::MetricQueryName; use apollo_network::metrics::{LABEL_NAME_BROADCAST_DROP_REASON, LABEL_NAME_EVENT_TYPE}; @@ -70,6 +75,7 @@ use crate::dashboard::Row; use crate::panel::{traffic_light_thresholds, Panel, PanelType, Unit}; use crate::query_builder::{ increase, + seconds_since_last_timestamp, sum_by_label, DisplayMethod, DEFAULT_DURATION, @@ -751,6 +757,41 @@ fn get_panel_snip35_strk_usd_rate() -> Panel { ) } +fn get_panel_snip35_strk_usd_error_count() -> Panel { + Panel::new( + "SNIP-35 STRK/USD Rate Query Error Count", + format!("The number of times the STRK→USD rate query failed ({DEFAULT_DURATION} window)"), + increase(&SNIP35_STRK_USD_ERROR_COUNT, DEFAULT_DURATION), + PanelType::TimeSeries, + ) + .with_log_query( + "\"Failed to resolve query to\" OR \"Timeout when resolving query to\" OR \"Query failed \ + to join handle for timestamp\"", + ) +} + +fn get_panel_snip35_strk_usd_success_count() -> Panel { + Panel::new( + "SNIP-35 STRK/USD Rate Query Success (binary)", + "Indicates whether the STRK→USD rate query succeeded (1m window)", + format!("changes({}[1m])", SNIP35_STRK_USD_SUCCESS_COUNT.get_name_with_filter()), + PanelType::TimeSeries, + ) + .with_log_query("Caching conversion rate for timestamp") +} + +fn 
get_panel_snip35_strk_usd_seconds_since_last_successful_update() -> Panel { + Panel::new( + "Seconds Since Last Successful STRK→USD Rate Update", + "The number of seconds since the last successful STRK→USD rate update (assuming there was \ + an update in the last 12 hours).", + seconds_since_last_timestamp(&SNIP35_STRK_USD_LAST_SUCCESS_TIMESTAMP_SECONDS), + PanelType::TimeSeries, + ) + .with_unit(Unit::Seconds) + .with_absolute_thresholds(traffic_light_thresholds(1200.0, 1800.0)) +} + pub(crate) fn get_snip35_row() -> Row { Row::new( "SNIP-35", @@ -759,6 +800,9 @@ pub(crate) fn get_snip35_row() -> Row { get_panel_snip35_fee_proposal(), get_panel_snip35_fee_target(), get_panel_snip35_strk_usd_rate(), + get_panel_snip35_strk_usd_success_count(), + get_panel_snip35_strk_usd_error_count(), + get_panel_snip35_strk_usd_seconds_since_last_successful_update(), ], ) } diff --git a/crates/apollo_l1_gas_price/src/exchange_rate_oracle.rs b/crates/apollo_l1_gas_price/src/exchange_rate_oracle.rs index b2d443aa8bd..9a1451a7409 100644 --- a/crates/apollo_l1_gas_price/src/exchange_rate_oracle.rs +++ b/crates/apollo_l1_gas_price/src/exchange_rate_oracle.rs @@ -18,13 +18,7 @@ use tokio_util::task::AbortOnDropHandle; use tracing::{debug, info, instrument, warn}; use url::Url; -use crate::metrics::{ - register_eth_to_strk_metrics, - ETH_TO_STRK_ERROR_COUNT, - ETH_TO_STRK_LAST_SUCCESS_TIMESTAMP_SECONDS, - ETH_TO_STRK_RATE, - ETH_TO_STRK_SUCCESS_COUNT, -}; +use crate::metrics::ExchangeRateOracleMetrics; #[cfg(test)] #[path = "exchange_rate_oracle_test.rs"] @@ -54,7 +48,9 @@ pub struct UrlAndHeaderMap { type PriceQuery = AbortOnDropHandle>; -/// Client for interacting with the eth to strk Oracle API. +/// Client for interacting with an exchange-rate oracle API. +/// Concrete pair (ETH→STRK, STRK→USD, ...) is determined by `config.url_header_list` +/// and the `metrics` set passed at construction. 
#[derive(Clone, Debug)] pub struct ExchangeRateOracleClient { config: ExchangeRateOracleConfig, @@ -65,15 +61,16 @@ pub struct ExchangeRateOracleClient { client: reqwest::Client, cached_prices: Arc>>, queries: Arc>>, + metrics: ExchangeRateOracleMetrics, } impl ExchangeRateOracleClient { - pub fn new(config: ExchangeRateOracleConfig) -> Self { + pub fn new(config: ExchangeRateOracleConfig, metrics: ExchangeRateOracleMetrics) -> Self { info!( "Creating ExchangeRateOracleClient with: urls={:?} lag_interval_seconds={}", config.url_header_list, config.lag_interval_seconds ); - register_eth_to_strk_metrics(); + metrics.register(); let url_header_list = config .url_header_list .as_ref() @@ -95,6 +92,7 @@ impl ExchangeRateOracleClient { queries: Arc::new(Mutex::new(LruCache::new( NonZeroUsize::new(config.max_cache_size).expect("Invalid cache size"), ))), + metrics, } } @@ -112,6 +110,7 @@ impl ExchangeRateOracleClient { let index_clone = self.index.clone(); let url_header_list = self.url_header_list.clone(); let list_len = url_header_list.len(); + let metrics = self.metrics; let future = async move { let initial_index = index_clone.load(Ordering::SeqCst); for (i, url_and_headers) in @@ -133,7 +132,7 @@ impl ExchangeRateOracleClient { ))); } let body = response.text().await?; - let rate = resolve_query(body)?; + let rate = resolve_query(body, &metrics)?; Ok::<_, ExchangeRateOracleClientError>(rate) }) .await; @@ -152,7 +151,7 @@ impl ExchangeRateOracleClient { warn!("Timeout when resolving query to {url}"); } }; - ETH_TO_STRK_ERROR_COUNT.increment(1); + metrics.error_count.increment(1); } warn!("All {list_len} URLs in the list failed for timestamp {adjusted_timestamp}"); Err(ExchangeRateOracleClientError::AllUrlsFailedError( @@ -164,7 +163,10 @@ impl ExchangeRateOracleClient { } } -fn resolve_query(body: String) -> Result { +fn resolve_query( + body: String, + metrics: &ExchangeRateOracleMetrics, +) -> Result { let Ok(json): Result = serde_json::from_str(&body) else { 
return Err(ExchangeRateOracleClientError::ParseError(format!( "Failed to parse JSON: {body}" ))); }; @@ -199,9 +201,9 @@ fn resolve_query(body: String) -> Result { decimals, )); } - ETH_TO_STRK_SUCCESS_COUNT.increment(1); - set_unix_now_seconds(&ETH_TO_STRK_LAST_SUCCESS_TIMESTAMP_SECONDS); - ETH_TO_STRK_RATE.set_lossy(rate); + metrics.success_count.increment(1); + set_unix_now_seconds(metrics.last_success_timestamp); + metrics.rate.set_lossy(rate); Ok(rate) } @@ -255,7 +257,7 @@ impl ExchangeRateOracleClientTrait for ExchangeRateOracleClient { } Err(e) => { warn!("Query failed to join handle for timestamp {timestamp}: {e:?}"); - ETH_TO_STRK_ERROR_COUNT.increment(1); + self.metrics.error_count.increment(1); // Must remove failed query from the cache, to avoid re-polling it. queries.pop(&quantized_timestamp); return Err(ExchangeRateOracleClientError::JoinError(e.to_string())); diff --git a/crates/apollo_l1_gas_price/src/exchange_rate_oracle_test.rs b/crates/apollo_l1_gas_price/src/exchange_rate_oracle_test.rs index 746d01610c8..d7199d7eabd 100644 --- a/crates/apollo_l1_gas_price/src/exchange_rate_oracle_test.rs +++ b/crates/apollo_l1_gas_price/src/exchange_rate_oracle_test.rs @@ -10,6 +10,7 @@ use tokio::{self}; use url::Url; use crate::exchange_rate_oracle::{ExchangeRateOracleClient, ExchangeRateOracleConfig}; +use crate::metrics::ETH_TO_STRK_ORACLE_METRICS; async fn make_server(server: &mut ServerGuard, body: serde_json::Value) -> Mock { make_server_with_status(server, body, 200).await @@ -66,7 +67,7 @@ async fn eth_to_fri_rate_uses_cache_on_quantized_hit() { lag_interval_seconds: LAG_INTERVAL_SECONDS, ..Default::default() }; - let client = ExchangeRateOracleClient::new(config.clone()); + let client = ExchangeRateOracleClient::new(config.clone(), ETH_TO_STRK_ORACLE_METRICS); // First request should fail because the cache is empty.
assert!(client.fetch_rate(TIMESTAMP1).await.is_err()); @@ -138,7 +139,7 @@ async fn eth_to_fri_rate_uses_prev_cache_when_query_not_ready() { lag_interval_seconds: LAG_INTERVAL_SECONDS, ..Default::default() }; - let client = ExchangeRateOracleClient::new(config.clone()); + let client = ExchangeRateOracleClient::new(config.clone(), ETH_TO_STRK_ORACLE_METRICS); // First request should fail because the cache is empty. assert!(client.fetch_rate(TIMESTAMP1).await.is_err()); @@ -199,7 +200,7 @@ async fn eth_to_fri_rate_two_urls() { lag_interval_seconds: LAG_INTERVAL_SECONDS, ..Default::default() }; - let client = ExchangeRateOracleClient::new(config.clone()); + let client = ExchangeRateOracleClient::new(config.clone(), ETH_TO_STRK_ORACLE_METRICS); // First request should fail because the cache is empty. assert!(client.fetch_rate(TIMESTAMP1).await.is_err()); // Wait for the query to resolve. @@ -246,7 +247,7 @@ async fn eth_to_fri_rate_non_success_status_code() { lag_interval_seconds: LAG_INTERVAL_SECONDS, ..Default::default() }; - let client = ExchangeRateOracleClient::new(config); + let client = ExchangeRateOracleClient::new(config, ETH_TO_STRK_ORACLE_METRICS); // First call triggers the background query and returns QueryNotReadyError. 
assert!(matches!( diff --git a/crates/apollo_l1_gas_price/src/l1_gas_price_provider.rs b/crates/apollo_l1_gas_price/src/l1_gas_price_provider.rs index 504d529d17b..7618159d892 100644 --- a/crates/apollo_l1_gas_price/src/l1_gas_price_provider.rs +++ b/crates/apollo_l1_gas_price/src/l1_gas_price_provider.rs @@ -19,6 +19,7 @@ use tracing::{info, trace, warn}; use crate::exchange_rate_oracle::ExchangeRateOracleClient; use crate::metrics::{ register_provider_metrics, + ETH_TO_STRK_ORACLE_METRICS, L1_DATA_GAS_PRICE_LATEST_MEAN_VALUE, L1_GAS_PRICE_LATEST_MEAN_VALUE, L1_GAS_PRICE_PROVIDER_INSUFFICIENT_HISTORY, @@ -70,8 +71,10 @@ impl L1GasPriceProvider { } pub fn new_with_oracle(config: L1GasPriceProviderConfig) -> Self { - let eth_to_strk_oracle_client = - ExchangeRateOracleClient::new(config.eth_to_strk_oracle_config.clone()); + let eth_to_strk_oracle_client = ExchangeRateOracleClient::new( + config.eth_to_strk_oracle_config.clone(), + ETH_TO_STRK_ORACLE_METRICS, + ); Self::new(config, Arc::new(eth_to_strk_oracle_client)) } diff --git a/crates/apollo_l1_gas_price/src/metrics.rs b/crates/apollo_l1_gas_price/src/metrics.rs index e8c258e1b7f..04f3cfdecec 100644 --- a/crates/apollo_l1_gas_price/src/metrics.rs +++ b/crates/apollo_l1_gas_price/src/metrics.rs @@ -6,6 +6,7 @@ use apollo_infra::metrics::{ RemoteServerMetrics, }; use apollo_l1_gas_price_types::L1_GAS_PRICE_REQUEST_LABELS; +use apollo_metrics::metrics::{MetricCounter, MetricDetails, MetricGauge}; use apollo_metrics::{define_infra_metrics, define_metrics}; define_infra_metrics!(l1_gas_price); @@ -18,15 +19,67 @@ define_metrics!( MetricCounter { L1_GAS_PRICE_SCRAPER_REORG_DETECTED, "l1_gas_price_scraper_reorg_detected", "Number of times the L1 gas price scraper detected a reorganization in the base layer", init=0 }, MetricCounter { ETH_TO_STRK_ERROR_COUNT, "eth_to_strk_error_count", "Number of times the query to the Eth to Strk oracle failed due to an error or timeout", init=0 }, MetricCounter { 
ETH_TO_STRK_SUCCESS_COUNT, "eth_to_strk_success_count", "Number of times the query to the Eth to Strk oracle succeeded", init=0 }, + MetricCounter { SNIP35_STRK_USD_ERROR_COUNT, "snip35_strk_usd_error_count", "Number of times the query to the STRK to USD oracle failed due to an error or timeout", init=0 }, + MetricCounter { SNIP35_STRK_USD_SUCCESS_COUNT, "snip35_strk_usd_success_count", "Number of times the query to the STRK to USD oracle succeeded", init=0 }, MetricGauge { L1_GAS_PRICE_SCRAPER_LAST_SUCCESS_TIMESTAMP_SECONDS, "l1_gas_price_scraper_last_success_timestamp_seconds", "Unix timestamp (seconds) of the last successful L1 gas price scrape" }, MetricGauge { ETH_TO_STRK_LAST_SUCCESS_TIMESTAMP_SECONDS, "eth_to_strk_last_success_timestamp_seconds", "Unix timestamp (seconds) of the last successful ETH→STRK oracle query" }, + MetricGauge { SNIP35_STRK_USD_LAST_SUCCESS_TIMESTAMP_SECONDS, "snip35_strk_usd_last_success_timestamp_seconds", "Unix timestamp (seconds) of the last successful STRK→USD oracle query" }, MetricGauge { L1_GAS_PRICE_SCRAPER_LATEST_SCRAPED_BLOCK, "l1_gas_price_scraper_latest_scraped_block", "The latest block number that the L1 gas price scraper has scraped" }, MetricGauge { ETH_TO_STRK_RATE, "eth_to_strk_rate", "The current rate of ETH to STRK conversion" }, + MetricGauge { SNIP35_STRK_USD_RATE, "snip35_strk_usd_rate", "The STRK/USD rate from the oracle" }, MetricGauge { L1_GAS_PRICE_LATEST_MEAN_VALUE, "l1_gas_price_latest_mean_value", "The latest L1 gas price, calculated as an average by the provider client" }, MetricGauge { L1_DATA_GAS_PRICE_LATEST_MEAN_VALUE, "l1_data_gas_price_latest_mean_value", "The latest L1 data gas price, calculated as an average by the provider client" } }, ); +/// Per-pair metric handles owned by an `ExchangeRateOracleClient`. +/// Each constructed client uses its own set so concurrent ETH→STRK and +/// STRK→USD clients update disjoint Prometheus series. 
+#[derive(Copy, Clone)] +pub struct ExchangeRateOracleMetrics { + pub rate: &'static MetricGauge, + pub success_count: &'static MetricCounter, + pub error_count: &'static MetricCounter, + pub last_success_timestamp: &'static MetricGauge, +} + +impl ExchangeRateOracleMetrics { + pub fn register(&self) { + self.rate.register(); + self.success_count.register(); + self.error_count.register(); + self.last_success_timestamp.register(); + } +} + +// Manual impl: `MetricGauge` / `MetricCounter` do not derive `Debug`, +// but the surrounding `ExchangeRateOracleClient` does. Printing the prom +// name of each metric is the only useful thing to surface. +impl std::fmt::Debug for ExchangeRateOracleMetrics { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ExchangeRateOracleMetrics") + .field("rate", &self.rate.get_name()) + .field("success_count", &self.success_count.get_name()) + .field("error_count", &self.error_count.get_name()) + .field("last_success_timestamp", &self.last_success_timestamp.get_name()) + .finish() + } +} + +pub const ETH_TO_STRK_ORACLE_METRICS: ExchangeRateOracleMetrics = ExchangeRateOracleMetrics { + rate: &ETH_TO_STRK_RATE, + success_count: &ETH_TO_STRK_SUCCESS_COUNT, + error_count: &ETH_TO_STRK_ERROR_COUNT, + last_success_timestamp: &ETH_TO_STRK_LAST_SUCCESS_TIMESTAMP_SECONDS, +}; + +pub const STRK_TO_USD_ORACLE_METRICS: ExchangeRateOracleMetrics = ExchangeRateOracleMetrics { + rate: &SNIP35_STRK_USD_RATE, + success_count: &SNIP35_STRK_USD_SUCCESS_COUNT, + error_count: &SNIP35_STRK_USD_ERROR_COUNT, + last_success_timestamp: &SNIP35_STRK_USD_LAST_SUCCESS_TIMESTAMP_SECONDS, +}; + pub(crate) fn register_provider_metrics() { L1_GAS_PRICE_PROVIDER_INSUFFICIENT_HISTORY.register(); L1_GAS_PRICE_LATEST_MEAN_VALUE.register(); @@ -40,10 +93,3 @@ pub(crate) fn register_scraper_metrics() { L1_GAS_PRICE_SCRAPER_LAST_SUCCESS_TIMESTAMP_SECONDS.register(); L1_GAS_PRICE_SCRAPER_LATEST_SCRAPED_BLOCK.register(); } - -pub(crate) fn
register_eth_to_strk_metrics() { - ETH_TO_STRK_ERROR_COUNT.register(); - ETH_TO_STRK_SUCCESS_COUNT.register(); - ETH_TO_STRK_LAST_SUCCESS_TIMESTAMP_SECONDS.register(); - ETH_TO_STRK_RATE.register(); -}