Skip to content

Commit 5c5827e

Browse files
ShahakShamaclaude
andauthored
apollo_l1_gas_price: parameterize ExchangeRateOracleClient by metric set (#14199)
Goal: Prerequisite for PR #14158 (`02_provider_holds_oracle`). Before that PR introduces a second `ExchangeRateOracleClient` for STRK→USD, parameterize the client by metric set so the two oracles update disjoint Prometheus series instead of contaminating each other's gauges and counters. Closes the issue flagged by Cursor bugbot + matanl-starkware on PR #14158. Change summary: - `apollo_l1_gas_price::metrics` defines `ExchangeRateOracleMetrics` (`rate`, `success_count`, `error_count`, `last_success_timestamp`) as a `Copy` struct of `&'static` refs, plus two `pub const` instances: `ETH_TO_STRK_ORACLE_METRICS` and `STRK_TO_USD_ORACLE_METRICS`. - `SNIP35_STRK_USD_RATE` moves from `apollo_consensus_orchestrator::metrics` into `apollo_l1_gas_price::metrics`, keeping the Prometheus name `snip35_strk_usd_rate` so the existing Grafana panel keeps working without a query rewrite. - Adds three new SNIP-35 metrics owned by the oracle client: `snip35_strk_usd_success_count`, `snip35_strk_usd_error_count`, `snip35_strk_usd_last_success_timestamp_seconds`. - `ExchangeRateOracleClient::new(config, metrics)` now stores the metric set; `spawn_query`, `resolve_query`, and `fetch_rate` route through `self.metrics.*` instead of hardcoded `ETH_TO_STRK_*` symbols. Manual `Debug` impl on `ExchangeRateOracleMetrics` (the underlying `MetricCounter`/`MetricGauge` don't derive Debug) prints prom names. - `L1GasPriceProvider::new_with_oracle` passes `ETH_TO_STRK_ORACLE_METRICS` to the existing client; PR 02 will pass `STRK_TO_USD_ORACLE_METRICS` to its new client. - `SequencerConsensusContext::compute_snip35_fee_proposal` stops calling `SNIP35_STRK_USD_RATE.set_lossy(rate)`; the oracle client publishes that gauge itself now. Consensus still reads the rate to compute `fee_target`. - `apollo_dashboard::panels::consensus` imports `SNIP35_STRK_USD_RATE` (and the 3 new metrics) from `apollo_l1_gas_price` instead of `apollo_consensus_orchestrator`; adds 3 new panels (success / error / seconds-since-last-update) to the SNIP-35 row, matching the ETH→STRK panel patterns. - `dev_grafana.json` updated by hand to add the 3 new panel entries; the Rust toolchain is unavailable in this environment so the generator could not be re-run. Verify locally with `cargo run --bin sequencer_dashboard_generator` before pushing. Decision points: - Reuse `snip35_strk_usd_rate` rather than defining a new `strk_to_usd_rate`: the existing time series + Grafana panel keep continuity, and the SNIP-35 dashboard row is the natural home for the STRK→USD oracle gauges. - Move the metric definition into `apollo_l1_gas_price` (owner) rather than keeping it in `apollo_consensus_orchestrator` and passing a `&'static` reference down: avoids cross-crate metric ownership and preserves the existing one-way crate dependency (consensus → l1_gas_price_types). - `pub const` struct of `&'static` refs (matches the existing `TEST_LOCAL_SERVER_METRICS` pattern in `apollo_infra`) rather than `static`: lets `ExchangeRateOracleClient` store the set by value (Copy) with no lifetime annotations on the struct. - Add all 3 observability metrics (success/error/timestamp) symmetrically with the ETH→STRK side rather than just the rate: keeps the new dashboard row useful — "did the STRK→USD query just fail or just stop firing?" is answerable without leaning on log queries. Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent b419998 commit 5c5827e

8 files changed

Lines changed: 178 additions & 35 deletions

File tree

crates/apollo_consensus_orchestrator/src/metrics.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ define_metrics!(
3333
// Proposal validation failure metrics
3434
LabeledMetricCounter { CONSENSUS_VALIDATE_PROPOSAL_FAILURE , "consensus_validate_proposal_failure", "Number of failures while validating a proposal", init = 0, labels = VALIDATE_PROPOSAL_FAILURE_REASON },
3535

36-
// SNIP-35 dynamic gas pricing metrics
36+
// SNIP-35 dynamic gas pricing metrics.
37+
// STRK/USD rate metrics are in `apollo_l1_gas_price`.
3738
MetricGauge { SNIP35_FEE_ACTUAL, "snip35_fee_actual", "The current fee_actual (median of recent fee_proposals sliding window)" },
3839
MetricGauge { SNIP35_FEE_PROPOSAL, "snip35_fee_proposal", "The fee_proposal this node published in the latest block" },
3940
MetricGauge { SNIP35_FEE_TARGET, "snip35_fee_target", "The fee_target computed from the STRK/USD oracle" },
40-
MetricGauge { SNIP35_STRK_USD_RATE, "snip35_strk_usd_rate", "The STRK/USD rate from the oracle" },
4141
}
4242
);
4343

@@ -109,5 +109,4 @@ pub(crate) fn register_metrics() {
109109
SNIP35_FEE_ACTUAL.register();
110110
SNIP35_FEE_PROPOSAL.register();
111111
SNIP35_FEE_TARGET.register();
112-
SNIP35_STRK_USD_RATE.register();
113112
}

crates/apollo_consensus_orchestrator/src/sequencer_consensus_context.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@ use crate::metrics::{
9797
SNIP35_FEE_ACTUAL,
9898
SNIP35_FEE_PROPOSAL,
9999
SNIP35_FEE_TARGET,
100-
SNIP35_STRK_USD_RATE,
101100
};
102101
use crate::snip35::{
103102
compute_fee_actual,
@@ -461,7 +460,6 @@ impl SequencerConsensusContext {
461460
let fee_target = match &self.deps.strk_to_usd_oracle {
462461
Some(oracle) => match oracle.fetch_rate(timestamp).await {
463462
Ok(rate) => {
464-
SNIP35_STRK_USD_RATE.set_lossy(rate);
465463
let target = compute_fee_target(TARGET_ATTO_USD_PER_L2_GAS, rate);
466464
match target {
467465
Some(t) => SNIP35_FEE_TARGET.set_lossy(t.0),

crates/apollo_dashboard/resources/dev_grafana.json

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,56 @@
432432
"snip35_strk_usd_rate{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"} / 1e18"
433433
],
434434
"extra_params": {}
435+
},
436+
{
437+
"title": "SNIP-35 STRK/USD Rate Query Success (binary)",
438+
"description": "Indicates whether the STRK→USD rate query succeeded (1m window)",
439+
"type": "timeseries",
440+
"exprs": [
441+
"changes(snip35_strk_usd_success_count{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[1m])"
442+
],
443+
"extra_params": {
444+
"log_query": "\"Caching conversion rate for timestamp\""
445+
}
446+
},
447+
{
448+
"title": "SNIP-35 STRK/USD Rate Query Error Count",
449+
"description": "The number of times the STRK→USD rate query failed (10m window)",
450+
"type": "timeseries",
451+
"exprs": [
452+
"increase(snip35_strk_usd_error_count{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[10m])"
453+
],
454+
"extra_params": {
455+
"log_query": "\"Failed to resolve query to\" OR \"Timeout when resolving query to\" OR \"Query failed to join handle for timestamp\""
456+
}
457+
},
458+
{
459+
"title": "Seconds Since Last Successful STRK→USD Rate Update",
460+
"description": "The number of seconds since the last successful STRK→USD rate update (assuming there was an update in the last 12 hours).",
461+
"type": "timeseries",
462+
"exprs": [
463+
"time() - max(last_over_time(snip35_strk_usd_last_success_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"$pod\"}[12h]))"
464+
],
465+
"extra_params": {
466+
"unit": "s",
467+
"thresholds": {
468+
"mode": "absolute",
469+
"steps": [
470+
{
471+
"color": "green",
472+
"value": null
473+
},
474+
{
475+
"color": "yellow",
476+
"value": 1200.0
477+
},
478+
{
479+
"color": "red",
480+
"value": 1800.0
481+
}
482+
]
483+
}
484+
}
435485
}
436486
],
437487
"collapsed": true

crates/apollo_dashboard/src/panels/consensus.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,12 @@ use apollo_consensus_orchestrator::metrics::{
5959
SNIP35_FEE_ACTUAL,
6060
SNIP35_FEE_PROPOSAL,
6161
SNIP35_FEE_TARGET,
62+
};
63+
use apollo_l1_gas_price::metrics::{
64+
SNIP35_STRK_USD_ERROR_COUNT,
65+
SNIP35_STRK_USD_LAST_SUCCESS_TIMESTAMP_SECONDS,
6266
SNIP35_STRK_USD_RATE,
67+
SNIP35_STRK_USD_SUCCESS_COUNT,
6368
};
6469
use apollo_metrics::metrics::MetricQueryName;
6570
use apollo_network::metrics::{LABEL_NAME_BROADCAST_DROP_REASON, LABEL_NAME_EVENT_TYPE};
@@ -70,6 +75,7 @@ use crate::dashboard::Row;
7075
use crate::panel::{traffic_light_thresholds, Panel, PanelType, Unit};
7176
use crate::query_builder::{
7277
increase,
78+
seconds_since_last_timestamp,
7379
sum_by_label,
7480
DisplayMethod,
7581
DEFAULT_DURATION,
@@ -751,6 +757,41 @@ fn get_panel_snip35_strk_usd_rate() -> Panel {
751757
)
752758
}
753759

760+
fn get_panel_snip35_strk_usd_error_count() -> Panel {
761+
Panel::new(
762+
"SNIP-35 STRK/USD Rate Query Error Count",
763+
format!("The number of times the STRK→USD rate query failed ({DEFAULT_DURATION} window)"),
764+
increase(&SNIP35_STRK_USD_ERROR_COUNT, DEFAULT_DURATION),
765+
PanelType::TimeSeries,
766+
)
767+
.with_log_query(
768+
"\"Failed to resolve query to\" OR \"Timeout when resolving query to\" OR \"Query failed \
769+
to join handle for timestamp\"",
770+
)
771+
}
772+
773+
fn get_panel_snip35_strk_usd_success_count() -> Panel {
774+
Panel::new(
775+
"SNIP-35 STRK/USD Rate Query Success (binary)",
776+
"Indicates whether the STRK→USD rate query succeeded (1m window)",
777+
format!("changes({}[1m])", SNIP35_STRK_USD_SUCCESS_COUNT.get_name_with_filter()),
778+
PanelType::TimeSeries,
779+
)
780+
.with_log_query("Caching conversion rate for timestamp")
781+
}
782+
783+
fn get_panel_snip35_strk_usd_seconds_since_last_successful_update() -> Panel {
784+
Panel::new(
785+
"Seconds Since Last Successful STRK→USD Rate Update",
786+
"The number of seconds since the last successful STRK→USD rate update (assuming there was \
787+
an update in the last 12 hours).",
788+
seconds_since_last_timestamp(&SNIP35_STRK_USD_LAST_SUCCESS_TIMESTAMP_SECONDS),
789+
PanelType::TimeSeries,
790+
)
791+
.with_unit(Unit::Seconds)
792+
.with_absolute_thresholds(traffic_light_thresholds(1200.0, 1800.0))
793+
}
794+
754795
pub(crate) fn get_snip35_row() -> Row {
755796
Row::new(
756797
"SNIP-35",
@@ -759,6 +800,9 @@ pub(crate) fn get_snip35_row() -> Row {
759800
get_panel_snip35_fee_proposal(),
760801
get_panel_snip35_fee_target(),
761802
get_panel_snip35_strk_usd_rate(),
803+
get_panel_snip35_strk_usd_success_count(),
804+
get_panel_snip35_strk_usd_error_count(),
805+
get_panel_snip35_strk_usd_seconds_since_last_successful_update(),
762806
],
763807
)
764808
}

crates/apollo_l1_gas_price/src/exchange_rate_oracle.rs

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,7 @@ use tokio_util::task::AbortOnDropHandle;
1818
use tracing::{debug, info, instrument, warn};
1919
use url::Url;
2020

21-
use crate::metrics::{
22-
register_eth_to_strk_metrics,
23-
ETH_TO_STRK_ERROR_COUNT,
24-
ETH_TO_STRK_LAST_SUCCESS_TIMESTAMP_SECONDS,
25-
ETH_TO_STRK_RATE,
26-
ETH_TO_STRK_SUCCESS_COUNT,
27-
};
21+
use crate::metrics::ExchangeRateOracleMetrics;
2822

2923
#[cfg(test)]
3024
#[path = "exchange_rate_oracle_test.rs"]
@@ -54,7 +48,9 @@ pub struct UrlAndHeaderMap {
5448

5549
type PriceQuery = AbortOnDropHandle<Result<u128, ExchangeRateOracleClientError>>;
5650

57-
/// Client for interacting with the eth to strk Oracle API.
51+
/// Client for interacting with an exchange-rate oracle API.
52+
/// Concrete pair (ETH→STRK, STRK→USD, ...) is determined by `config.url_header_list`
53+
/// and the `metrics` set passed at construction.
5854
#[derive(Clone, Debug)]
5955
pub struct ExchangeRateOracleClient {
6056
config: ExchangeRateOracleConfig,
@@ -65,15 +61,16 @@ pub struct ExchangeRateOracleClient {
6561
client: reqwest::Client,
6662
cached_prices: Arc<Mutex<LruCache<u64, u128>>>,
6763
queries: Arc<Mutex<LruCache<u64, PriceQuery>>>,
64+
metrics: ExchangeRateOracleMetrics,
6865
}
6966

7067
impl ExchangeRateOracleClient {
71-
pub fn new(config: ExchangeRateOracleConfig) -> Self {
68+
pub fn new(config: ExchangeRateOracleConfig, metrics: ExchangeRateOracleMetrics) -> Self {
7269
info!(
7370
"Creating ExchangeRateOracleClient with: urls={:?} lag_interval_seconds={}",
7471
config.url_header_list, config.lag_interval_seconds
7572
);
76-
register_eth_to_strk_metrics();
73+
metrics.register();
7774
let url_header_list = config
7875
.url_header_list
7976
.as_ref()
@@ -95,6 +92,7 @@ impl ExchangeRateOracleClient {
9592
queries: Arc::new(Mutex::new(LruCache::new(
9693
NonZeroUsize::new(config.max_cache_size).expect("Invalid cache size"),
9794
))),
95+
metrics,
9896
}
9997
}
10098

@@ -112,6 +110,7 @@ impl ExchangeRateOracleClient {
112110
let index_clone = self.index.clone();
113111
let url_header_list = self.url_header_list.clone();
114112
let list_len = url_header_list.len();
113+
let metrics = self.metrics;
115114
let future = async move {
116115
let initial_index = index_clone.load(Ordering::SeqCst);
117116
for (i, url_and_headers) in
@@ -133,7 +132,7 @@ impl ExchangeRateOracleClient {
133132
)));
134133
}
135134
let body = response.text().await?;
136-
let rate = resolve_query(body)?;
135+
let rate = resolve_query(body, &metrics)?;
137136
Ok::<_, ExchangeRateOracleClientError>(rate)
138137
})
139138
.await;
@@ -152,7 +151,7 @@ impl ExchangeRateOracleClient {
152151
warn!("Timeout when resolving query to {url}");
153152
}
154153
};
155-
ETH_TO_STRK_ERROR_COUNT.increment(1);
154+
metrics.error_count.increment(1);
156155
}
157156
warn!("All {list_len} URLs in the list failed for timestamp {adjusted_timestamp}");
158157
Err(ExchangeRateOracleClientError::AllUrlsFailedError(
@@ -164,7 +163,10 @@ impl ExchangeRateOracleClient {
164163
}
165164
}
166165

167-
fn resolve_query(body: String) -> Result<u128, ExchangeRateOracleClientError> {
166+
fn resolve_query(
167+
body: String,
168+
metrics: &ExchangeRateOracleMetrics,
169+
) -> Result<u128, ExchangeRateOracleClientError> {
168170
let Ok(json): Result<serde_json::Value, _> = serde_json::from_str(&body) else {
169171
return Err(ExchangeRateOracleClientError::ParseError(format!(
170172
"Failed to parse JSON: {body}"
@@ -199,9 +201,9 @@ fn resolve_query(body: String) -> Result<u128, ExchangeRateOracleClientError> {
199201
decimals,
200202
));
201203
}
202-
ETH_TO_STRK_SUCCESS_COUNT.increment(1);
203-
set_unix_now_seconds(&ETH_TO_STRK_LAST_SUCCESS_TIMESTAMP_SECONDS);
204-
ETH_TO_STRK_RATE.set_lossy(rate);
204+
metrics.success_count.increment(1);
205+
set_unix_now_seconds(metrics.last_success_timestamp);
206+
metrics.rate.set_lossy(rate);
205207
Ok(rate)
206208
}
207209

@@ -255,7 +257,7 @@ impl ExchangeRateOracleClientTrait for ExchangeRateOracleClient {
255257
}
256258
Err(e) => {
257259
warn!("Query failed to join handle for timestamp {timestamp}: {e:?}");
258-
ETH_TO_STRK_ERROR_COUNT.increment(1);
260+
self.metrics.error_count.increment(1);
259261
// Must remove failed query from the cache, to avoid re-polling it.
260262
queries.pop(&quantized_timestamp);
261263
return Err(ExchangeRateOracleClientError::JoinError(e.to_string()));

crates/apollo_l1_gas_price/src/exchange_rate_oracle_test.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use tokio::{self};
1010
use url::Url;
1111

1212
use crate::exchange_rate_oracle::{ExchangeRateOracleClient, ExchangeRateOracleConfig};
13+
use crate::metrics::ETH_TO_STRK_ORACLE_METRICS;
1314

1415
async fn make_server(server: &mut ServerGuard, body: serde_json::Value) -> Mock {
1516
make_server_with_status(server, body, 200).await
@@ -66,7 +67,7 @@ async fn eth_to_fri_rate_uses_cache_on_quantized_hit() {
6667
lag_interval_seconds: LAG_INTERVAL_SECONDS,
6768
..Default::default()
6869
};
69-
let client = ExchangeRateOracleClient::new(config.clone());
70+
let client = ExchangeRateOracleClient::new(config.clone(), ETH_TO_STRK_ORACLE_METRICS);
7071

7172
// First request should fail because the cache is empty.
7273
assert!(client.fetch_rate(TIMESTAMP1).await.is_err());
@@ -138,7 +139,7 @@ async fn eth_to_fri_rate_uses_prev_cache_when_query_not_ready() {
138139
lag_interval_seconds: LAG_INTERVAL_SECONDS,
139140
..Default::default()
140141
};
141-
let client = ExchangeRateOracleClient::new(config.clone());
142+
let client = ExchangeRateOracleClient::new(config.clone(), ETH_TO_STRK_ORACLE_METRICS);
142143

143144
// First request should fail because the cache is empty.
144145
assert!(client.fetch_rate(TIMESTAMP1).await.is_err());
@@ -199,7 +200,7 @@ async fn eth_to_fri_rate_two_urls() {
199200
lag_interval_seconds: LAG_INTERVAL_SECONDS,
200201
..Default::default()
201202
};
202-
let client = ExchangeRateOracleClient::new(config.clone());
203+
let client = ExchangeRateOracleClient::new(config.clone(), ETH_TO_STRK_ORACLE_METRICS);
203204
// First request should fail because the cache is empty.
204205
assert!(client.fetch_rate(TIMESTAMP1).await.is_err());
205206
// Wait for the query to resolve.
@@ -246,7 +247,7 @@ async fn eth_to_fri_rate_non_success_status_code() {
246247
lag_interval_seconds: LAG_INTERVAL_SECONDS,
247248
..Default::default()
248249
};
249-
let client = ExchangeRateOracleClient::new(config);
250+
let client = ExchangeRateOracleClient::new(config, ETH_TO_STRK_ORACLE_METRICS);
250251

251252
// First call triggers the background query and returns QueryNotReadyError.
252253
assert!(matches!(

crates/apollo_l1_gas_price/src/l1_gas_price_provider.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use tracing::{info, trace, warn};
1919
use crate::exchange_rate_oracle::ExchangeRateOracleClient;
2020
use crate::metrics::{
2121
register_provider_metrics,
22+
ETH_TO_STRK_ORACLE_METRICS,
2223
L1_DATA_GAS_PRICE_LATEST_MEAN_VALUE,
2324
L1_GAS_PRICE_LATEST_MEAN_VALUE,
2425
L1_GAS_PRICE_PROVIDER_INSUFFICIENT_HISTORY,
@@ -70,8 +71,10 @@ impl L1GasPriceProvider {
7071
}
7172

7273
pub fn new_with_oracle(config: L1GasPriceProviderConfig) -> Self {
73-
let eth_to_strk_oracle_client =
74-
ExchangeRateOracleClient::new(config.eth_to_strk_oracle_config.clone());
74+
let eth_to_strk_oracle_client = ExchangeRateOracleClient::new(
75+
config.eth_to_strk_oracle_config.clone(),
76+
ETH_TO_STRK_ORACLE_METRICS,
77+
);
7578
Self::new(config, Arc::new(eth_to_strk_oracle_client))
7679
}
7780

0 commit comments

Comments
 (0)