Skip to content

Commit dfbf556

Browse files
committed
Track dynode retries separately
1 parent b7057c9 commit dfbf556

4 files changed

Lines changed: 81 additions & 11 deletions

File tree

core/apps/dynode/src/metrics/mod.rs

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ pub struct Metrics {
1313
proxy_requests: Family<ProxyRequestLabels, Counter>,
1414
proxy_requests_by_method: Family<ProxyRequestByMethodLabels, Counter>,
1515
proxy_response_latency: Family<ResponseLabels, Histogram>,
16+
proxy_upstream_response_latency: Family<UpstreamResponseLabels, Histogram>,
17+
proxy_retries: Family<RetryLabels, Counter>,
1618
node_host_current: Family<HostCurrentStateLabels, Gauge>,
1719
cache_hits: Family<CacheLabels, Counter>,
1820
cache_misses: Family<CacheLabels, Counter>,
@@ -41,12 +43,25 @@ pub struct HostCurrentStateLabels {
4143

4244
/// Label set for the `proxy_response_latency` histogram family.
///
/// One time series is kept per distinct (`chain`, `status`) pair; host and
/// method are deliberately not part of this label set (they live on
/// `UpstreamResponseLabels`).
#[derive(Clone, Hash, PartialEq, Eq, Debug, EncodeLabelSet)]
4345
struct ResponseLabels {
46+
// Chain identifier as passed by the caller of `add_proxy_response`.
chain: String,
47+
// Status code of the response being recorded.
status: u16,
48+
}
49+
50+
/// Label set for the `proxy_upstream_response_latency` histogram family.
///
/// One time series is kept per distinct (`chain`, `host`, `method`, `status`)
/// combination.
#[derive(Clone, Hash, PartialEq, Eq, Debug, EncodeLabelSet)]
51+
struct UpstreamResponseLabels {
4452
// Chain identifier as passed by the caller.
chain: String,
4553
// Upstream host the response came from.
host: String,
4654
// Method name after `truncate_method` has been applied
// (see `add_proxy_upstream_response`).
method: String,
4755
// Status code reported for the upstream response.
status: u16,
4856
}
4957

58+
/// Label set for the `proxy_retries` counter family.
///
/// One counter is kept per distinct (`chain`, `host`, `reason`) combination.
#[derive(Clone, Hash, PartialEq, Eq, Debug, EncodeLabelSet)]
59+
struct RetryLabels {
60+
// Chain identifier of the request being retried.
chain: String,
61+
// Upstream host whose attempt failed and triggered the retry.
host: String,
62+
// Free-form reason string supplied by the caller; the call sites in this
// commit pass either `status=<code>` or the `UPSTREAM_REQUEST_FAILED` constant.
reason: String,
63+
}
64+
5065
#[derive(Clone, Hash, PartialEq, Eq, Debug, EncodeLabelSet)]
5166
pub struct CacheLabels {
5267
chain: String,
@@ -71,6 +86,8 @@ impl Metrics {
7186
let proxy_requests = Family::<ProxyRequestLabels, Counter>::default();
7287
let proxy_requests_by_method = Family::<ProxyRequestByMethodLabels, Counter>::default();
7388
let proxy_response_latency = Family::<ResponseLabels, Histogram>::new_with_constructor(|| Histogram::new(exponential_buckets(50.0, 2.0, 6)));
89+
let proxy_upstream_response_latency = Family::<UpstreamResponseLabels, Histogram>::new_with_constructor(|| Histogram::new(exponential_buckets(50.0, 2.0, 6)));
90+
let proxy_retries = Family::<RetryLabels, Counter>::default();
7491
let node_host_current = Family::<HostCurrentStateLabels, Gauge>::default();
7592
let cache_hits = Family::<CacheLabels, Counter>::default();
7693
let cache_misses = Family::<CacheLabels, Counter>::default();
@@ -87,11 +104,13 @@ impl Metrics {
87104
"Proxy requests by host and method (HTTP path or RPC method)",
88105
proxy_requests_by_method.clone(),
89106
);
107+
registry.register("proxy_response_latency", "Proxy responses by chain and status", proxy_response_latency.clone());
90108
registry.register(
91-
"proxy_response_latency",
92-
"Proxy responses by host, path, method, and status",
93-
proxy_response_latency.clone(),
109+
"proxy_upstream_response_latency",
110+
"Upstream proxy responses by host, path, method, and status",
111+
proxy_upstream_response_latency.clone(),
94112
);
113+
registry.register("proxy_retries", "Proxy retries by chain, upstream host, and reason", proxy_retries.clone());
95114
registry.register("node_host_current", "Node current host url", node_host_current.clone());
96115
registry.register("cache_hits", "Cache hits by host and path", cache_hits.clone());
97116
registry.register("cache_misses", "Cache misses by host and path", cache_misses.clone());
@@ -105,6 +124,8 @@ impl Metrics {
105124
proxy_requests,
106125
proxy_requests_by_method,
107126
proxy_response_latency,
127+
proxy_upstream_response_latency,
128+
proxy_retries,
108129
node_host_current,
109130
cache_hits,
110131
cache_misses,
@@ -137,10 +158,10 @@ impl Metrics {
137158
}
138159
}
139160

140-
pub fn add_proxy_response(&self, chain: &str, method: &str, host: &str, status: u16, latency: u128) {
161+
pub fn add_proxy_upstream_response(&self, chain: &str, method: &str, host: &str, status: u16, latency: u128) {
141162
let method = self.truncate_method(method);
142-
self.proxy_response_latency
143-
.get_or_create(&ResponseLabels {
163+
self.proxy_upstream_response_latency
164+
.get_or_create(&UpstreamResponseLabels {
144165
chain: chain.to_string(),
145166
host: host.to_string(),
146167
method,
@@ -149,6 +170,22 @@ impl Metrics {
149170
.observe(latency as f64);
150171
}
151172

173+
/// Records one observation in the `proxy_response_latency` histogram for the
/// given `chain` and `status`.
///
/// `latency` is converted to `f64` before being observed. The call site in
/// `NodeService::handle_request` passes elapsed milliseconds.
pub fn add_proxy_response(&self, chain: &str, status: u16, latency: u128) {
174+
self.proxy_response_latency
175+
.get_or_create(&ResponseLabels { chain: chain.to_string(), status })
176+
.observe(latency as f64);
177+
}
178+
179+
/// Increments the `proxy_retries` counter for the given `chain`, upstream
/// `host`, and `reason`.
///
/// Each distinct `reason` string creates its own time series, so callers
/// should keep the set of reasons small.
pub fn add_proxy_retry(&self, chain: &str, host: &str, reason: &str) {
180+
self.proxy_retries
181+
.get_or_create(&RetryLabels {
182+
chain: chain.to_string(),
183+
host: host.to_string(),
184+
reason: reason.to_string(),
185+
})
186+
.inc();
187+
}
188+
152189
pub fn set_node_host_current(&self, chain: &str, host: &str) {
153190
self.node_host_current
154191
.get_or_create(&HostCurrentStateLabels {
@@ -266,4 +303,20 @@ mod tests {
266303
assert_eq!(m.truncate_method("/api/v1/blocks/by_height/12345"), "/api/v1/blocks/by_height/:number");
267304
assert_eq!(m.truncate_method("/v1/verylongsegmentthatisgreaterthan20characters"), "/v1/:value");
268305
}
306+
307+
// Smoke test: after recording one response and one retry, the encoded metrics
// text mentions both metric families and the label values that were supplied.
#[test]
308+
fn records_response_once_and_retries_separately() {
309+
let metrics = create_test_metrics();
310+
311+
metrics.add_proxy_response("tron", 200, 123);
312+
metrics.add_proxy_retry("tron", "api.trongrid.io", "status=429");
313+
314+
let encoded = metrics.get_metrics();
315+
// NOTE(review): the `test_` prefix presumably comes from the registry that
// `create_test_metrics` builds, and `_count` / `_total` from the text
// encoder's histogram/counter naming - confirm against those helpers.
assert!(encoded.contains("test_proxy_response_latency_count"));
316+
// Each assertion below is an independent substring check on the whole
// output; none of them ties a label value to a specific metric line.
assert!(encoded.contains("chain=\"tron\""));
317+
assert!(encoded.contains("status=\"200\""));
318+
assert!(encoded.contains("test_proxy_retries_total"));
319+
assert!(encoded.contains("host=\"api.trongrid.io\""));
320+
assert!(encoded.contains("reason=\"status=429\""));
321+
}
269322
}

core/apps/dynode/src/monitoring/service.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,16 @@ impl NodeService {
101101
}
102102

103103
/// Public entry point for a proxied request: delegates to
/// `handle_request_inner` and, when that returns `Ok`, records a single
/// `proxy_response_latency` observation for the request's chain and the
/// response status.
///
/// The inner result is returned unchanged. `Err` results are not recorded in
/// this metric.
pub async fn handle_request(&self, request: ProxyRequest) -> Result<ProxyResponse, Box<dyn Error + Send + Sync>> {
104+
// `request` is moved into `handle_request_inner` below, so a copy is kept
// to read the chain and elapsed time afterwards.
let request_for_metrics = request.clone();
105+
let result = self.handle_request_inner(request).await;
106+
if let Ok(response) = &result {
107+
self.metrics
108+
// Elapsed time is read from the cloned request once the inner call has finished.
.add_proxy_response(request_for_metrics.chain.as_ref(), response.status, request_for_metrics.elapsed().as_millis());
109+
}
110+
result
111+
}
112+
113+
async fn handle_request_inner(&self, request: ProxyRequest) -> Result<ProxyResponse, Box<dyn Error + Send + Sync>> {
104114
Self::log_incoming_request(&request);
105115

106116
let chain_config = self.get_chain_config(&request)?;
@@ -144,7 +154,11 @@ impl NodeService {
144154
if !retry_enabled {
145155
return self.log_and_create_error_response(&request, Some(remote_host.as_str()), &format!("Upstream status code: {}", response.status), upstream_data);
146156
}
147-
last_error = Some(format!("status={}", response.status));
157+
let retry_reason = format!("status={}", response.status);
158+
if index + 1 < max_attempts {
159+
self.metrics.add_proxy_retry(request.chain.as_ref(), remote_host.as_str(), &retry_reason);
160+
}
161+
last_error = Some(retry_reason);
148162
last_error_data = upstream_data;
149163
}
150164
Err(e) => {
@@ -163,6 +177,9 @@ impl NodeService {
163177
error = UPSTREAM_REQUEST_FAILED,
164178
latency = latency,
165179
);
180+
if index + 1 < max_attempts {
181+
self.metrics.add_proxy_retry(request.chain.as_ref(), remote_host.as_str(), UPSTREAM_REQUEST_FAILED);
182+
}
166183
last_error = Some(UPSTREAM_REQUEST_FAILED.to_string());
167184
}
168185
}

core/apps/dynode/src/proxy/jsonrpc.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ impl JsonRpcHandler {
6868
id: Some(call.id),
6969
});
7070

71-
metrics.add_proxy_response(
71+
metrics.add_proxy_upstream_response(
7272
request.chain.as_ref(),
7373
&call.method,
7474
url.url.host_str().unwrap_or_default(),
@@ -87,7 +87,7 @@ impl JsonRpcHandler {
8787

8888
let (response, response_status, response_body) = Self::fetch_single_response(call, request, cache, url, client, forward_headers).await?;
8989

90-
metrics.add_proxy_response(
90+
metrics.add_proxy_upstream_response(
9191
request.chain.as_ref(),
9292
&call.method,
9393
url.url.host_str().unwrap_or_default(),
@@ -150,7 +150,7 @@ impl JsonRpcHandler {
150150
let (responses, response_status) = Self::fetch_batch_responses(calls, url, client, &request.method, forward_headers).await?;
151151

152152
for call in calls {
153-
metrics.add_proxy_response(
153+
metrics.add_proxy_upstream_response(
154154
request.chain.as_ref(),
155155
&call.method,
156156
url.url.host_str().unwrap_or_default(),

core/apps/dynode/src/proxy/service.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ impl ProxyRequestService {
9999

100100
fn add_proxy_response_metrics(metrics: &Metrics, request: &ProxyRequest, methods_for_metrics: &[String], host: &str, status: u16) {
101101
for method_name in methods_for_metrics {
102-
metrics.add_proxy_response(request.chain.as_ref(), method_name, host, status, request.elapsed().as_millis());
102+
metrics.add_proxy_upstream_response(request.chain.as_ref(), method_name, host, status, request.elapsed().as_millis());
103103
}
104104
}
105105

0 commit comments

Comments
 (0)