Skip to content

Commit 507b087

Browse files
gefjon and coolreader18 authored
Add metrics for recording HTTP requests performed by procedures. (#3745)
# Description of Changes

This commit adds several new metrics to `DB_METRICS` for tracking procedures' HTTP requests:

- `procedure_http_request_size_bytes`.
- `procedure_http_response_size_bytes`.
- `procedure_num_http_requests`.
- `procedure_num_successful_http_requests`.
- `procedure_num_failed_http_requests`.
- `procedure_num_timeout_http_requests`.
- `procedure_num_in_progress_http_requests`.

See help strings in `crates/datastore/src/db_metrics/mod.rs` for details on what each of these tracks.

Closes #3712.

# API and ABI breaking changes

N/A - I don't think we count metrics as a stable API.

# Expected complexity level and risk

2, I guess? If we intend to use these for billing, some of the choices I've made about tracking may impact our business.

# Testing

None; I don't know how to test Prometheus metrics.

Co-authored-by: Noa <coolreader18@gmail.com>
1 parent 0a3dda7 commit 507b087

3 files changed

Lines changed: 138 additions & 6 deletions

File tree

crates/core/src/host/instance_env.rs

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ use crate::replica_context::ReplicaContext;
88
use crate::subscription::module_subscription_actor::{commit_and_broadcast_event, ModuleSubscriptions};
99
use crate::subscription::module_subscription_manager::{from_tx_offset, TransactionOffset};
1010
use crate::util::asyncify;
11+
use crate::util::prometheus_handle::IntGaugeExt;
1112
use chrono::{DateTime, Utc};
1213
use core::mem;
1314
use parking_lot::{Mutex, MutexGuard};
1415
use smallvec::SmallVec;
1516
use spacetimedb_client_api_messages::energy::EnergyQuanta;
17+
use spacetimedb_datastore::db_metrics::DB_METRICS;
1618
use spacetimedb_datastore::execution_context::Workload;
1719
use spacetimedb_datastore::locking_tx_datastore::state_view::StateView;
1820
use spacetimedb_datastore::locking_tx_datastore::{FuncCallType, MutTxId};
@@ -722,7 +724,20 @@ impl InstanceEnv {
722724
return Err(NodesError::WouldBlockTransaction(super::AbiCall::ProcedureHttpRequest));
723725
}
724726

725-
// TODO(procedure-metrics): record size in bytes of request.
727+
// Record in metrics that we're starting an HTTP request.
728+
DB_METRICS
729+
.procedure_num_http_requests
730+
.with_label_values(self.database_identity())
731+
.inc();
732+
DB_METRICS
733+
.procedure_http_request_size_bytes
734+
.with_label_values(self.database_identity())
735+
.inc_by((request.size_in_bytes() + body.len()) as _);
736+
// Make a guard for the `in_progress` metric that will be decremented on exit.
737+
let _in_progress_metric = DB_METRICS
738+
.procedure_num_in_progress_http_requests
739+
.with_label_values(self.database_identity())
740+
.inc_scope();
726741

727742
fn http_error<E: ToString>(err: E) -> NodesError {
728743
NodesError::HttpError(err.to_string())
@@ -752,21 +767,49 @@ impl InstanceEnv {
752767
// TODO(perf): Stash a long-lived `Client` in the env somewhere, rather than building a new one for each call.
753768
let execute_fut = reqwest::Client::new().execute(reqwest);
754769

755-
Ok(async move {
756-
let response = execute_fut.await.map_err(http_error)?;
770+
let response_fut = async {
771+
let response = execute_fut.await?;
757772

758773
// Download the response body, which in all likelihood will be a stream,
759774
// as reqwest seems to prefer that.
760775
let (response, body) = http::Response::from(response).into_parts();
761-
let body = http_body_util::BodyExt::collect(body)
776+
777+
let body = http_body_util::BodyExt::collect(body).await?.to_bytes();
778+
779+
Ok((response, body))
780+
};
781+
782+
let database_identity = *self.database_identity();
783+
784+
Ok(async move {
785+
let (response, body) = response_fut
762786
.await
763-
.map_err(http_error)?
764-
.to_bytes();
787+
.inspect_err(|err: &reqwest::Error| {
788+
// Report the request's failure in our metrics as either a timeout or a misc. failure, as appropriate.
789+
if err.is_timeout() {
790+
DB_METRICS
791+
.procedure_num_timeout_http_requests
792+
.with_label_values(&database_identity)
793+
.inc();
794+
} else {
795+
DB_METRICS
796+
.procedure_num_failed_http_requests
797+
.with_label_values(&database_identity)
798+
.inc();
799+
}
800+
})
801+
.map_err(http_error)?;
765802

766803
// Transform the `http::Response` into our `spacetimedb_lib::http::Response` type,
767804
// which has a stable BSATN encoding to pass across the WASM boundary.
768805
let response = convert_http_response(response);
769806

807+
// Record the response size in bytes.
808+
DB_METRICS
809+
.procedure_http_response_size_bytes
810+
.with_label_values(&database_identity)
811+
.inc_by((response.size_in_bytes() + body.len()) as _);
812+
770813
Ok((response, body))
771814
})
772815
}

crates/datastore/src/db_metrics/mod.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,61 @@ metrics_group!(
190190
#[help = "How many queries are evaluated in each subscribe and unsubscribe"]
191191
#[labels(db: Identity, workload: WorkloadType)]
192192
pub num_queries_evaluated: IntCounterVec,
193+
194+
#[name = spacetime_procedure_http_request_size_bytes]
195+
#[help = "Size in bytes of HTTP requests performed by procedures running in databases.
196+
197+
An individual HTTP request's size in bytes is the sum of the sizes of the URI, header names, header values and body."]
198+
#[labels(db: Identity)]
199+
pub procedure_http_request_size_bytes: IntCounterVec,
200+
201+
#[name = spacetime_procedure_http_response_size_bytes]
202+
#[help = "Size in bytes of HTTP responses to requests performed by procedures running in databases.
203+
204+
An individual HTTP response's size in bytes is the sum of the sizes of the header names, header values and body."]
205+
#[labels(db: Identity)]
206+
pub procedure_http_response_size_bytes: IntCounterVec,
207+
208+
#[name = spacetime_procedure_num_http_requests]
209+
#[help = "Number of HTTP requests performed by procedures running in databases.
210+
211+
Should be the sum of `spacetime_procedure_num_successful_http_requests`,
212+
`spacetime_procedure_num_failed_http_requests`, `spacetime_procedure_num_timeout_http_requests`
213+
and `spacetime_procedure_num_in_progress_http_requests`."]
214+
#[labels(db: Identity)]
215+
pub procedure_num_http_requests: IntCounterVec,
216+
217+
#[name = spacetime_procedure_num_successful_http_requests]
218+
#[help = "Number of HTTP requests performed by procedures which terminate successfully, returning a response.
219+
220+
Each HTTP request performed by a database will be counted either here, in `spacetime_procedure_num_failed_http_requests`,
221+
`spacetime_procedure_num_timeout_http_requests` or in `spacetime_procedure_num_in_progress_http_requests`."]
222+
#[labels(db: Identity)]
223+
pub procedure_num_successful_http_requests: IntCounterVec,
224+
225+
#[name = spacetime_procedure_num_failed_http_requests]
226+
#[help = "Number of HTTP requests performed by procedures which fail for reasons other than a timeout.
227+
228+
Each HTTP request performed by a database will be counted either here, in `spacetime_procedure_num_successful_http_requests`,
229+
`spacetime_procedure_num_timeout_http_requests` or in `spacetime_procedure_num_in_progress_http_requests`."]
230+
#[labels(db: Identity)]
231+
pub procedure_num_failed_http_requests: IntCounterVec,
232+
233+
#[name = spacetime_procedure_num_timeout_http_requests]
234+
#[help = "Number of HTTP requests performed by procedures which fail due to a timeout.
235+
236+
Each HTTP request performed by a database will be counted either here, in `spacetime_procedure_num_successful_http_requests`,
237+
`spacetime_procedure_num_failed_http_requests`, or in `spacetime_procedure_num_in_progress_http_requests`."]
238+
#[labels(db: Identity)]
239+
pub procedure_num_timeout_http_requests: IntCounterVec,
240+
241+
#[name = spacetime_procedure_num_in_progress_http_requests]
242+
#[help = "Number of HTTP requests currently in progress within procedures.
243+
244+
Each HTTP request performed by a database will be counted either here, in `spacetime_procedure_num_successful_http_requests`,
245+
`spacetime_procedure_num_failed_http_requests`, or in `spacetime_procedure_num_timeout_http_requests`."]
246+
#[labels(db: Identity)]
247+
pub procedure_num_in_progress_http_requests: IntGaugeVec,
193248
}
194249
);
195250

crates/lib/src/http.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,18 @@ pub struct Request {
3232
pub version: Version,
3333
}
3434

35+
impl Request {
36+
/// Return the size of this request's URI and [`Headers`]
37+
/// for purposes of metrics reporting.
38+
///
39+
/// Ignores the size of the [`Method`] and [`Version`] as they are effectively constant.
40+
///
41+
/// As the body is stored externally to the `Request`, metrics reporting must count its size separately.
42+
pub fn size_in_bytes(&self) -> usize {
43+
self.uri.len() + self.headers.size_in_bytes()
44+
}
45+
}
46+
3547
/// Represents an HTTP method.
3648
#[derive(Clone, SpacetimeType, PartialEq, Eq)]
3749
#[sats(crate = crate, name = "HttpMethod")]
@@ -112,6 +124,17 @@ impl Headers {
112124
pub fn into_iter(self) -> impl Iterator<Item = (Box<str>, Box<[u8]>)> {
113125
IntoIterator::into_iter(self.entries).map(|HttpHeaderPair { name, value }| (name, value))
114126
}
127+
128+
/// The sum of the lengths of all the header names and header values.
129+
///
130+
/// For headers with multiple values for the same header name,
131+
/// the length of the header name is counted once for each occurrence.
132+
fn size_in_bytes(&self) -> usize {
133+
self.entries
134+
.iter()
135+
.map(|HttpHeaderPair { name, value }| name.len() + value.len())
136+
.sum::<usize>()
137+
}
115138
}
116139

117140
#[derive(Clone, SpacetimeType)]
@@ -131,3 +154,14 @@ pub struct Response {
131154
/// A valid HTTP response status code, sourced from an already-validated `http::StatusCode`.
132155
pub code: u16,
133156
}
157+
158+
impl Response {
159+
/// Return the size of this response's [`Headers`] for purposes of metrics reporting.
160+
///
161+
/// Ignores the size of the `code` and [`Version`] as they are effectively constant.
162+
///
163+
/// As the body is stored externally to the `Response`, metrics reporting must count its size separately.
164+
pub fn size_in_bytes(&self) -> usize {
165+
self.headers.size_in_bytes()
166+
}
167+
}

0 commit comments

Comments
 (0)