Skip to content

Commit 1c27292

Browse files
committed
Introduce telemetry for observability
This introduces the foundational telemetry infrastructure to improve the observability of LDK Server. It adds a new `/metrics` endpoint exposed on the REST service address, which serves Prometheus-compatible metrics. This endpoint is public and does not require HMAC authentication, allowing for easy integration with monitoring systems. - Added a `Metrics` utility struct to hold all the metrics we need to expose. This is the first step in a larger effort to provide comprehensive telemetry. Future updates will expand this to include other detailed metrics for channels, balances, payments, etc.
1 parent 39d199b commit 1c27292

6 files changed

Lines changed: 385 additions & 28 deletions

File tree

ldk-server-client/src/client.rs

Lines changed: 55 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,13 @@ use ldk_server_protos::api::{
2929
use ldk_server_protos::endpoints::{
3030
BOLT11_RECEIVE_PATH, BOLT11_SEND_PATH, BOLT12_RECEIVE_PATH, BOLT12_SEND_PATH,
3131
CLOSE_CHANNEL_PATH, CONNECT_PEER_PATH, DISCONNECT_PEER_PATH, EXPORT_PATHFINDING_SCORES_PATH,
32-
FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH,
33-
LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, ONCHAIN_RECEIVE_PATH,
34-
ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH,
35-
SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
32+
FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_METRICS_PATH, GET_NODE_INFO_PATH,
33+
GET_PAYMENT_DETAILS_PATH, LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH,
34+
ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH,
35+
SPLICE_OUT_PATH, SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
3636
};
3737
use ldk_server_protos::error::{ErrorCode, ErrorResponse};
38+
use prost::bytes::Bytes;
3839
use prost::Message;
3940
use reqwest::header::CONTENT_TYPE;
4041
use reqwest::{Certificate, Client};
@@ -58,6 +59,11 @@ pub struct LdkServerClient {
5859
api_key: String,
5960
}
6061

62+
enum RequestType {
63+
Get,
64+
Post,
65+
}
66+
6167
impl LdkServerClient {
6268
/// Constructs a [`LdkServerClient`] using `base_url` as the ldk-server endpoint.
6369
///
@@ -103,6 +109,18 @@ impl LdkServerClient {
103109
self.post_request(&request, &url).await
104110
}
105111

112+
/// Retrieve the node metrics in Prometheus format.
113+
pub async fn get_metrics(&self) -> Result<String, LdkServerError> {
114+
let url = format!("https://{}/{GET_METRICS_PATH}", self.base_url);
115+
let payload = self.make_request(&url, RequestType::Get, None, false).await?;
116+
String::from_utf8(payload.to_vec()).map_err(|e| {
117+
LdkServerError::new(
118+
InternalError,
119+
format!("Failed to decode metrics response as string: {}", e),
120+
)
121+
})
122+
}
123+
106124
/// Retrieves an overview of all known balances.
107125
/// For API contract/usage, refer to docs for [`GetBalancesRequest`] and [`GetBalancesResponse`].
108126
pub async fn get_balances(
@@ -314,31 +332,46 @@ impl LdkServerClient {
314332
&self, request: &Rq, url: &str,
315333
) -> Result<Rs, LdkServerError> {
316334
let request_body = request.encode_to_vec();
317-
let auth_header = self.compute_auth_header(&request_body);
318-
let response_raw = self
319-
.client
320-
.post(url)
321-
.header(CONTENT_TYPE, APPLICATION_OCTET_STREAM)
322-
.header("X-Auth", auth_header)
323-
.body(request_body)
324-
.send()
325-
.await
326-
.map_err(|e| {
327-
LdkServerError::new(InternalError, format!("HTTP request failed: {}", e))
328-
})?;
335+
let payload = self.make_request(url, RequestType::Post, Some(request_body), true).await?;
336+
Rs::decode(&payload[..]).map_err(|e| {
337+
LdkServerError::new(InternalError, format!("Failed to decode success response: {}", e))
338+
})
339+
}
340+
341+
async fn make_request(
342+
&self, url: &str, request_type: RequestType, body: Option<Vec<u8>>, authenticated: bool,
343+
) -> Result<Bytes, LdkServerError> {
344+
let builder = match request_type {
345+
RequestType::Get => self.client.get(url),
346+
RequestType::Post => self.client.post(url),
347+
};
348+
349+
let body_for_auth = body.as_deref().unwrap_or(&[]);
350+
351+
let builder = if authenticated {
352+
let auth_header = self.compute_auth_header(body_for_auth);
353+
builder.header("X-Auth", auth_header)
354+
} else {
355+
builder
356+
};
357+
358+
let builder = if let Some(body_content) = body {
359+
builder.header(CONTENT_TYPE, APPLICATION_OCTET_STREAM).body(body_content)
360+
} else {
361+
builder
362+
};
363+
364+
let response_raw = builder.send().await.map_err(|e| {
365+
LdkServerError::new(InternalError, format!("HTTP request failed: {}", e))
366+
})?;
329367

330368
let status = response_raw.status();
331369
let payload = response_raw.bytes().await.map_err(|e| {
332370
LdkServerError::new(InternalError, format!("Failed to read response body: {}", e))
333371
})?;
334372

335373
if status.is_success() {
336-
Ok(Rs::decode(&payload[..]).map_err(|e| {
337-
LdkServerError::new(
338-
InternalError,
339-
format!("Failed to decode success response: {}", e),
340-
)
341-
})?)
374+
Ok(payload)
342375
} else {
343376
let error_response = ErrorResponse::decode(&payload[..]).map_err(|e| {
344377
LdkServerError::new(

ldk-server-protos/src/endpoints.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,4 @@ pub const SPONTANEOUS_SEND_PATH: &str = "SpontaneousSend";
3131
pub const SIGN_MESSAGE_PATH: &str = "SignMessage";
3232
pub const VERIFY_SIGNATURE_PATH: &str = "VerifySignature";
3333
pub const EXPORT_PATHFINDING_SCORES_PATH: &str = "ExportPathfindingScores";
34+
pub const GET_METRICS_PATH: &str = "metrics";

ldk-server/src/main.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ use crate::io::persist::{
5050
use crate::service::NodeService;
5151
use crate::util::config::{load_config, ArgsConfig, ChainSource};
5252
use crate::util::logger::ServerLogger;
53+
use crate::util::metrics::{Metrics, BUILD_METRICS_INTERVAL};
5354
use crate::util::proto_adapter::{forwarded_payment_to_proto, payment_to_proto};
5455
use crate::util::tls::get_or_generate_tls_config;
5556

@@ -256,6 +257,20 @@ fn main() {
256257
}
257258
};
258259
let event_node = Arc::clone(&node);
260+
261+
let metrics_node = Arc::clone(&node);
262+
let mut interval = tokio::time::interval(BUILD_METRICS_INTERVAL);
263+
let metrics = Arc::new(Metrics::new());
264+
let metrics_bg = Arc::clone(&metrics);
265+
let event_metrics = Arc::clone(&metrics);
266+
267+
runtime.spawn(async move {
268+
loop {
269+
interval.tick().await;
270+
metrics_bg.update_all_pollable_metrics(&metrics_node);
271+
}
272+
});
273+
259274
let rest_svc_listener = TcpListener::bind(config_file.rest_service_addr)
260275
.await
261276
.expect("Failed to bind listening port");
@@ -320,6 +335,8 @@ fn main() {
320335
&event_node,
321336
Arc::clone(&event_publisher),
322337
Arc::clone(&paginated_store)).await;
338+
339+
event_metrics.update_total_successful_payments_count(&event_node);
323340
},
324341
Event::PaymentFailed {payment_id, ..} => {
325342
let payment_id = payment_id.expect("PaymentId expected for ldk-server >=0.1");
@@ -331,6 +348,8 @@ fn main() {
331348
&event_node,
332349
Arc::clone(&event_publisher),
333350
Arc::clone(&paginated_store)).await;
351+
352+
event_metrics.update_total_failed_payments_count(&event_node);
334353
},
335354
Event::PaymentClaimable {payment_id, ..} => {
336355
if let Some(payment_details) = event_node.payment(&payment_id) {
@@ -415,7 +434,7 @@ fn main() {
415434
res = rest_svc_listener.accept() => {
416435
match res {
417436
Ok((stream, _)) => {
418-
let node_service = NodeService::new(Arc::clone(&node), Arc::clone(&paginated_store), api_key.clone());
437+
let node_service = NodeService::new(Arc::clone(&node), Arc::clone(&paginated_store), api_key.clone(), Arc::clone(&metrics));
419438
let acceptor = tls_acceptor.clone();
420439
runtime.spawn(async move {
421440
match acceptor.accept(stream).await {

ldk-server/src/service.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ use ldk_node::Node;
2121
use ldk_server_protos::endpoints::{
2222
BOLT11_RECEIVE_PATH, BOLT11_SEND_PATH, BOLT12_RECEIVE_PATH, BOLT12_SEND_PATH,
2323
CLOSE_CHANNEL_PATH, CONNECT_PEER_PATH, DISCONNECT_PEER_PATH, EXPORT_PATHFINDING_SCORES_PATH,
24-
FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH,
25-
LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, ONCHAIN_RECEIVE_PATH,
26-
ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH,
27-
SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
24+
FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_METRICS_PATH, GET_NODE_INFO_PATH,
25+
GET_PAYMENT_DETAILS_PATH, LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH,
26+
ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH,
27+
SPLICE_OUT_PATH, SPONTANEOUS_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH,
2828
};
2929
use prost::Message;
3030

@@ -53,6 +53,7 @@ use crate::api::spontaneous_send::handle_spontaneous_send_request;
5353
use crate::api::update_channel_config::handle_update_channel_config_request;
5454
use crate::api::verify_signature::handle_verify_signature_request;
5555
use crate::io::persist::paginated_kv_store::PaginatedKVStore;
56+
use crate::util::metrics::Metrics;
5657
use crate::util::proto_adapter::to_error_response;
5758

5859
// Maximum request body size: 10 MB
@@ -64,13 +65,15 @@ pub struct NodeService {
6465
node: Arc<Node>,
6566
paginated_kv_store: Arc<dyn PaginatedKVStore>,
6667
api_key: String,
68+
metrics: Arc<Metrics>,
6769
}
6870

6971
impl NodeService {
7072
pub(crate) fn new(
7173
node: Arc<Node>, paginated_kv_store: Arc<dyn PaginatedKVStore>, api_key: String,
74+
metrics: Arc<Metrics>,
7275
) -> Self {
73-
Self { node, paginated_kv_store, api_key }
76+
Self { node, paginated_kv_store, api_key, metrics }
7477
}
7578
}
7679

@@ -154,6 +157,17 @@ impl Service<Request<Incoming>> for NodeService {
154157
type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send>>;
155158

156159
fn call(&self, req: Request<Incoming>) -> Self::Future {
160+
// Handle metrics endpoint separately to bypass auth and return plain text
161+
if req.uri().path().len() > 1 && &req.uri().path()[1..] == GET_METRICS_PATH {
162+
let metrics = Arc::clone(&self.metrics);
163+
return Box::pin(async move {
164+
Ok(Response::builder()
165+
.header("Content-Type", "text/plain")
166+
.body(Full::new(Bytes::from(metrics.gather_metrics())))
167+
.unwrap())
168+
});
169+
}
170+
157171
// Extract auth params from headers (validation happens after body is read)
158172
let auth_params = match extract_auth_params(&req) {
159173
Ok(params) => params,

0 commit comments

Comments
 (0)