Skip to content

Commit d1065db

Browse files
committed
Read the RGS sync timestamp from the network graph
Previously, after each successful Rapid Gossip Sync update the background task wrote `latest_rgs_snapshot_timestamp` to the persisted `NodeMetrics` immediately, while the network graph itself is only flushed to disk later by LDK's background processor. A crash in that window left the on-disk metric ahead of the on-disk graph — on restart we'd resume RGS from the newer timestamp and permanently skip the updates that were never persisted together with the graph.

Instead, seed the RGS start timestamp from `NetworkGraph::get_last_rapid_gossip_sync_timestamp`, which is part of the graph's own serialized state and therefore lands on disk atomically with the channel updates it describes. The same source now backs the RGS timestamp reported via `NodeStatus::latest_rgs_snapshot_timestamp`, so the reported value always matches what's reflected in the graph. Worst case after a crash is that we refetch the snapshots since the last persisted graph — an idempotent operation — rather than silently losing them.

The `latest_rgs_snapshot_timestamp` field is retired from `NodeMetrics`, and TLV slot 6 is kept readable for backwards compatibility via LDK's `legacy` TLV grammar. Old persisted records still deserialize; new records no longer carry slot 6.

The dead "reset RGS timestamp on gossip-source switch" block in the P2P builder branch also goes away, since the graph's timestamp remains the correct resume point across a P2P→RGS switch.

Co-Authored-By: HAL 9000
1 parent 79cfe6f commit d1065db

2 files changed

Lines changed: 57 additions & 26 deletions

File tree

src/builder.rs

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ use crate::io::sqlite_store::SqliteStore;
5757
use crate::io::utils::{
5858
read_event_queue, read_external_pathfinding_scores_from_cache, read_network_graph,
5959
read_node_metrics, read_output_sweeper, read_payments, read_peer_info, read_pending_payments,
60-
read_scorer, update_and_persist_node_metrics,
60+
read_scorer,
6161
};
6262
use crate::io::vss_store::VssStoreBuilder;
6363
use crate::io::{
@@ -1770,19 +1770,11 @@ fn build_with_store_internal(
17701770
Arc::clone(&logger),
17711771
));
17721772

1773-
// Reset the RGS sync timestamp in case we somehow switch gossip sources
1774-
update_and_persist_node_metrics(&node_metrics, &*kv_store, Arc::clone(&logger), |m| {
1775-
m.latest_rgs_snapshot_timestamp = None
1776-
})
1777-
.map_err(|e| {
1778-
log_error!(logger, "Failed writing to store: {}", e);
1779-
BuildError::WriteFailed
1780-
})?;
17811773
p2p_source
17821774
},
17831775
GossipSourceConfig::RapidGossipSync(rgs_server) => {
17841776
let latest_sync_timestamp =
1785-
node_metrics.read().expect("lock").latest_rgs_snapshot_timestamp.unwrap_or(0);
1777+
network_graph.get_last_rapid_gossip_sync_timestamp().unwrap_or(0);
17861778
Arc::new(GossipSource::new_rgs(
17871779
rgs_server.clone(),
17881780
latest_sync_timestamp,

src/lib.rs

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -297,9 +297,7 @@ impl Node {
297297

298298
if self.gossip_source.is_rgs() {
299299
let gossip_source = Arc::clone(&self.gossip_source);
300-
let gossip_sync_store = Arc::clone(&self.kv_store);
301300
let gossip_sync_logger = Arc::clone(&self.logger);
302-
let gossip_node_metrics = Arc::clone(&self.node_metrics);
303301
let mut stop_gossip_sync = self.stop_sender.subscribe();
304302
self.runtime.spawn_cancellable_background_task(async move {
305303
let mut interval = tokio::time::interval(RGS_SYNC_INTERVAL);
@@ -315,21 +313,12 @@ impl Node {
315313
_ = interval.tick() => {
316314
let now = Instant::now();
317315
match gossip_source.update_rgs_snapshot().await {
318-
Ok(updated_timestamp) => {
316+
Ok(_updated_timestamp) => {
319317
log_info!(
320318
gossip_sync_logger,
321319
"Background sync of RGS gossip data finished in {}ms.",
322320
now.elapsed().as_millis()
323321
);
324-
update_and_persist_node_metrics(
325-
&gossip_node_metrics,
326-
&*gossip_sync_store,
327-
Arc::clone(&gossip_sync_logger),
328-
|m| m.latest_rgs_snapshot_timestamp = Some(updated_timestamp),
329-
)
330-
.unwrap_or_else(|e| {
331-
log_error!(gossip_sync_logger, "Persistence failed: {}", e);
332-
});
333322
}
334323
Err(e) => {
335324
log_error!(
@@ -780,7 +769,7 @@ impl Node {
780769
let latest_fee_rate_cache_update_timestamp =
781770
locked_node_metrics.latest_fee_rate_cache_update_timestamp;
782771
let latest_rgs_snapshot_timestamp =
783-
locked_node_metrics.latest_rgs_snapshot_timestamp.map(|val| val as u64);
772+
self.network_graph.get_last_rapid_gossip_sync_timestamp().map(|val| val as u64);
784773
let latest_pathfinding_scores_sync_timestamp =
785774
locked_node_metrics.latest_pathfinding_scores_sync_timestamp;
786775
let latest_node_announcement_broadcast_timestamp =
@@ -2110,7 +2099,6 @@ pub(crate) struct NodeMetrics {
21102099
latest_lightning_wallet_sync_timestamp: Option<u64>,
21112100
latest_onchain_wallet_sync_timestamp: Option<u64>,
21122101
latest_fee_rate_cache_update_timestamp: Option<u64>,
2113-
latest_rgs_snapshot_timestamp: Option<u32>,
21142102
latest_pathfinding_scores_sync_timestamp: Option<u64>,
21152103
latest_node_announcement_broadcast_timestamp: Option<u64>,
21162104
}
@@ -2121,7 +2109,6 @@ impl Default for NodeMetrics {
21212109
latest_lightning_wallet_sync_timestamp: None,
21222110
latest_onchain_wallet_sync_timestamp: None,
21232111
latest_fee_rate_cache_update_timestamp: None,
2124-
latest_rgs_snapshot_timestamp: None,
21252112
latest_pathfinding_scores_sync_timestamp: None,
21262113
latest_node_announcement_broadcast_timestamp: None,
21272114
}
@@ -2133,7 +2120,8 @@ impl_writeable_tlv_based!(NodeMetrics, {
21332120
(1, latest_pathfinding_scores_sync_timestamp, option),
21342121
(2, latest_onchain_wallet_sync_timestamp, option),
21352122
(4, latest_fee_rate_cache_update_timestamp, option),
2136-
(6, latest_rgs_snapshot_timestamp, option),
2123+
// 6 used to be latest_rgs_snapshot_timestamp
2124+
(6, _legacy_latest_rgs_snapshot_timestamp, (legacy, u32, |_| Ok(()), |_: &NodeMetrics| None::<Option<u32>> )),
21372125
(8, latest_node_announcement_broadcast_timestamp, option),
21382126
// 10 used to be latest_channel_monitor_archival_height
21392127
(10, _legacy_latest_channel_monitor_archival_height, (legacy, u32, |_| Ok(()), |_: &NodeMetrics| None::<Option<u32>> )),
@@ -2174,3 +2162,54 @@ pub(crate) fn new_channel_anchor_reserve_sats(
21742162
}
21752163
})
21762164
}
2165+
2166+
#[cfg(test)]
2167+
mod tests {
2168+
use super::*;
2169+
use lightning::util::ser::{Readable, Writeable};
2170+
2171+
#[test]
2172+
fn node_metrics_reads_legacy_rgs_snapshot_timestamp() {
2173+
// Pre-#615, `NodeMetrics` persisted `latest_rgs_snapshot_timestamp` as an optional
2174+
// `u32` at TLV slot 6. The field has since been retired, but we must still read
2175+
// records written by older versions without failing. The shadow struct below
2176+
// mirrors main's `NodeMetrics` layout 1:1 so the byte stream we decode matches
2177+
// what an older on-disk record actually looked like.
2178+
#[derive(Debug)]
2179+
struct OldNodeMetrics {
2180+
latest_lightning_wallet_sync_timestamp: Option<u64>,
2181+
latest_onchain_wallet_sync_timestamp: Option<u64>,
2182+
latest_fee_rate_cache_update_timestamp: Option<u64>,
2183+
latest_rgs_snapshot_timestamp: Option<u32>,
2184+
latest_pathfinding_scores_sync_timestamp: Option<u64>,
2185+
latest_node_announcement_broadcast_timestamp: Option<u64>,
2186+
}
2187+
impl_writeable_tlv_based!(OldNodeMetrics, {
2188+
(0, latest_lightning_wallet_sync_timestamp, option),
2189+
(1, latest_pathfinding_scores_sync_timestamp, option),
2190+
(2, latest_onchain_wallet_sync_timestamp, option),
2191+
(4, latest_fee_rate_cache_update_timestamp, option),
2192+
(6, latest_rgs_snapshot_timestamp, option),
2193+
(8, latest_node_announcement_broadcast_timestamp, option),
2194+
// 10 used to be latest_channel_monitor_archival_height
2195+
(10, _legacy_latest_channel_monitor_archival_height, (legacy, u32, |_| Ok(()), |_: &OldNodeMetrics| None::<Option<u32>> )),
2196+
});
2197+
2198+
let old = OldNodeMetrics {
2199+
latest_lightning_wallet_sync_timestamp: Some(1_000),
2200+
latest_onchain_wallet_sync_timestamp: Some(1_100),
2201+
latest_fee_rate_cache_update_timestamp: Some(1_200),
2202+
latest_rgs_snapshot_timestamp: Some(1_700_000_000),
2203+
latest_pathfinding_scores_sync_timestamp: Some(1_300),
2204+
latest_node_announcement_broadcast_timestamp: Some(2_000),
2205+
};
2206+
let bytes = old.encode();
2207+
2208+
let new = NodeMetrics::read(&mut &bytes[..]).unwrap();
2209+
assert_eq!(new.latest_lightning_wallet_sync_timestamp, Some(1_000));
2210+
assert_eq!(new.latest_onchain_wallet_sync_timestamp, Some(1_100));
2211+
assert_eq!(new.latest_fee_rate_cache_update_timestamp, Some(1_200));
2212+
assert_eq!(new.latest_pathfinding_scores_sync_timestamp, Some(1_300));
2213+
assert_eq!(new.latest_node_announcement_broadcast_timestamp, Some(2_000));
2214+
}
2215+
}

0 commit comments

Comments
 (0)