Skip to content

Commit f806df2

Browse files
committed
Decouple pending HTLC retry from expiry check timer
Both process_pending_htlcs and handle_expired_htlcs shared a single 5-second sleep, coupling two concerns with very different latency requirements. Expiry checks are distributed-clock housekeeping where seconds don't matter. Pending HTLC retries gate payment completion for serverless SDK clients that reconnect briefly (~20s) during webhook-driven flows — every extra second of retry latency is a second the payer is staring at a spinner. Splitting them into independent select! arms requires switching from sleep to tokio::time::interval. With sleep, the losing arm in a select! is cancelled and recreated each iteration — the 1s pending-HTLC sleep would fire every loop, resetting the 5s expiry sleep before it ever completes, starving handle_expired_htlcs entirely. interval maintains its deadline across select! iterations so both timers tick independently. MissedTickBehavior::Skip avoids burst catch-up if a handler blocks longer than its interval.
1 parent 28ec5c9 commit f806df2

1 file changed

Lines changed: 25 additions & 1 deletion

File tree

src/lib.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,19 @@ use crate::scoring::setup_background_pathfinding_scores_sync;
171171

172172
const HTLC_EXPIRY_CHECK_INTERVAL_SECS: u64 = 5;
173173

174+
/// How often to retry forwarding deferred HTLCs (e.g., after channel_reestablish completes).
175+
///
176+
/// After `peer_connected`, channels are not usable until `channel_reestablish` finishes (~1s).
177+
/// HTLCs that arrive during this window are deferred and retried by `process_pending_htlcs`
178+
/// on this interval. Lower values reduce payment latency for serverless SDK clients that
179+
/// disconnect shortly after reconnecting (typically ~20s for webhook-driven flows).
180+
///
181+
/// Cost per tick: O(connected_peers × stored_htlcs) — acquires a read lock on the peer set
182+
/// and a mutex lock + full scan on the HTLC store per peer. At ~100 peers with a handful of
183+
/// pending HTLCs this is negligible. Profile if connected peer count exceeds ~10k, and
184+
/// consider indexing pending HTLCs by peer at that point rather than scanning.
185+
const PENDING_HTLC_RETRY_INTERVAL_SECS: u64 = 1;
186+
174187
#[cfg(feature = "uniffi")]
175188
uniffi::include_scaffolding!("ldk_node");
176189

@@ -625,6 +638,15 @@ impl Node {
625638
let liquidity_handler = Arc::clone(&liquidity_source);
626639
let liquidity_logger = Arc::clone(&self.logger);
627640
self.runtime.spawn_background_task(async move {
641+
let mut pending_htlc_interval =
642+
tokio::time::interval(Duration::from_secs(PENDING_HTLC_RETRY_INTERVAL_SECS));
643+
let mut expiry_check_interval =
644+
tokio::time::interval(Duration::from_secs(HTLC_EXPIRY_CHECK_INTERVAL_SECS));
645+
pending_htlc_interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
646+
expiry_check_interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
647+
// First tick fires immediately; consume it so we don't run at t=0.
648+
pending_htlc_interval.tick().await;
649+
expiry_check_interval.tick().await;
628650
loop {
629651
tokio::select! {
630652
_ = stop_liquidity_handler.changed() => {
@@ -634,8 +656,10 @@ impl Node {
634656
);
635657
return;
636658
}
637-
_ = tokio::time::sleep(Duration::from_secs(HTLC_EXPIRY_CHECK_INTERVAL_SECS)) => {
659+
_ = pending_htlc_interval.tick() => {
638660
liquidity_handler.process_pending_htlcs();
661+
}
662+
_ = expiry_check_interval.tick() => {
639663
liquidity_handler.handle_expired_htlcs().await;
640664
}
641665
_ = liquidity_handler.handle_next_event() => {}

0 commit comments

Comments (0)