Skip to content

Commit 9e00279

Browse files
committed
Add probing test of state recovery after restart
1 parent a635fb3 commit 9e00279

1 file changed

Lines changed: 119 additions & 2 deletions

File tree

tests/probing_tests.rs

Lines changed: 119 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,19 @@
99
// exhausted_probe_budget_blocks_new_probes
1010
// Samples locked_msat across multiple probe cycles and asserts it never
1111
// exceeds the configured max_locked_msat budget cap.
12+
//
13+
// probing_budget_restored_after_node_restart
14+
// Dispatches a probe, then disconnects node_a from node_b before the failure can propagate
15+
// back so the pending probe HTLC is preserved. Restarts node_a and asserts
16+
// the prober's locked_msat is rebuilt non-zero from list_recent_payments().
1217

1318
mod common;
1419
use std::sync::atomic::{AtomicBool, Ordering};
1520

1621
use common::{
1722
expect_channel_ready_event, expect_event, generate_blocks_and_wait, open_channel,
1823
premine_and_distribute_funds, random_chain_source, random_config, setup_bitcoind_and_electrsd,
19-
setup_node, wait_for_channel_ready_to_send, TestNode,
24+
setup_node, wait_for_channel_ready_to_send, TestNode, TestStoreType,
2025
};
2126

2227
use ldk_node::bitcoin::Amount;
@@ -29,7 +34,7 @@ use std::sync::{Arc, Mutex};
2934
use std::time::Duration;
3035

3136
const PROBE_AMOUNT_MSAT: u64 = 1_000_000;
32-
const PROBING_INTERVAL_MILLISECONDS: u64 = 500;
37+
const PROBING_INTERVAL_MILLISECONDS: u64 = 100;
3338

3439
/// FixedPathStrategy — returns a fixed pre-built path; used by budget tests.
3540
///
@@ -195,6 +200,118 @@ async fn probe_budget_increments_and_decrements() {
195200
node_c.stop().unwrap();
196201
}
197202

203+
/// Verifies that `locked_msat` is restored after the node is stopped and restarted
204+
/// while a probe is still in flight.
205+
///
206+
/// Race-sensitive: once a probe is dispatched, the failure round-trip
207+
/// (`A→B→C → C fails back → B → A`) resolves it within milliseconds. To keep the
208+
/// HTLC pending across the restart we observe `locked_msat > 0` and then *immediately*
209+
/// call `node_a.disconnect(node_b)`, which closes A's socket to B in-process — much
210+
/// faster than `node_b.stop()` — so any failure message from B is dropped before A
211+
/// processes it. If the race is lost on a given probe (locked_msat drops back to 0
212+
/// after the disconnect), we reconnect and let the next probe tick try again.
213+
/// The pending Probe entry persists in `node_a`'s channel manager, and on restart the
214+
/// prober must rebuild its `locked_msat` from that entry via `list_recent_payments()`.
215+
#[tokio::test(flavor = "multi_thread")]
216+
async fn probing_budget_restored_after_node_restart() {
217+
let (bitcoind, electrsd) = setup_bitcoind_and_electrsd();
218+
let chain_source = random_chain_source(&bitcoind, &electrsd);
219+
220+
let node_b = setup_node(&chain_source, random_config(false));
221+
let node_c = setup_node(&chain_source, random_config(false));
222+
223+
let mut config_a = random_config(false);
224+
// Use a pure on-disk store so state survives the restart.
225+
config_a.store_type = TestStoreType::Sqlite;
226+
let strategy = FixedPathStrategy::new();
227+
// The config receives a clone of `strategy`; the test keeps its own handle to drive
// `set_path` / `start_probing` / `stop_probing` below.
config_a.probing = Some(
228+
ProbingConfigBuilder::custom(strategy.clone())
229+
.interval(Duration::from_millis(PROBING_INTERVAL_MILLISECONDS))
230+
.max_locked_msat(10 * PROBE_AMOUNT_MSAT)
231+
.build(),
232+
);
233+
// `config_a` is moved into `setup_node` on the next line, so keep a clone to bring
// node_a back up with the same settings after it has been stopped.
let restart_config = config_a.clone();
234+
let node_a = setup_node(&chain_source, config_a);
235+
236+
// Only node_a and node_b receive on-chain funds; they are the two nodes that open
// channels below (A -> B and B -> C).
let addr_a = node_a.onchain_payment().new_address().unwrap();
237+
let addr_b = node_b.onchain_payment().new_address().unwrap();
238+
premine_and_distribute_funds(
239+
&bitcoind.client,
240+
&electrsd.client,
241+
vec![addr_a, addr_b],
242+
Amount::from_sat(2_000_000),
243+
)
244+
.await;
245+
node_a.sync_wallets().unwrap();
246+
node_b.sync_wallets().unwrap();
247+
248+
// Build the A -> B -> C topology that the fixed probe path is routed over.
open_channel(&node_a, &node_b, 1_000_000, true, &electrsd).await;
249+
generate_blocks_and_wait(&bitcoind.client, &electrsd.client, 1).await;
250+
node_b.sync_wallets().unwrap();
251+
open_channel(&node_b, &node_c, 1_000_000, true, &electrsd).await;
252+
generate_blocks_and_wait(&bitcoind.client, &electrsd.client, 6).await;
253+
254+
node_a.sync_wallets().unwrap();
255+
node_b.sync_wallets().unwrap();
256+
node_c.sync_wallets().unwrap();
257+
258+
// node_b is a party to both channels, so two ChannelReady events are consumed for it.
expect_channel_ready_event!(node_a, node_b.node_id());
259+
expect_event!(node_b, ChannelReady);
260+
expect_event!(node_b, ChannelReady);
261+
expect_event!(node_c, ChannelReady);
262+
263+
strategy.set_path(build_probe_path(&node_a, &node_b, &node_c, PROBE_AMOUNT_MSAT));
264+
// NOTE(review): the extra 1000 msat on the first hop presumably leaves room for
// node_b's forwarding fee — confirm against `build_probe_path`.
wait_for_channel_ready_to_send(&node_a, &node_b, PROBE_AMOUNT_MSAT + 1000).await;
265+
wait_for_channel_ready_to_send(&node_b, &node_c, PROBE_AMOUNT_MSAT).await;
266+
267+
// Captured before the polling loop so it can disconnect from and reconnect to node_b.
let node_b_id = node_b.node_id();
268+
let node_b_addr = node_b.listening_addresses().unwrap().into_iter().next().unwrap();
269+
270+
strategy.start_probing();
271+
272+
// Dispatch a probe and isolate node_a from node_b before the failure can
273+
// propagate back. Tight polling + in-process disconnect minimises the race
274+
// window; on a lost race we reconnect and let the prober's next tick try.
275+
// The whole poll loop is bounded at 30 seconds.
let isolated = tokio::time::timeout(Duration::from_secs(30), async {
276+
loop {
277+
if node_a.prober().unwrap().locked_msat() > 0 {
278+
// Any error from disconnect (and from connect below) is discarded via `.ok()`.
node_a.disconnect(node_b_id).ok();
279+
// Re-read after the disconnect: a value still above zero means the probe is
// still accounted as locked, i.e. the race was won.
if node_a.prober().unwrap().locked_msat() > 0 {
280+
return true;
281+
}
282+
// Race lost: restore the connection so a later probe can be attempted.
node_a.connect(node_b_id, node_b_addr.clone(), false).ok();
283+
}
284+
tokio::time::sleep(Duration::from_millis(1)).await;
285+
}
286+
})
287+
.await
288+
// A timeout (Err) is folded into `false` so the assert below reports it with a message.
.unwrap_or(false);
289+
assert!(isolated, "could not preserve in-flight probe long enough to restart");
290+
// NOTE(review): presumably stops the strategy from handing out further probe paths —
// confirm against `FixedPathStrategy`.
strategy.stop_probing();
291+
292+
// Read the value again after the loop and re-assert it, since it is sampled separately
// from the check that produced `isolated`.
let locked_before = node_a.prober().unwrap().locked_msat();
293+
println!("Before restart: locked_msat = {}", locked_before);
294+
assert!(locked_before > 0, "probe resolved before we could isolate node_a — flaky timing");
295+
296+
node_a.stop().unwrap();
297+
298+
// Restart node_a from the same persisted state.
299+
// The new binding shadows the stopped node; `restart_config` is consumed here.
let node_a = setup_node(&chain_source, restart_config);
300+
301+
let locked_after = node_a.prober().unwrap().locked_msat();
302+
println!("After restart: locked_msat = {}", locked_after);
303+
// Only "non-zero" is asserted; `locked_after` is not required to equal `locked_before`.
assert!(
304+
locked_after > 0,
305+
"locked_msat was not restored after restart (before={} after={})",
306+
locked_before,
307+
locked_after
308+
);
309+
310+
node_a.stop().unwrap();
311+
node_b.stop().unwrap();
312+
node_c.stop().unwrap();
313+
}
314+
198315
/// Verifies that `locked_msat` never exceeds `max_locked_msat` across multiple probe cycles.
199316
#[tokio::test(flavor = "multi_thread")]
200317
async fn exhausted_probe_budget_blocks_new_probes() {

0 commit comments

Comments
 (0)