Add probing test of state recovery after restart

randomlogin · randomlogin · commit 9e00279cf789 · 2026-05-10T16:59:15.000+02:00
diff --git a/tests/probing_tests.rs b/tests/probing_tests.rs
@@ -9,14 +9,19 @@
 //   exhausted_probe_budget_blocks_new_probes
 //      Samples locked_msat across multiple probe cycles and asserts it never
 //      exceeds the configured max_locked_msat budget cap.
+//
+//   probing_budget_restored_after_node_restart
+//      Dispatches a probe, then disconnects node_a from node_b before the failure
+//      can propagate back so the pending probe HTLC is preserved. Restarts node_a and
+//      asserts the prober's locked_msat is rebuilt non-zero from list_recent_payments().
 
 mod common;
 use std::sync::atomic::{AtomicBool, Ordering};
 
 use common::{
 	expect_channel_ready_event, expect_event, generate_blocks_and_wait, open_channel,
 	premine_and_distribute_funds, random_chain_source, random_config, setup_bitcoind_and_electrsd,
-	setup_node, wait_for_channel_ready_to_send, TestNode,
+	setup_node, wait_for_channel_ready_to_send, TestNode, TestStoreType,
 };
 
 use ldk_node::bitcoin::Amount;
@@ -29,7 +34,7 @@ use std::sync::{Arc, Mutex};
 use std::time::Duration;
 
 const PROBE_AMOUNT_MSAT: u64 = 1_000_000;
-const PROBING_INTERVAL_MILLISECONDS: u64 = 500;
+const PROBING_INTERVAL_MILLISECONDS: u64 = 100;
 
 /// FixedPathStrategy — returns a fixed pre-built path; used by budget tests.
 ///
@@ -195,6 +200,118 @@ async fn probe_budget_increments_and_decrements() {
 	node_c.stop().unwrap();
 }
 
+/// Verifies that `locked_msat` is restored after the node is stopped and restarted
+/// while a probe is still in flight.
+///
+/// Race-sensitive: once a probe is dispatched, the failure round-trip
+/// (`A→B→C → C fails back → B → A`) resolves it within milliseconds. To keep the
+/// HTLC pending across the restart we observe `locked_msat > 0` and then *immediately*
+/// call `node_a.disconnect(node_b)`, which closes A's socket to B in-process — much
+/// faster than `node_b.stop()` — so any failure message from B is dropped before A
+/// processes it. If the race is lost on a given probe (`locked_msat` drops back to 0
+/// after the disconnect), we reconnect and let the next probe tick try again.
+/// The pending probe entry persists in `node_a`'s channel manager, and on restart the
+/// prober must rebuild its `locked_msat` from it via `list_recent_payments()`.
+#[tokio::test(flavor = "multi_thread")]
+async fn probing_budget_restored_after_node_restart() {
+	let (bitcoind, electrsd) = setup_bitcoind_and_electrsd();
+	let chain_source = random_chain_source(&bitcoind, &electrsd);
+
+	let node_b = setup_node(&chain_source, random_config(false));
+	let node_c = setup_node(&chain_source, random_config(false));
+
+	let mut config_a = random_config(false);
+	// Use a pure on-disk store so state survives the restart.
+	config_a.store_type = TestStoreType::Sqlite;
+	let strategy = FixedPathStrategy::new();
+	config_a.probing = Some(
+		ProbingConfigBuilder::custom(strategy.clone())
+			.interval(Duration::from_millis(PROBING_INTERVAL_MILLISECONDS))
+			.max_locked_msat(10 * PROBE_AMOUNT_MSAT)
+			.build(),
+	);
+	let restart_config = config_a.clone(); // Reused below to bring node_a back up.
+	let node_a = setup_node(&chain_source, config_a);
+
+	let addr_a = node_a.onchain_payment().new_address().unwrap();
+	let addr_b = node_b.onchain_payment().new_address().unwrap();
+	premine_and_distribute_funds(
+		&bitcoind.client,
+		&electrsd.client,
+		vec![addr_a, addr_b],
+		Amount::from_sat(2_000_000),
+	)
+	.await;
+	node_a.sync_wallets().unwrap();
+	node_b.sync_wallets().unwrap();
+
+	open_channel(&node_a, &node_b, 1_000_000, true, &electrsd).await;
+	generate_blocks_and_wait(&bitcoind.client, &electrsd.client, 1).await;
+	node_b.sync_wallets().unwrap();
+	open_channel(&node_b, &node_c, 1_000_000, true, &electrsd).await;
+	generate_blocks_and_wait(&bitcoind.client, &electrsd.client, 6).await;
+
+	node_a.sync_wallets().unwrap();
+	node_b.sync_wallets().unwrap();
+	node_c.sync_wallets().unwrap();
+
+	expect_channel_ready_event!(node_a, node_b.node_id());
+	expect_event!(node_b, ChannelReady);
+	expect_event!(node_b, ChannelReady);
+	expect_event!(node_c, ChannelReady);
+
+	strategy.set_path(build_probe_path(&node_a, &node_b, &node_c, PROBE_AMOUNT_MSAT));
+	wait_for_channel_ready_to_send(&node_a, &node_b, PROBE_AMOUNT_MSAT + 1000).await;
+	wait_for_channel_ready_to_send(&node_b, &node_c, PROBE_AMOUNT_MSAT).await;
+
+	let node_b_id = node_b.node_id();
+	let node_b_addr = node_b.listening_addresses().unwrap().into_iter().next().unwrap();
+
+	strategy.start_probing();
+
+	// Poll until a probe is in flight, then cut node_a off from node_b. Re-checking
+	// `locked_msat` after the disconnect tells us who won the race: non-zero means
+	// the probe is still pending; zero means it resolved, so reconnect and retry.
+	let isolated = tokio::time::timeout(Duration::from_secs(30), async {
+		loop {
+			if node_a.prober().unwrap().locked_msat() > 0 {
+				node_a.disconnect(node_b_id).ok();
+				if node_a.prober().unwrap().locked_msat() > 0 {
+					return true;
+				}
+				node_a.connect(node_b_id, node_b_addr.clone(), false).ok();
+			}
+			tokio::time::sleep(Duration::from_millis(1)).await;
+		}
+	})
+	.await
+	.unwrap_or(false); // Timeout elapsed without a probe staying in flight.
+	assert!(isolated, "could not preserve in-flight probe long enough to restart");
+	strategy.stop_probing();
+
+	let locked_before = node_a.prober().unwrap().locked_msat();
+	println!("Before restart: locked_msat = {}", locked_before);
+	assert!(locked_before > 0, "probe resolved before we could isolate node_a — flaky timing");
+
+	node_a.stop().unwrap();
+
+	// Restart node_a from the same persisted state.
+	let node_a = setup_node(&chain_source, restart_config);
+
+	let locked_after = node_a.prober().unwrap().locked_msat();
+	println!("After restart:  locked_msat = {}", locked_after);
+	assert!(
+		locked_after > 0,
+		"locked_msat was not restored after restart (before={} after={})",
+		locked_before,
+		locked_after
+	);
+
+	node_a.stop().unwrap();
+	node_b.stop().unwrap();
+	node_c.stop().unwrap();
+}
+
 /// Verifies that `locked_msat` never exceeds `max_locked_msat` across multiple probe cycles.
 #[tokio::test(flavor = "multi_thread")]
 async fn exhausted_probe_budget_blocks_new_probes() {