Skip to content

Commit 1050ed9

Browse files
authored
Merge pull request #4436 from joostjager/revert-mixed-mode-check
Replace dual-sync-async persistence panic with Watch contract
2 parents 20e240d + f8a955c commit 1050ed9

File tree

8 files changed

+238
-56
lines changed

8 files changed

+238
-56
lines changed

lightning/src/chain/chainmonitor.rs

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,7 +1250,41 @@ where
12501250
}
12511251
}
12521252

1253-
if update_res.is_err() {
1253+
debug_assert!(
1254+
update_res.is_ok() || monitor.no_further_updates_allowed(),
1255+
"update_monitor returned Err but channel is not post-close",
1256+
);
1257+
1258+
// We also check update_res.is_err() as a defensive measure: an
1259+
// error should only occur on a post-close monitor (validated by
1260+
// the debug_assert above), but we defer here regardless to avoid
1261+
// returning Completed for a failed update.
1262+
if (update_res.is_err() || monitor.no_further_updates_allowed())
1263+
&& persist_res == ChannelMonitorUpdateStatus::Completed
1264+
{
1265+
// The channel is post-close (funding spend seen, lockdown, or
1266+
// holder tx signed). Return InProgress so ChannelManager freezes
1267+
// the channel until the force-close MonitorEvents are processed.
1268+
// Push a Completed event into pending_monitor_events so it gets
1269+
// picked up after the per-monitor events in the next
1270+
// release_pending_monitor_events call.
1271+
let funding_txo = monitor.get_funding_txo();
1272+
let channel_id = monitor.channel_id();
1273+
self.pending_monitor_events.lock().unwrap().push((
1274+
funding_txo,
1275+
channel_id,
1276+
vec![MonitorEvent::Completed {
1277+
funding_txo,
1278+
channel_id,
1279+
monitor_update_id: monitor.get_latest_update_id(),
1280+
}],
1281+
monitor.get_counterparty_node_id(),
1282+
));
1283+
log_debug!(
1284+
logger,
1285+
"Deferring completion of ChannelMonitorUpdate id {:?} (channel is post-close)",
1286+
update_id,
1287+
);
12541288
ChannelMonitorUpdateStatus::InProgress
12551289
} else {
12561290
persist_res
@@ -1614,8 +1648,9 @@ where
16141648
for (channel_id, update_id) in self.persister.get_and_clear_completed_updates() {
16151649
let _ = self.channel_monitor_updated(channel_id, update_id);
16161650
}
1617-
let mut pending_monitor_events = self.pending_monitor_events.lock().unwrap().split_off(0);
1618-
for monitor_state in self.monitors.read().unwrap().values() {
1651+
let monitors = self.monitors.read().unwrap();
1652+
let mut pending_monitor_events = Vec::new();
1653+
for monitor_state in monitors.values() {
16191654
let monitor_events = monitor_state.monitor.get_and_clear_pending_monitor_events();
16201655
if monitor_events.len() > 0 {
16211656
let monitor_funding_txo = monitor_state.monitor.get_funding_txo();
@@ -1629,6 +1664,10 @@ where
16291664
));
16301665
}
16311666
}
1667+
// Drain pending_monitor_events (which includes deferred post-close
1668+
// completions) after per-monitor events so that force-close
1669+
// MonitorEvents are processed by ChannelManager first.
1670+
pending_monitor_events.extend(self.pending_monitor_events.lock().unwrap().split_off(0));
16321671
pending_monitor_events
16331672
}
16341673
}

lightning/src/chain/channelmonitor.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7048,6 +7048,7 @@ mod tests {
70487048
let legacy_cfg = test_legacy_channel_config();
70497049
let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[Some(legacy_cfg.clone()), Some(legacy_cfg.clone()), Some(legacy_cfg)]);
70507050
let nodes = create_network(3, &node_cfgs, &node_chanmgrs);
7051+
nodes[1].disable_monitor_completeness_assertion();
70517052
let channel = create_announced_chan_between_nodes(&nodes, 0, 1);
70527053
create_announced_chan_between_nodes(&nodes, 1, 2);
70537054

lightning/src/chain/mod.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -233,11 +233,10 @@ pub enum ChannelMonitorUpdateStatus {
233233
/// This includes performing any `fsync()` calls required to ensure the update is guaranteed to
234234
/// be available on restart even if the application crashes.
235235
///
236-
/// If you return this variant, you cannot later return [`InProgress`] from the same instance of
237-
/// [`Persist`]/[`Watch`] without first restarting.
236+
/// You cannot switch from [`InProgress`] to this variant for the same channel without first
237+
/// restarting. However, switching from this variant to [`InProgress`] is always allowed.
238238
///
239239
/// [`InProgress`]: ChannelMonitorUpdateStatus::InProgress
240-
/// [`Persist`]: chainmonitor::Persist
241240
Completed,
242241
/// Indicates that the update will happen asynchronously in the background or that a transient
243242
/// failure occurred which is being retried in the background and will eventually complete.
@@ -263,12 +262,7 @@ pub enum ChannelMonitorUpdateStatus {
263262
/// reliable, this feature is considered beta, and a handful of edge-cases remain. Until the
264263
/// remaining cases are fixed, in rare cases, *using this feature may lead to funds loss*.
265264
///
266-
/// If you return this variant, you cannot later return [`Completed`] from the same instance of
267-
/// [`Persist`]/[`Watch`] without first restarting.
268-
///
269265
/// [`InProgress`]: ChannelMonitorUpdateStatus::InProgress
270-
/// [`Completed`]: ChannelMonitorUpdateStatus::Completed
271-
/// [`Persist`]: chainmonitor::Persist
272266
InProgress,
273267
/// Indicates that an update has failed and will not complete at any point in the future.
274268
///
@@ -328,6 +322,8 @@ pub trait Watch<ChannelSigner: EcdsaChannelSigner> {
328322
/// cannot be retried, the node should shut down immediately after returning
329323
/// [`ChannelMonitorUpdateStatus::UnrecoverableError`], see its documentation for more info.
330324
///
325+
/// See [`ChannelMonitorUpdateStatus`] for requirements on when each variant may be returned.
326+
///
331327
/// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
332328
fn update_channel(
333329
&self, channel_id: ChannelId, update: &ChannelMonitorUpdate,

lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 159 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use crate::chain::chaininterface::LowerBoundedFeeEstimator;
1616
use crate::chain::chainmonitor::ChainMonitor;
1717
use crate::chain::channelmonitor::{ChannelMonitor, MonitorEvent, ANTI_REORG_DELAY};
1818
use crate::chain::transaction::OutPoint;
19-
use crate::chain::{ChannelMonitorUpdateStatus, Listen, Watch};
19+
use crate::chain::{ChannelMonitorUpdateStatus, Confirm, Listen, Watch};
2020
use crate::events::{ClosureReason, Event, HTLCHandlingFailureType, PaymentPurpose};
2121
use crate::ln::channel::AnnouncementSigsState;
2222
use crate::ln::channelmanager::{PaymentId, RAACommitmentOrder};
@@ -176,6 +176,7 @@ fn do_test_simple_monitor_temporary_update_fail(disconnect: bool) {
176176
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
177177
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
178178
let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
179+
nodes[0].disable_monitor_completeness_assertion();
179180

180181
let node_a_id = nodes[0].node.get_our_node_id();
181182
let node_b_id = nodes[1].node.get_our_node_id();
@@ -317,6 +318,7 @@ fn do_test_monitor_temporary_update_fail(disconnect_count: usize) {
317318
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
318319
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
319320
let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
321+
nodes[0].disable_monitor_completeness_assertion();
320322

321323
let node_a_id = nodes[0].node.get_our_node_id();
322324
let node_b_id = nodes[1].node.get_our_node_id();
@@ -970,6 +972,7 @@ fn do_test_monitor_update_fail_raa(test_ignore_second_cs: bool) {
970972
let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
971973
let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
972974
let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
975+
nodes[1].disable_monitor_completeness_assertion();
973976

974977
let node_a_id = nodes[0].node.get_our_node_id();
975978
let node_b_id = nodes[1].node.get_our_node_id();
@@ -1501,6 +1504,7 @@ fn claim_while_disconnected_monitor_update_fail() {
15011504
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
15021505
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
15031506
let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
1507+
nodes[1].disable_monitor_completeness_assertion();
15041508

15051509
let node_a_id = nodes[0].node.get_our_node_id();
15061510
let node_b_id = nodes[1].node.get_our_node_id();
@@ -1728,6 +1732,7 @@ fn first_message_on_recv_ordering() {
17281732
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
17291733
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
17301734
let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
1735+
nodes[1].disable_monitor_completeness_assertion();
17311736

17321737
let node_a_id = nodes[0].node.get_our_node_id();
17331738
let node_b_id = nodes[1].node.get_our_node_id();
@@ -3850,6 +3855,7 @@ fn do_test_durable_preimages_on_closed_channel(
38503855
// Now reload node B
38513856
let manager_b = nodes[1].node.encode();
38523857
reload_node!(nodes[1], &manager_b, &[&mon_ab, &mon_bc], persister, chain_mon, node_b_reload);
3858+
nodes[1].disable_monitor_completeness_assertion();
38533859

38543860
nodes[0].node.peer_disconnected(node_b_id);
38553861
nodes[2].node.peer_disconnected(node_b_id);
@@ -3899,11 +3905,28 @@ fn do_test_durable_preimages_on_closed_channel(
38993905
}
39003906
if !close_chans_before_reload {
39013907
check_closed_broadcast(&nodes[1], 1, false);
3902-
let reason = ClosureReason::CommitmentTxConfirmed;
3903-
check_closed_event(&nodes[1], 1, reason, &[node_a_id], 100000);
3908+
// When hold=false, get_and_clear_pending_events also triggers
3909+
// process_background_events (replaying the preimage and force-close updates)
3910+
// and resolves the deferred completions, firing PaymentForwarded alongside
3911+
// ChannelClosed. When hold=true, only ChannelClosed fires.
3912+
let evs = nodes[1].node.get_and_clear_pending_events();
3913+
let expected = if hold_post_reload_mon_update { 1 } else { 2 };
3914+
assert_eq!(evs.len(), expected, "{:?}", evs);
3915+
assert!(evs.iter().any(|e| matches!(
3916+
e,
3917+
Event::ChannelClosed { reason: ClosureReason::CommitmentTxConfirmed, .. }
3918+
)));
3919+
if !hold_post_reload_mon_update {
3920+
assert!(evs.iter().any(|e| matches!(e, Event::PaymentForwarded { .. })));
3921+
check_added_monitors(&nodes[1], mons_added);
3922+
}
39043923
}
39053924
nodes[1].node.timer_tick_occurred();
3906-
check_added_monitors(&nodes[1], mons_added);
3925+
// For !close_chans_before_reload && !hold, background events were already replayed
3926+
// during get_and_clear_pending_events above, so timer_tick adds no monitors.
3927+
let expected_mons =
3928+
if !close_chans_before_reload && !hold_post_reload_mon_update { 0 } else { mons_added };
3929+
check_added_monitors(&nodes[1], expected_mons);
39073930

39083931
// Finally, check that B created a payment preimage transaction and close out the payment.
39093932
let bs_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
@@ -3918,44 +3941,61 @@ fn do_test_durable_preimages_on_closed_channel(
39183941
check_closed_broadcast(&nodes[0], 1, false);
39193942
expect_payment_sent(&nodes[0], payment_preimage, None, true, true);
39203943

3944+
if close_chans_before_reload && !hold_post_reload_mon_update {
3945+
// For close_chans_before_reload with hold=false, the deferred completions
3946+
// haven't been processed yet. Trigger process_pending_monitor_events now.
3947+
let _ = nodes[1].node.get_and_clear_pending_msg_events();
3948+
check_added_monitors(&nodes[1], 0);
3949+
}
3950+
39213951
if !close_chans_before_reload || close_only_a {
39223952
// Make sure the B<->C channel is still alive and well by sending a payment over it.
39233953
let mut reconnect_args = ReconnectArgs::new(&nodes[1], &nodes[2]);
39243954
reconnect_args.pending_responding_commitment_signed.1 = true;
3925-
// The B<->C `ChannelMonitorUpdate` shouldn't be allowed to complete, which is the
3926-
// equivalent to the responding `commitment_signed` being a duplicate for node B, thus we
3927-
// need to set the `pending_responding_commitment_signed_dup` flag.
3928-
reconnect_args.pending_responding_commitment_signed_dup_monitor.1 = true;
3955+
if hold_post_reload_mon_update {
3956+
// When the A-B update is still InProgress, B-C monitor updates are blocked,
3957+
// so the responding commitment_signed is a duplicate that generates no update.
3958+
reconnect_args.pending_responding_commitment_signed_dup_monitor.1 = true;
3959+
}
39293960
reconnect_args.pending_raa.1 = true;
39303961

39313962
reconnect_nodes(reconnect_args);
39323963
}
39333964

3934-
// Once the blocked `ChannelMonitorUpdate` *finally* completes, the pending
3935-
// `PaymentForwarded` event will finally be released.
3936-
let (_, ab_update_id) = nodes[1].chain_monitor.get_latest_mon_update_id(chan_id_ab);
3937-
nodes[1].chain_monitor.chain_monitor.force_channel_monitor_updated(chan_id_ab, ab_update_id);
3965+
if hold_post_reload_mon_update {
3966+
// When the persister returned InProgress, we need to manually complete the
3967+
// A-B monitor update to unblock the PaymentForwarded completion action.
3968+
let (_, ab_update_id) = nodes[1].chain_monitor.get_latest_mon_update_id(chan_id_ab);
3969+
nodes[1]
3970+
.chain_monitor
3971+
.chain_monitor
3972+
.force_channel_monitor_updated(chan_id_ab, ab_update_id);
3973+
}
39383974

39393975
// If the A<->B channel was closed before we reload, we'll replay the claim against it on
39403976
// reload, causing the `PaymentForwarded` event to get replayed.
39413977
let evs = nodes[1].node.get_and_clear_pending_events();
3942-
assert_eq!(evs.len(), if close_chans_before_reload { 2 } else { 1 });
3943-
for ev in evs {
3944-
if let Event::PaymentForwarded { claim_from_onchain_tx, next_htlcs, .. } = ev {
3945-
if !claim_from_onchain_tx {
3946-
// If the outbound channel is still open, the `next_user_channel_id` should be available.
3947-
// This was previously broken.
3948-
assert!(next_htlcs[0].user_channel_id.is_some())
3978+
if !close_chans_before_reload && !hold_post_reload_mon_update {
3979+
// PaymentForwarded already fired during get_and_clear_pending_events above.
3980+
assert!(evs.is_empty(), "{:?}", evs);
3981+
} else {
3982+
assert_eq!(evs.len(), if close_chans_before_reload { 2 } else { 1 }, "{:?}", evs);
3983+
for ev in evs {
3984+
if let Event::PaymentForwarded { claim_from_onchain_tx, next_htlcs, .. } = ev {
3985+
if !claim_from_onchain_tx {
3986+
assert!(next_htlcs[0].user_channel_id.is_some())
3987+
}
3988+
} else {
3989+
panic!("Unexpected event: {:?}", ev);
39493990
}
3950-
} else {
3951-
panic!();
39523991
}
39533992
}
39543993

39553994
if !close_chans_before_reload || close_only_a {
3956-
// Once we call `process_pending_events` the final `ChannelMonitor` for the B<->C channel
3957-
// will fly, removing the payment preimage from it.
3958-
check_added_monitors(&nodes[1], 1);
3995+
if hold_post_reload_mon_update {
3996+
// The B-C monitor update from the completion action fires now.
3997+
check_added_monitors(&nodes[1], 1);
3998+
}
39593999
assert!(nodes[1].node.get_and_clear_pending_events().is_empty());
39604000
send_payment(&nodes[1], &[&nodes[2]], 100_000);
39614001
}
@@ -5415,3 +5455,98 @@ fn test_late_counterparty_commitment_update_after_holder_commitment_spend() {
54155455
fn test_late_counterparty_commitment_update_after_holder_commitment_spend_dust() {
54165456
do_test_late_counterparty_commitment_update_after_holder_commitment_spend(true);
54175457
}
5458+
5459+
#[test]
5460+
fn test_monitor_update_after_funding_spend() {
5461+
// Test that monitor updates still work after a funding spend is detected by the
5462+
// ChainMonitor but before ChannelManager has processed the corresponding block.
5463+
//
5464+
// When the counterparty commitment transaction confirms (funding spend), the
5465+
// ChannelMonitor sets funding_spend_seen and no_further_updates_allowed() returns
5466+
// true. ChainMonitor overrides all subsequent update_channel results to InProgress
5467+
// to freeze the channel. These overridden updates complete via deferred completions
5468+
// in release_pending_monitor_events, so that MonitorUpdateCompletionActions (like
5469+
// PaymentClaimed) can still fire.
5470+
let chanmon_cfgs = create_chanmon_cfgs(2);
5471+
let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
5472+
let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
5473+
let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
5474+
5475+
let node_a_id = nodes[0].node.get_our_node_id();
5476+
5477+
let (_, _, chan_id, _) = create_announced_chan_between_nodes(&nodes, 0, 1);
5478+
5479+
// Route payment 1 fully so B can claim it later.
5480+
let (payment_preimage_1, payment_hash_1, ..) =
5481+
route_payment(&nodes[0], &[&nodes[1]], 1_000_000);
5482+
5483+
// Get A's commitment tx (this is the "counterparty" commitment from B's perspective).
5484+
let as_commitment_tx = get_local_commitment_txn!(nodes[0], chan_id);
5485+
assert_eq!(as_commitment_tx.len(), 1);
5486+
5487+
// Confirm A's commitment tx on B's chain_monitor ONLY (not on B's ChannelManager).
5488+
// This sets funding_spend_seen in the monitor, making no_further_updates_allowed() true.
5489+
// We also update the best block on the chain_monitor so the broadcaster height is
5490+
// consistent when claiming HTLCs.
5491+
let (block_hash, height) = nodes[1].best_block_info();
5492+
let block = create_dummy_block(block_hash, height + 1, vec![as_commitment_tx[0].clone()]);
5493+
let txdata: Vec<_> = block.txdata.iter().enumerate().collect();
5494+
nodes[1].chain_monitor.chain_monitor.transactions_confirmed(&block.header, &txdata, height + 1);
5495+
nodes[1].chain_monitor.chain_monitor.best_block_updated(&block.header, height + 1);
5496+
nodes[1].blocks.lock().unwrap().push((block, height + 1));
5497+
5498+
// Send payment 2 from A to B.
5499+
let (route, payment_hash_2, _, payment_secret_2) =
5500+
get_route_and_payment_hash!(&nodes[0], nodes[1], 1_000_000);
5501+
nodes[0]
5502+
.node
5503+
.send_payment_with_route(
5504+
route,
5505+
payment_hash_2,
5506+
RecipientOnionFields::secret_only(payment_secret_2, 1_000_000),
5507+
PaymentId(payment_hash_2.0),
5508+
)
5509+
.unwrap();
5510+
check_added_monitors(&nodes[0], 1);
5511+
5512+
let mut events = nodes[0].node.get_and_clear_pending_msg_events();
5513+
assert_eq!(events.len(), 1);
5514+
let payment_event = SendEvent::from_event(events.remove(0));
5515+
5516+
nodes[1].node.handle_update_add_htlc(node_a_id, &payment_event.msgs[0]);
5517+
5518+
// B processes commitment_signed. The monitor applies the update but returns Err
5519+
// because no_further_updates_allowed() is true. ChainMonitor overrides to InProgress,
5520+
// freezing the channel.
5521+
nodes[1].node.handle_commitment_signed(node_a_id, &payment_event.commitment_msg[0]);
5522+
check_added_monitors(&nodes[1], 1);
5523+
5524+
// B claims payment 1. The preimage monitor update also returns InProgress (deferred),
5525+
// so no Completed-while-InProgress assertion fires.
5526+
nodes[1].node.claim_funds(payment_preimage_1);
5527+
check_added_monitors(&nodes[1], 1);
5528+
5529+
// First event cycle: the force-close MonitorEvent (CommitmentTxConfirmed) fires first,
5530+
// then the deferred completions resolve. The force-close generates a ChannelForceClosed
5531+
// update (also deferred), which blocks completion actions. So we only get ChannelClosed.
5532+
let events = nodes[1].node.get_and_clear_pending_events();
5533+
assert_eq!(events.len(), 1);
5534+
match &events[0] {
5535+
Event::ChannelClosed { reason: ClosureReason::CommitmentTxConfirmed, .. } => {},
5536+
_ => panic!("Unexpected event: {:?}", events[0]),
5537+
}
5538+
check_added_monitors(&nodes[1], 1);
5539+
nodes[1].node.get_and_clear_pending_msg_events();
5540+
5541+
// Second event cycle: the ChannelForceClosed deferred completion resolves, unblocking
5542+
// the PaymentClaimed completion action.
5543+
let events = nodes[1].node.get_and_clear_pending_events();
5544+
assert_eq!(events.len(), 1);
5545+
match &events[0] {
5546+
Event::PaymentClaimed { payment_hash, amount_msat, .. } => {
5547+
assert_eq!(payment_hash_1, *payment_hash);
5548+
assert_eq!(1_000_000, *amount_msat);
5549+
},
5550+
_ => panic!("Unexpected event: {:?}", events[0]),
5551+
}
5552+
}

0 commit comments

Comments
 (0)