Skip to content

Commit 555e614

Browse files
Persistent monitor events for HTLC forward fails
We are in the process of moving the resolution of all HTLCs from the Channel to the ChannelMonitor, via persistent MonitorEvents. This will simplify how we reconstruct/reconcile the ChannelManager's pending HTLC set on startup and avoid needing to persist pending HTLCs explicitly in the manager, as we can just replay pending monitor events to reconstruct the set instead. In recent commits, we started generating persistent monitor events whenever an HTLC is failed off-chain. We also made those monitor events the sole driver of outbound payment failures, failing those HTLCs when the monitor event is processed. This replaced the previous behavior of failing when we initially receive update_fail_htlc from our peer. In this commit, we continue that work by making persistent monitor events the sole drivers of *forwarded* payment failures. When persistent_monitor_events is enabled, we will skip existing failure paths for forwarded HTLCs, and instead (a) fail them when the monitor event is processed, and (b) similar to what we do for claims, the monitor event will not be acked and will continue to be re-provided on startup until the HTLC is durably removed on the inbound edge.
1 parent 57aa4d6 commit 555e614

6 files changed

Lines changed: 129 additions & 75 deletions

File tree

lightning/src/ln/channel.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,7 +1289,7 @@ pub(crate) struct ShutdownResult {
12891289
/// A list of dropped outbound HTLCs that can safely be failed backwards immediately.
12901290
pub(crate) dropped_outbound_htlcs: Vec<(HTLCSource, PaymentHash, PublicKey, ChannelId)>,
12911291
/// A list of inbound HTLC ids whose forwarded monitor-event ack condition is complete because
1292-
/// the HTLC was already claimed when the channel closed.
1292+
/// the HTLC was already locally removed when the channel closed.
12931293
pub(crate) htlcs_to_ack: Vec<u64>,
12941294
/// An unbroadcasted batch funding transaction id. The closure of this channel should be
12951295
/// propagated to the remainder of the batch.
@@ -6441,8 +6441,9 @@ impl<SP: SignerProvider> ChannelContext<SP> {
64416441
self.channel_id,
64426442
));
64436443
},
6444-
HTLCUpdateAwaitingACK::ClaimHTLC { htlc_id, .. } => htlcs_to_ack.push(htlc_id),
6445-
_ => {},
6444+
HTLCUpdateAwaitingACK::ClaimHTLC { htlc_id, .. }
6445+
| HTLCUpdateAwaitingACK::FailHTLC { htlc_id, .. }
6446+
| HTLCUpdateAwaitingACK::FailMalformedHTLC { htlc_id, .. } => htlcs_to_ack.push(htlc_id),
64466447
}
64476448
}
64486449

@@ -6491,9 +6492,7 @@ impl<SP: SignerProvider> ChannelContext<SP> {
64916492

64926493
// See `ShutdownResult::htlcs_to_ack`.
64936494
for htlc in self.pending_inbound_htlcs.iter() {
6494-
if let InboundHTLCState::LocalRemoved(InboundHTLCRemovalReason::Fulfill { .. }) =
6495-
&htlc.state
6496-
{
6495+
if let InboundHTLCState::LocalRemoved(_) = &htlc.state {
64976496
htlcs_to_ack.push(htlc.htlc_id);
64986497
}
64996498
}
@@ -9255,8 +9254,8 @@ where
92559254
log_trace!(logger, " ...removing inbound LocalRemoved {}", &htlc.payment_hash);
92569255
if let &InboundHTLCRemovalReason::Fulfill { .. } = reason {
92579256
value_to_self_msat_diff += htlc.amount_msat as i64;
9258-
htlc_ids_to_ack.push(htlc.htlc_id);
92599257
}
9258+
htlc_ids_to_ack.push(htlc.htlc_id);
92609259
*expecting_peer_commitment_signed = true;
92619260
false
92629261
} else {

lightning/src/ln/channelmanager.rs

Lines changed: 81 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1633,9 +1633,9 @@ pub(crate) enum MonitorUpdateCompletionAction {
16331633
/// via [`ChannelManager::handle_monitor_event_htlc_ack`]. Only generated when
16341634
/// [`ChannelManager::persistent_monitor_events`] is enabled.
16351635
///
1636-
/// If the inbound edge is open, this action fires when an inbound edge RAA update removing an
1637-
/// HTLC completes. If closed, it fires when the inbound edge has durably persisted the preimage
1638-
/// monitor update.
1636+
/// If the inbound edge is open, this action fires when an inbound edge RAA update removing the
1637+
/// HTLC completes. If the inbound edge is closed and we're claiming, we'll also use this variant
1638+
/// to block acking the monitor event until the inbound edge has durably persisted the preimage.
16391639
///
16401640
/// For claims, we could theoretically ack the outbound edge event once the preimage is durably
16411641
/// persisted in the inbound edge monitor, but if we stop persisting the holding cell in the
@@ -8267,11 +8267,20 @@ impl<
82678267
continue;
82688268
}
82698269
},
8270-
HTLCForwardInfo::FailHTLC { .. } | HTLCForwardInfo::FailMalformedHTLC { .. } => {
8271-
// Channel went away before we could fail it. This implies
8272-
// the channel is now on chain and our counterparty is
8273-
// trying to broadcast the HTLC-Timeout, but that's their
8274-
// problem, not ours.
8270+
HTLCForwardInfo::FailHTLC { htlc_id, upstream_channel_id, .. }
8271+
| HTLCForwardInfo::FailMalformedHTLC { htlc_id, upstream_channel_id, .. } => {
8272+
// Channel went away before we could fail it. This implies the channel is now
8273+
// on chain and our counterparty is trying to broadcast the HTLC-Timeout, but
8274+
// that's their problem, not ours.
8275+
8276+
// If `persistent_monitor_events` is enabled, there may be a monitor event generated by
8277+
// the outbound edge resolving this HTLC. That needs to be acked now or it will dangle
8278+
// forever.
8279+
if let Some(upstream_channel_id) = upstream_channel_id {
8280+
if self.persistent_monitor_events {
8281+
self.handle_monitor_event_htlc_ack(htlc_id, upstream_channel_id);
8282+
}
8283+
}
82758284
},
82768285
}
82778286
}
@@ -8547,6 +8556,11 @@ impl<
85478556
} else {
85488557
panic!("Stated return value requirements in queue_fail_{{malformed_}}htlc() were not met");
85498558
}
8559+
if self.persistent_monitor_events && htlc_not_found {
8560+
// The HTLC was already fully removed from the channel, so no future RAA
8561+
// will fire the usual ack path. Ack the outbound monitor event directly.
8562+
self.handle_monitor_event_htlc_ack(htlc_id, forward_chan_id);
8563+
}
85508564
// fail-backs are best-effort, we probably already have one
85518565
// pending, and if not that's OK, if not, the channel is on
85528566
// the chain and sending the HTLC-Timeout is their problem.
@@ -10791,9 +10805,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1079110805
self.forward_htlcs(htlc_forwards);
1079210806
self.finalize_claims(finalized_claimed_htlcs);
1079310807
for failure in failed_htlcs {
10794-
if self.persistent_monitor_events
10795-
&& matches!(failure.0, HTLCSource::OutboundRoute { .. })
10796-
{
10808+
if self.persistent_monitor_events {
1079710809
// The MonitorEvent::HTLCEvent generated when the previous counterparty commitment
1079810810
// is pruned will drive the failure instead.
1079910811
continue;
@@ -14199,20 +14211,20 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1419914211
Some(channel_id),
1420014212
Some(htlc_update.payment_hash),
1420114213
);
14214+
if self.persistent_monitor_events {
14215+
self.register_pending_forward_monitor_event_acks(
14216+
htlc_update.payment_hash,
14217+
htlc_update.source.previous_hop_data(),
14218+
monitor_event_source,
14219+
);
14220+
}
1420214221
match htlc_update.resolution {
1420314222
OutboundHTLCResolution::Claimed { preimage, skimmed_fee_msat } => {
1420414223
log_trace!(
1420514224
logger,
1420614225
"Claiming HTLC with preimage {} from our monitor",
1420714226
preimage
1420814227
);
14209-
if self.persistent_monitor_events {
14210-
self.register_pending_forward_monitor_event_acks(
14211-
htlc_update.payment_hash,
14212-
htlc_update.source.previous_hop_data(),
14213-
monitor_event_source,
14214-
);
14215-
}
1421614228
// Claim the funds from the previous hop, if there is one. In the future we can
1421714229
// store attribution data in the `ChannelMonitor` and provide it here.
1421814230
self.claim_funds_internal(
@@ -14235,35 +14247,41 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1423514247
self.channel_is_closed(&channel_id, &counterparty_node_id);
1423614248
let we_are_sender =
1423714249
matches!(htlc_update.source, HTLCSource::OutboundRoute { .. });
14238-
if from_onchain | we_are_sender {
14239-
log_trace!(logger, "Failing HTLC from our monitor");
14240-
let failure_type = htlc_update
14241-
.source
14242-
.failure_type(counterparty_node_id, channel_id);
14243-
14244-
let completion_update =
14245-
if self.persistent_monitor_events && we_are_sender {
14246-
EventCompletionAction::AckMonitorEvent {
14247-
event_id: monitor_event_source,
14248-
}
14249-
} else {
14250-
EventCompletionAction::ReleasePaymentCompleteChannelMonitorUpdate(PaymentCompleteUpdate {
14251-
counterparty_node_id,
14252-
channel_funding_outpoint: funding_outpoint,
14253-
channel_id,
14254-
htlc_id: SentHTLCId::from_source(&htlc_update.source),
14255-
})
14256-
};
14257-
14258-
self.fail_htlc_backwards_internal(
14259-
&htlc_update.source,
14260-
&htlc_update.payment_hash,
14261-
&reason,
14262-
failure_type,
14263-
Some(completion_update),
14264-
);
14265-
}
14266-
if !we_are_sender {
14250+
log_trace!(logger, "Failing HTLC from our monitor");
14251+
let failure_type = htlc_update
14252+
.source
14253+
.failure_type(counterparty_node_id, channel_id);
14254+
14255+
let completion_update = if self.persistent_monitor_events {
14256+
if we_are_sender {
14257+
// Ack the monitor event once PaymentFailed is processed by the user
14258+
Some(EventCompletionAction::AckMonitorEvent {
14259+
event_id: monitor_event_source,
14260+
})
14261+
} else {
14262+
// We'll ack the monitor event when it's removed from the inbound edge on RAA
14263+
None
14264+
}
14265+
} else {
14266+
if from_onchain {
14267+
Some(EventCompletionAction::ReleasePaymentCompleteChannelMonitorUpdate(PaymentCompleteUpdate {
14268+
counterparty_node_id,
14269+
channel_funding_outpoint: funding_outpoint,
14270+
channel_id,
14271+
htlc_id: SentHTLCId::from_source(&htlc_update.source),
14272+
}))
14273+
} else {
14274+
None
14275+
}
14276+
};
14277+
self.fail_htlc_backwards_internal(
14278+
&htlc_update.source,
14279+
&htlc_update.payment_hash,
14280+
&reason,
14281+
failure_type,
14282+
completion_update,
14283+
);
14284+
if !self.persistent_monitor_events {
1426714285
self.chain_monitor.ack_monitor_event(monitor_event_source);
1426814286
}
1426914287
},
@@ -21327,19 +21345,22 @@ impl<
2132721345
ev_action,
2132821346
);
2132921347
}
21330-
for ((_, hash), htlcs) in already_forwarded_htlcs.into_iter() {
21331-
for (htlc, _) in htlcs {
21332-
let channel_id = htlc.channel_id;
21333-
let node_id = htlc.counterparty_node_id;
21334-
let source = HTLCSource::PreviousHopData(htlc);
21335-
let failure_reason = LocalHTLCFailureReason::TemporaryChannelFailure;
21336-
let failure_data = channel_manager.get_htlc_inbound_temp_fail_data(failure_reason);
21337-
let reason = HTLCFailReason::reason(failure_reason, failure_data);
21338-
let receiver = HTLCHandlingFailureType::Forward { node_id, channel_id };
21339-
// The event completion action is only relevant for HTLCs that originate from our node, not
21340-
// forwarded HTLCs.
21341-
channel_manager
21342-
.fail_htlc_backwards_internal(&source, &hash, &reason, receiver, None);
21348+
if !channel_manager.persistent_monitor_events {
21349+
for ((_, hash), htlcs) in already_forwarded_htlcs.into_iter() {
21350+
for (htlc, _) in htlcs {
21351+
let channel_id = htlc.channel_id;
21352+
let node_id = htlc.counterparty_node_id;
21353+
let source = HTLCSource::PreviousHopData(htlc);
21354+
let failure_reason = LocalHTLCFailureReason::TemporaryChannelFailure;
21355+
let failure_data =
21356+
channel_manager.get_htlc_inbound_temp_fail_data(failure_reason);
21357+
let reason = HTLCFailReason::reason(failure_reason, failure_data);
21358+
let receiver = HTLCHandlingFailureType::Forward { node_id, channel_id };
21359+
// The event completion action is only relevant for HTLCs that originate from our node, not
21360+
// forwarded HTLCs.
21361+
channel_manager
21362+
.fail_htlc_backwards_internal(&source, &hash, &reason, receiver, None);
21363+
}
2134321364
}
2134421365
}
2134521366

lightning/src/ln/functional_test_utils.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -806,9 +806,9 @@ impl<'a, 'b, 'c> Drop for Node<'a, 'b, 'c> {
806806
let unacked_monitor_events = monitor.drain_unacked_monitor_events();
807807
if !unacked_monitor_events.is_empty() {
808808
panic!(
809-
"Node {} channel {channel_id:?} had {} unacked monitor events at drop: {unacked_monitor_events:#?}",
810-
unacked_monitor_events.len(),
811-
self.logger.id
809+
"{} had {} unacked monitor events on channel {channel_id:?} at drop: {unacked_monitor_events:#?}",
810+
self.logger.id,
811+
unacked_monitor_events.len()
812812
);
813813
}
814814
}

lightning/src/ln/functional_tests.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1821,6 +1821,13 @@ fn do_test_htlc_on_chain_timeout(connect_style: ConnectStyle) {
18211821
let commitment_tx = get_local_commitment_txn!(nodes[1], chan_1.2);
18221822
check_spends!(commitment_tx[0], chan_1.3);
18231823

1824+
// Close the channel from each node's perspective, by mining the commitment on each of their
1825+
// chains
1826+
mine_transaction(&nodes[1], &commitment_tx[0]);
1827+
check_closed_broadcast(&nodes[1], 1, true);
1828+
check_added_monitors(&nodes[1], 1);
1829+
check_closed_event(&nodes[1], 1, ClosureReason::CommitmentTxConfirmed, &[node_a_id], 100000);
1830+
18241831
mine_transaction(&nodes[0], &commitment_tx[0]);
18251832
connect_blocks(&nodes[0], TEST_FINAL_CLTV + MIN_CLTV_EXPIRY_DELTA as u32); // Confirm blocks until the HTLC expires
18261833

@@ -5681,7 +5688,7 @@ pub fn test_update_fulfill_htlc_bolt2_after_malformed_htlc_message_must_forward_
56815688
assert_eq!(events_4.len(), 1);
56825689

56835690
//Confirm that handlinge the update_malformed_htlc message produces an update_fail_htlc message to be forwarded back along the route
5684-
match events_4[0] {
5691+
let ba_fail = match events_4[0] {
56855692
MessageSendEvent::UpdateHTLCs {
56865693
updates:
56875694
msgs::CommitmentUpdate {
@@ -5690,7 +5697,7 @@ pub fn test_update_fulfill_htlc_bolt2_after_malformed_htlc_message_must_forward_
56905697
ref update_fail_htlcs,
56915698
ref update_fail_malformed_htlcs,
56925699
ref update_fee,
5693-
..
5700+
ref commitment_signed,
56945701
},
56955702
..
56965703
} => {
@@ -5699,11 +5706,18 @@ pub fn test_update_fulfill_htlc_bolt2_after_malformed_htlc_message_must_forward_
56995706
assert_eq!(update_fail_htlcs.len(), 1);
57005707
assert!(update_fail_malformed_htlcs.is_empty());
57015708
assert!(update_fee.is_none());
5709+
(update_fail_htlcs[0].clone(), commitment_signed.clone())
57025710
},
57035711
_ => panic!("Unexpected event"),
57045712
};
57055713

57065714
check_added_monitors(&nodes[1], 1);
5715+
5716+
// Push the fail back to A so the A-B inbound RAA completes and acks the outbound-edge
5717+
// monitor event on B-C.
5718+
nodes[0].node.handle_update_fail_htlc(node_b_id, &ba_fail.0);
5719+
do_commitment_signed_dance(&nodes[0], &nodes[1], &ba_fail.1, false, true);
5720+
expect_payment_failed!(nodes[0], our_payment_hash, false);
57075721
}
57085722

57095723
#[xtest(feature = "_externalize_tests")]

lightning/src/ln/payment_tests.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5090,6 +5090,19 @@ fn do_test_payment_metadata_consistency(do_reload: bool, do_modify: bool) {
50905090
let fail_type =
50915091
HTLCHandlingFailureType::Forward { node_id: Some(node_d_id), channel_id: cd_chan_id };
50925092
expect_htlc_failure_conditions(events, &[fail_type]);
5093+
5094+
expect_and_process_pending_htlcs(&nodes[2], false);
5095+
check_added_monitors(&nodes[2], 1);
5096+
let cs_fail_a = get_htlc_update_msgs(&nodes[2], &node_a_id);
5097+
nodes[0].node.handle_update_fail_htlc(node_c_id, &cs_fail_a.update_fail_htlcs[0]);
5098+
do_commitment_signed_dance(&nodes[0], &nodes[2], &cs_fail_a.commitment_signed, false, true);
5099+
// The other MPP part is still in flight at D, so only PaymentPathFailed fires here.
5100+
expect_payment_failed_conditions(
5101+
&nodes[0],
5102+
payment_hash,
5103+
true,
5104+
PaymentFailedConditions::new().mpp_parts_remain(),
5105+
);
50935106
} else {
50945107
expect_and_process_pending_htlcs(&nodes[3], false);
50955108
expect_payment_claimable!(nodes[3], payment_hash, payment_secret, amt_msat);

lightning/src/ln/reload_tests.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1544,7 +1544,12 @@ fn removed_payment_no_manager_persistence() {
15441544
_ => panic!("Unexpected event"),
15451545
}
15461546

1547-
expect_payment_failed!(nodes[0], payment_hash, false);
1547+
// Whether nodes[0] sees a permanent failure depends on nodes[1]'s mode: with persistent
1548+
// monitor events nodes[1] re-provides the recipient's `IncorrectPaymentDetails` rejection via
1549+
// a monitor event; otherwise it proactively fails the forwarded HTLC back with
1550+
// `ChannelClosed` (non-permanent).
1551+
let payment_failed_permanently = nodes[1].node.test_persistent_monitor_events_enabled();
1552+
expect_payment_failed!(nodes[0], payment_hash, payment_failed_permanently);
15481553
}
15491554

15501555
#[test]
@@ -2455,8 +2460,10 @@ fn test_reload_node_without_preimage_fails_htlc() {
24552460
reconnect_args.pending_cell_htlc_fails = (0, 1);
24562461
reconnect_nodes(reconnect_args);
24572462

2458-
// nodes[0] should now have received the failure and generate PaymentFailed.
2459-
expect_payment_failed_conditions(&nodes[0], payment_hash, false, PaymentFailedConditions::new());
2463+
// nodes[0] should now have received the failure and generate PaymentFailed. If persistent
2464+
// monitor events are enabled, nodes[1] will remember that the recipient rejected the payment.
2465+
let payment_failed_permanently = nodes[1].node.test_persistent_monitor_events_enabled();
2466+
expect_payment_failed_conditions(&nodes[0], payment_hash, payment_failed_permanently, PaymentFailedConditions::new());
24602467
}
24612468

24622469
#[test]

0 commit comments

Comments
 (0)