Skip to content

Commit e1021bc

Browse files
Persistent monitor events for HTLC forward fails
We are in the process of moving the resolution of all HTLCs from the Channel to the ChannelMonitor, via persistent MonitorEvents. This will simplify how we reconstruct/reconcile the ChannelManager's pending HTLC set on startup and avoid needing to persist pending HTLCs explicitly in the manager, as we can just replay pending monitor events to reconstruct the set instead. In recent commits, we started generating persistent monitor events whenever an HTLC is failed off-chain. We also made those monitor events the sole driver of outbound payment failures, failing those HTLCs when the monitor event is processed. This replaced the previous behavior of failing when we initially receive update_fail_htlc from our peer. In this commit, we continue that work by making persistent monitor events the sole drivers of *forwarded* payment failures. When persistent_monitor_events is enabled, we will skip existing failure paths for forwarded HTLCs, and instead (a) fail them when the monitor event is processed, and (b) similar to what we do for claims, the monitor event will not be acked and will continue to be re-provided on startup until the HTLC is durably removed on the inbound edge.
1 parent 0344807 commit e1021bc

6 files changed

Lines changed: 129 additions & 75 deletions

File tree

lightning/src/ln/channel.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,7 +1293,7 @@ pub(crate) struct ShutdownResult {
12931293
/// A list of dropped outbound HTLCs that can safely be failed backwards immediately.
12941294
pub(crate) dropped_outbound_htlcs: Vec<(HTLCSource, PaymentHash, PublicKey, ChannelId)>,
12951295
/// A list of inbound HTLC ids whose forwarded monitor-event ack condition is complete because
1296-
/// the HTLC was already claimed when the channel closed.
1296+
/// the HTLC was already locally removed when the channel closed.
12971297
pub(crate) htlcs_to_ack: Vec<u64>,
12981298
/// An unbroadcasted batch funding transaction id. The closure of this channel should be
12991299
/// propagated to the remainder of the batch.
@@ -6451,8 +6451,9 @@ impl<SP: SignerProvider> ChannelContext<SP> {
64516451
self.channel_id,
64526452
));
64536453
},
6454-
HTLCUpdateAwaitingACK::ClaimHTLC { htlc_id, .. } => htlcs_to_ack.push(htlc_id),
6455-
_ => {},
6454+
HTLCUpdateAwaitingACK::ClaimHTLC { htlc_id, .. }
6455+
| HTLCUpdateAwaitingACK::FailHTLC { htlc_id, .. }
6456+
| HTLCUpdateAwaitingACK::FailMalformedHTLC { htlc_id, .. } => htlcs_to_ack.push(htlc_id),
64566457
}
64576458
}
64586459

@@ -6501,9 +6502,7 @@ impl<SP: SignerProvider> ChannelContext<SP> {
65016502

65026503
// See `ShutdownResult::htlcs_to_ack`.
65036504
for htlc in self.pending_inbound_htlcs.iter() {
6504-
if let InboundHTLCState::LocalRemoved(InboundHTLCRemovalReason::Fulfill { .. }) =
6505-
&htlc.state
6506-
{
6505+
if let InboundHTLCState::LocalRemoved(_) = &htlc.state {
65076506
htlcs_to_ack.push(htlc.htlc_id);
65086507
}
65096508
}
@@ -9265,8 +9264,8 @@ where
92659264
log_trace!(logger, " ...removing inbound LocalRemoved {}", &htlc.payment_hash);
92669265
if let &InboundHTLCRemovalReason::Fulfill { .. } = reason {
92679266
value_to_self_msat_diff += htlc.amount_msat as i64;
9268-
htlc_ids_to_ack.push(htlc.htlc_id);
92699267
}
9268+
htlc_ids_to_ack.push(htlc.htlc_id);
92709269
*expecting_peer_commitment_signed = true;
92719270
false
92729271
} else {

lightning/src/ln/channelmanager.rs

Lines changed: 81 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1633,9 +1633,9 @@ pub(crate) enum MonitorUpdateCompletionAction {
16331633
/// via [`ChannelManager::handle_monitor_event_htlc_ack`]. Only generated when
16341634
/// [`ChannelManager::persistent_monitor_events`] is enabled.
16351635
///
1636-
/// If the inbound edge is open, this action fires when an inbound edge RAA update removing an
1637-
/// HTLC completes. If closed, it fires when the inbound edge has durably persisted the preimage
1638-
/// monitor update.
1636+
/// If the inbound edge is open, this action fires when an inbound edge RAA update removing the
1637+
/// HTLC completes. If the inbound edge is closed and we're claiming, we'll also use this variant
1638+
/// to block acking the monitor event until the inbound edge has durably persisted the preimage.
16391639
///
16401640
/// For claims, we could theoretically ack the outbound edge event once the preimage is durably
16411641
/// persisted in the inbound edge monitor, but if we stop persisting the holding cell in the
@@ -8272,11 +8272,20 @@ impl<
82728272
continue;
82738273
}
82748274
},
8275-
HTLCForwardInfo::FailHTLC { .. } | HTLCForwardInfo::FailMalformedHTLC { .. } => {
8276-
// Channel went away before we could fail it. This implies
8277-
// the channel is now on chain and our counterparty is
8278-
// trying to broadcast the HTLC-Timeout, but that's their
8279-
// problem, not ours.
8275+
HTLCForwardInfo::FailHTLC { htlc_id, upstream_channel_id, .. }
8276+
| HTLCForwardInfo::FailMalformedHTLC { htlc_id, upstream_channel_id, .. } => {
8277+
// Channel went away before we could fail it. This implies the channel is now
8278+
// on chain and our counterparty is trying to broadcast the HTLC-Timeout, but
8279+
// that's their problem, not ours.
8280+
8281+
// If `persistent_monitor_events` is enabled, there may be a monitor event generated by
8282+
// the outbound edge resolving this HTLC. That needs to be acked now or it will dangle
8283+
// forever.
8284+
if let Some(upstream_channel_id) = upstream_channel_id {
8285+
if self.persistent_monitor_events {
8286+
self.handle_monitor_event_htlc_ack(htlc_id, upstream_channel_id);
8287+
}
8288+
}
82808289
},
82818290
}
82828291
}
@@ -8552,6 +8561,11 @@ impl<
85528561
} else {
85538562
panic!("Stated return value requirements in queue_fail_{{malformed_}}htlc() were not met");
85548563
}
8564+
if self.persistent_monitor_events && htlc_not_found {
8565+
// The HTLC was already fully removed from the channel, so no future RAA
8566+
// will fire the usual ack path. Ack the outbound monitor event directly.
8567+
self.handle_monitor_event_htlc_ack(htlc_id, forward_chan_id);
8568+
}
85558569
// fail-backs are best-effort, we probably already have one
85568570
// pending, and if not that's OK, if not, the channel is on
85578571
// the chain and sending the HTLC-Timeout is their problem.
@@ -10796,9 +10810,7 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1079610810
self.forward_htlcs(htlc_forwards);
1079710811
self.finalize_claims(finalized_claimed_htlcs);
1079810812
for failure in failed_htlcs {
10799-
if self.persistent_monitor_events
10800-
&& matches!(failure.0, HTLCSource::OutboundRoute { .. })
10801-
{
10813+
if self.persistent_monitor_events {
1080210814
// The MonitorEvent::HTLCEvent generated when the previous counterparty commitment
1080310815
// is pruned will drive the failure instead.
1080410816
continue;
@@ -14298,20 +14310,20 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1429814310
Some(channel_id),
1429914311
Some(htlc_update.payment_hash),
1430014312
);
14313+
if self.persistent_monitor_events {
14314+
self.register_pending_forward_monitor_event_acks(
14315+
htlc_update.payment_hash,
14316+
htlc_update.source.previous_hop_data(),
14317+
monitor_event_source,
14318+
);
14319+
}
1430114320
match htlc_update.resolution {
1430214321
OutboundHTLCResolution::Claimed { preimage, skimmed_fee_msat } => {
1430314322
log_trace!(
1430414323
logger,
1430514324
"Claiming HTLC with preimage {} from our monitor",
1430614325
preimage
1430714326
);
14308-
if self.persistent_monitor_events {
14309-
self.register_pending_forward_monitor_event_acks(
14310-
htlc_update.payment_hash,
14311-
htlc_update.source.previous_hop_data(),
14312-
monitor_event_source,
14313-
);
14314-
}
1431514327
// Claim the funds from the previous hop, if there is one. In the future we can
1431614328
// store attribution data in the `ChannelMonitor` and provide it here.
1431714329
self.claim_funds_internal(
@@ -14334,35 +14346,41 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1433414346
self.channel_is_closed(&channel_id, &counterparty_node_id);
1433514347
let we_are_sender =
1433614348
matches!(htlc_update.source, HTLCSource::OutboundRoute { .. });
14337-
if from_onchain | we_are_sender {
14338-
log_trace!(logger, "Failing HTLC from our monitor");
14339-
let failure_type = htlc_update
14340-
.source
14341-
.failure_type(counterparty_node_id, channel_id);
14342-
14343-
let completion_update =
14344-
if self.persistent_monitor_events && we_are_sender {
14345-
EventCompletionAction::AckMonitorEvent {
14346-
event_id: monitor_event_source,
14347-
}
14348-
} else {
14349-
EventCompletionAction::ReleasePaymentCompleteChannelMonitorUpdate(PaymentCompleteUpdate {
14350-
counterparty_node_id,
14351-
channel_funding_outpoint: funding_outpoint,
14352-
channel_id,
14353-
htlc_id: SentHTLCId::from_source(&htlc_update.source),
14354-
})
14355-
};
14356-
14357-
self.fail_htlc_backwards_internal(
14358-
&htlc_update.source,
14359-
&htlc_update.payment_hash,
14360-
&reason,
14361-
failure_type,
14362-
Some(completion_update),
14363-
);
14364-
}
14365-
if !we_are_sender {
14349+
log_trace!(logger, "Failing HTLC from our monitor");
14350+
let failure_type = htlc_update
14351+
.source
14352+
.failure_type(counterparty_node_id, channel_id);
14353+
14354+
let completion_update = if self.persistent_monitor_events {
14355+
if we_are_sender {
14356+
// Ack the monitor event once PaymentFailed is processed by the user
14357+
Some(EventCompletionAction::AckMonitorEvent {
14358+
event_id: monitor_event_source,
14359+
})
14360+
} else {
14361+
// We'll ack the monitor event when it's removed from the inbound edge on RAA
14362+
None
14363+
}
14364+
} else {
14365+
if from_onchain {
14366+
Some(EventCompletionAction::ReleasePaymentCompleteChannelMonitorUpdate(PaymentCompleteUpdate {
14367+
counterparty_node_id,
14368+
channel_funding_outpoint: funding_outpoint,
14369+
channel_id,
14370+
htlc_id: SentHTLCId::from_source(&htlc_update.source),
14371+
}))
14372+
} else {
14373+
None
14374+
}
14375+
};
14376+
self.fail_htlc_backwards_internal(
14377+
&htlc_update.source,
14378+
&htlc_update.payment_hash,
14379+
&reason,
14380+
failure_type,
14381+
completion_update,
14382+
);
14383+
if !self.persistent_monitor_events {
1436614384
self.chain_monitor.ack_monitor_event(monitor_event_source);
1436714385
}
1436814386
},
@@ -21423,19 +21441,22 @@ impl<
2142321441
ev_action,
2142421442
);
2142521443
}
21426-
for ((_, hash), htlcs) in already_forwarded_htlcs.into_iter() {
21427-
for (htlc, _) in htlcs {
21428-
let channel_id = htlc.channel_id;
21429-
let node_id = htlc.counterparty_node_id;
21430-
let source = HTLCSource::PreviousHopData(htlc);
21431-
let failure_reason = LocalHTLCFailureReason::TemporaryChannelFailure;
21432-
let failure_data = channel_manager.get_htlc_inbound_temp_fail_data(failure_reason);
21433-
let reason = HTLCFailReason::reason(failure_reason, failure_data);
21434-
let receiver = HTLCHandlingFailureType::Forward { node_id, channel_id };
21435-
// The event completion action is only relevant for HTLCs that originate from our node, not
21436-
// forwarded HTLCs.
21437-
channel_manager
21438-
.fail_htlc_backwards_internal(&source, &hash, &reason, receiver, None);
21444+
if !channel_manager.persistent_monitor_events {
21445+
for ((_, hash), htlcs) in already_forwarded_htlcs.into_iter() {
21446+
for (htlc, _) in htlcs {
21447+
let channel_id = htlc.channel_id;
21448+
let node_id = htlc.counterparty_node_id;
21449+
let source = HTLCSource::PreviousHopData(htlc);
21450+
let failure_reason = LocalHTLCFailureReason::TemporaryChannelFailure;
21451+
let failure_data =
21452+
channel_manager.get_htlc_inbound_temp_fail_data(failure_reason);
21453+
let reason = HTLCFailReason::reason(failure_reason, failure_data);
21454+
let receiver = HTLCHandlingFailureType::Forward { node_id, channel_id };
21455+
// The event completion action is only relevant for HTLCs that originate from our node, not
21456+
// forwarded HTLCs.
21457+
channel_manager
21458+
.fail_htlc_backwards_internal(&source, &hash, &reason, receiver, None);
21459+
}
2143921460
}
2144021461
}
2144121462

lightning/src/ln/functional_test_utils.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -806,9 +806,9 @@ impl<'a, 'b, 'c> Drop for Node<'a, 'b, 'c> {
806806
let unacked_monitor_events = monitor.drain_unacked_monitor_events();
807807
if !unacked_monitor_events.is_empty() {
808808
panic!(
809-
"Node {} channel {channel_id:?} had {} unacked monitor events at drop: {unacked_monitor_events:#?}",
810-
unacked_monitor_events.len(),
811-
self.logger.id
809+
"{} had {} unacked monitor events on channel {channel_id:?} at drop: {unacked_monitor_events:#?}",
810+
self.logger.id,
811+
unacked_monitor_events.len()
812812
);
813813
}
814814
}

lightning/src/ln/functional_tests.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1821,6 +1821,13 @@ fn do_test_htlc_on_chain_timeout(connect_style: ConnectStyle) {
18211821
let commitment_tx = get_local_commitment_txn!(nodes[1], chan_1.2);
18221822
check_spends!(commitment_tx[0], chan_1.3);
18231823

1824+
// Close the channel from each node's perspective, by mining the commitment on each of their
1825+
// chains
1826+
mine_transaction(&nodes[1], &commitment_tx[0]);
1827+
check_closed_broadcast(&nodes[1], 1, true);
1828+
check_added_monitors(&nodes[1], 1);
1829+
check_closed_event(&nodes[1], 1, ClosureReason::CommitmentTxConfirmed, &[node_a_id], 100000);
1830+
18241831
mine_transaction(&nodes[0], &commitment_tx[0]);
18251832
connect_blocks(&nodes[0], TEST_FINAL_CLTV + MIN_CLTV_EXPIRY_DELTA as u32); // Confirm blocks until the HTLC expires
18261833

@@ -5681,7 +5688,7 @@ pub fn test_update_fulfill_htlc_bolt2_after_malformed_htlc_message_must_forward_
56815688
assert_eq!(events_4.len(), 1);
56825689

56835690
//Confirm that handlinge the update_malformed_htlc message produces an update_fail_htlc message to be forwarded back along the route
5684-
match events_4[0] {
5691+
let ba_fail = match events_4[0] {
56855692
MessageSendEvent::UpdateHTLCs {
56865693
updates:
56875694
msgs::CommitmentUpdate {
@@ -5690,7 +5697,7 @@ pub fn test_update_fulfill_htlc_bolt2_after_malformed_htlc_message_must_forward_
56905697
ref update_fail_htlcs,
56915698
ref update_fail_malformed_htlcs,
56925699
ref update_fee,
5693-
..
5700+
ref commitment_signed,
56945701
},
56955702
..
56965703
} => {
@@ -5699,11 +5706,18 @@ pub fn test_update_fulfill_htlc_bolt2_after_malformed_htlc_message_must_forward_
56995706
assert_eq!(update_fail_htlcs.len(), 1);
57005707
assert!(update_fail_malformed_htlcs.is_empty());
57015708
assert!(update_fee.is_none());
5709+
(update_fail_htlcs[0].clone(), commitment_signed.clone())
57025710
},
57035711
_ => panic!("Unexpected event"),
57045712
};
57055713

57065714
check_added_monitors(&nodes[1], 1);
5715+
5716+
// Push the fail back to A so the A-B inbound RAA completes and acks the outbound-edge
5717+
// monitor event on B-C.
5718+
nodes[0].node.handle_update_fail_htlc(node_b_id, &ba_fail.0);
5719+
do_commitment_signed_dance(&nodes[0], &nodes[1], &ba_fail.1, false, true);
5720+
expect_payment_failed!(nodes[0], our_payment_hash, false);
57075721
}
57085722

57095723
#[xtest(feature = "_externalize_tests")]

lightning/src/ln/payment_tests.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5278,6 +5278,19 @@ fn do_test_payment_metadata_consistency(do_reload: bool, do_modify: bool) {
52785278
let fail_type =
52795279
HTLCHandlingFailureType::Forward { node_id: Some(node_d_id), channel_id: cd_chan_id };
52805280
expect_htlc_failure_conditions(events, &[fail_type]);
5281+
5282+
expect_and_process_pending_htlcs(&nodes[2], false);
5283+
check_added_monitors(&nodes[2], 1);
5284+
let cs_fail_a = get_htlc_update_msgs(&nodes[2], &node_a_id);
5285+
nodes[0].node.handle_update_fail_htlc(node_c_id, &cs_fail_a.update_fail_htlcs[0]);
5286+
do_commitment_signed_dance(&nodes[0], &nodes[2], &cs_fail_a.commitment_signed, false, true);
5287+
// The other MPP part is still in flight at D, so only PaymentPathFailed fires here.
5288+
expect_payment_failed_conditions(
5289+
&nodes[0],
5290+
payment_hash,
5291+
true,
5292+
PaymentFailedConditions::new().mpp_parts_remain(),
5293+
);
52815294
} else {
52825295
expect_and_process_pending_htlcs(&nodes[3], false);
52835296
expect_payment_claimable!(nodes[3], payment_hash, payment_secret, amt_msat);

lightning/src/ln/reload_tests.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1544,7 +1544,12 @@ fn removed_payment_no_manager_persistence() {
15441544
_ => panic!("Unexpected event"),
15451545
}
15461546

1547-
expect_payment_failed!(nodes[0], payment_hash, false);
1547+
// Whether nodes[0] sees a permanent failure depends on nodes[1]'s mode: with persistent
1548+
// monitor events nodes[1] re-provides the recipient's `IncorrectPaymentDetails` rejection via
1549+
// a monitor event; otherwise it proactively fails the forwarded HTLC back with
1550+
// `ChannelClosed` (non-permanent).
1551+
let payment_failed_permanently = nodes[1].node.test_persistent_monitor_events_enabled();
1552+
expect_payment_failed!(nodes[0], payment_hash, payment_failed_permanently);
15481553
}
15491554

15501555
#[test]
@@ -2455,8 +2460,10 @@ fn test_reload_node_without_preimage_fails_htlc() {
24552460
reconnect_args.pending_cell_htlc_fails = (0, 1);
24562461
reconnect_nodes(reconnect_args);
24572462

2458-
// nodes[0] should now have received the failure and generate PaymentFailed.
2459-
expect_payment_failed_conditions(&nodes[0], payment_hash, false, PaymentFailedConditions::new());
2463+
// nodes[0] should now have received the failure and generate PaymentFailed. If persistent
2464+
// monitor events are enabled, nodes[1] will remember that the recipient rejected the payment.
2465+
let payment_failed_permanently = nodes[1].node.test_persistent_monitor_events_enabled();
2466+
expect_payment_failed_conditions(&nodes[0], payment_hash, payment_failed_permanently, PaymentFailedConditions::new());
24602467
}
24612468

24622469
#[test]

0 commit comments

Comments
 (0)