Skip to content

Commit f2f3f26

Browse files
Fix replay of preimage monitor events for fwds
Currently, the resolution of HTLCs (and decisions on when HTLCs can be forwarded) is the responsibility of Channel objects (a part of ChannelManager) until the channel is closed, and then the ChannelMonitor thereafter. This leads to some complexity around race conditions for HTLCs right around channel closure. Additionally, there is lots of complexity reconstructing the state of all HTLCs in the ChannelManager deserialization/loading logic. Instead, we want to do all resolution in ChannelMonitors (in response to ChannelMonitorUpdates) and pass them back to ChannelManager in the form of MonitorEvents (similar to how HTLCs are resolved after channels are closed). In order to have reliable resolution, we'll need to keep MonitorEvents around in the ChannelMonitor until the ChannelManager has finished processing them. This will simplify things - on restart instead of examining the set of HTLCs in monitors we can simply replay all the pending MonitorEvents. If the outbound edge of a forwarded payment is resolved on-chain via preimage, and we get a monitor event for it, we want to mark that monitor event resolved once the preimage is durably persisted in the inbound edge. Hence, we add the monitor event resolution to the completion of the inbound edge's preimage monitor update.
1 parent 550df93 commit f2f3f26

1 file changed

Lines changed: 38 additions & 0 deletions

File tree

lightning/src/ln/channelmanager.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1475,6 +1475,9 @@ pub(crate) enum MonitorUpdateCompletionAction {
14751475
EmitEventOptionAndFreeOtherChannel {
14761476
event: Option<events::Event>,
14771477
downstream_counterparty_and_funding_outpoint: EventUnblockedChannel,
1478+
/// If this action originated from a [`MonitorEvent`], the source to acknowledge once the
1479+
/// action is handled.
1480+
monitor_event_source: Option<MonitorEventSource>,
14781481
},
14791482
/// Indicates we should immediately resume the operation of another channel, unless there is
14801483
/// some other reason why the channel is blocked. In practice this simply means immediately
@@ -1492,6 +1495,9 @@ pub(crate) enum MonitorUpdateCompletionAction {
14921495
downstream_counterparty_node_id: PublicKey,
14931496
blocking_action: RAAMonitorUpdateBlockingAction,
14941497
downstream_channel_id: ChannelId,
1498+
/// If this action originated from a [`MonitorEvent`], the source to acknowledge once the
1499+
/// action is handled.
1500+
monitor_event_source: Option<MonitorEventSource>,
14951501
},
14961502
/// Indicates that one or more [`MonitorEvent`]s should be acknowledged once the associated
14971503
/// [`ChannelMonitorUpdate`] has been durably persisted.
@@ -1509,13 +1515,15 @@ impl_writeable_tlv_based_enum_upgradable!(MonitorUpdateCompletionAction,
15091515
(0, downstream_counterparty_node_id, required),
15101516
(4, blocking_action, upgradable_required),
15111517
(5, downstream_channel_id, required),
1518+
(7, monitor_event_source, option),
15121519
},
15131520
(2, EmitEventOptionAndFreeOtherChannel) => {
15141521
// LDK prior to 0.3 required this field. It will not be present for trampoline payments
15151522
// with multiple incoming HTLCS, so nodes cannot downgrade while trampoline payments
15161523
// are in the process of being resolved.
15171524
(0, event, upgradable_option),
15181525
(1, downstream_counterparty_and_funding_outpoint, upgradable_required),
1526+
(3, monitor_event_source, option),
15191527
},
15201528
(3, AckMonitorEvents) => {
15211529
(0, monitor_events_to_ack, required_vec),
@@ -9438,6 +9446,7 @@ impl<
94389446
startup_replay: bool, next_channel_counterparty_node_id: PublicKey,
94399447
next_channel_outpoint: OutPoint, next_channel_id: ChannelId, hop_data: HTLCPreviousHopData,
94409448
attribution_data: Option<AttributionData>, send_timestamp: Option<Duration>,
9449+
monitor_event_source: Option<MonitorEventSource>,
94419450
) {
94429451
let _prev_channel_id = hop_data.channel_id;
94439452
let completed_blocker = RAAMonitorUpdateBlockingAction::from_prev_hop_data(&hop_data);
@@ -9527,6 +9536,7 @@ impl<
95279536
downstream_counterparty_node_id: chan_to_release.counterparty_node_id,
95289537
downstream_channel_id: chan_to_release.channel_id,
95299538
blocking_action: chan_to_release.blocking_action,
9539+
monitor_event_source,
95309540
}),
95319541
None,
95329542
)
@@ -9542,6 +9552,7 @@ impl<
95429552
Some(MonitorUpdateCompletionAction::EmitEventOptionAndFreeOtherChannel {
95439553
event,
95449554
downstream_counterparty_and_funding_outpoint: chan_to_release,
9555+
monitor_event_source,
95459556
}),
95469557
None,
95479558
)
@@ -9726,8 +9737,12 @@ impl<
97269737
downstream_counterparty_node_id: node_id,
97279738
blocking_action: blocker,
97289739
downstream_channel_id: channel_id,
9740+
monitor_event_source,
97299741
} = action
97309742
{
9743+
if let Some(source) = monitor_event_source {
9744+
self.chain_monitor.ack_monitor_event(source);
9745+
}
97319746
if let Some(peer_state_mtx) = per_peer_state.get(&node_id) {
97329747
let mut peer_state = peer_state_mtx.lock().unwrap();
97339748
if let Some(blockers) = peer_state
@@ -9986,6 +10001,8 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
998610001
hop_data,
998710002
attribution_data,
998810003
send_timestamp,
10004+
monitor_event_id
10005+
.map(|id| MonitorEventSource { event_id: id, channel_id: next_channel_id }),
998910006
);
999010007
},
999110008
HTLCSource::TrampolineForward { previous_hop_data, .. } => {
@@ -10029,6 +10046,19 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1002910046
current_previous_hop_data,
1003010047
attribution_data.clone(),
1003110048
send_timestamp,
10049+
// Only pass the monitor event ID for the first hop. Once one
10050+
// inbound channel durably has the preimage, the outbound
10051+
// monitor event can be acked. The preimage remains in the
10052+
// outbound monitor's storage and will be replayed to all
10053+
// remaining inbound hops on restart via pending_claims_to_replay.
10054+
if i == 0 {
10055+
monitor_event_id.map(|id| MonitorEventSource {
10056+
event_id: id,
10057+
channel_id: next_channel_id,
10058+
})
10059+
} else {
10060+
None
10061+
},
1003210062
);
1003310063
}
1003410064
},
@@ -10255,7 +10285,11 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1025510285
MonitorUpdateCompletionAction::EmitEventOptionAndFreeOtherChannel {
1025610286
event,
1025710287
downstream_counterparty_and_funding_outpoint,
10288+
monitor_event_source,
1025810289
} => {
10290+
if let Some(source) = monitor_event_source {
10291+
self.chain_monitor.ack_monitor_event(source);
10292+
}
1025910293
if let Some(event) = event {
1026010294
self.pending_events.lock().unwrap().push_back((event, None));
1026110295
}
@@ -10269,7 +10303,11 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1026910303
downstream_counterparty_node_id,
1027010304
downstream_channel_id,
1027110305
blocking_action,
10306+
monitor_event_source,
1027210307
} => {
10308+
if let Some(source) = monitor_event_source {
10309+
self.chain_monitor.ack_monitor_event(source);
10310+
}
1027310311
self.handle_monitor_update_release(
1027410312
downstream_counterparty_node_id,
1027510313
downstream_channel_id,

0 commit comments

Comments
 (0)