Skip to content

Commit 4c48b24

Browse files
Check pruned HTLCs were resolved on startup
In a recent commit, we added support for pruning an inbound HTLC's persisted onion once the HTLC has been irrevocably forwarded to the outbound edge. Here, we add a check on startup that those inbound HTLCs were actually handled. Specifically, we check that the inbound HTLC is either (a) currently present in the outbound edge or (b) was removed via claim. If neither of those are true, we infer that the HTLC was removed from the outbound edge via fail and fail the inbound HTLC backwards.
1 parent 3d14c13 commit 4c48b24

4 files changed

Lines changed: 373 additions & 21 deletions

File tree

lightning/src/ln/channel.rs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -314,8 +314,8 @@ impl InboundHTLCState {
314314
/// `ChannelManager` persist.
315315
///
316316
/// Useful for reconstructing the pending HTLC set on startup.
317-
#[derive(Debug)]
318-
enum InboundUpdateAdd {
317+
#[derive(Debug, Clone)]
318+
pub(super) enum InboundUpdateAdd {
319319
/// The inbound committed HTLC's update_add_htlc message.
320320
WithOnion { update_add_htlc: msgs::UpdateAddHTLC },
321321
/// This inbound HTLC is a forward that was irrevocably committed to the outbound edge, allowing
@@ -7862,7 +7862,9 @@ where
78627862
}
78637863

78647864
/// Useful for reconstructing the set of pending HTLCs when deserializing the `ChannelManager`.
7865-
pub(super) fn inbound_committed_unresolved_htlcs(&self) -> Vec<msgs::UpdateAddHTLC> {
7865+
pub(super) fn inbound_committed_unresolved_htlcs(
7866+
&self,
7867+
) -> Vec<(PaymentHash, InboundUpdateAdd)> {
78667868
// We don't want to return an HTLC as needing processing if it already has a resolution that's
78677869
// pending in the holding cell.
78687870
let htlc_resolution_in_holding_cell = |id: u64| -> bool {
@@ -7880,13 +7882,11 @@ where
78807882
.pending_inbound_htlcs
78817883
.iter()
78827884
.filter_map(|htlc| match &htlc.state {
7883-
InboundHTLCState::Committed {
7884-
update_add_htlc: InboundUpdateAdd::WithOnion { update_add_htlc },
7885-
} => {
7885+
InboundHTLCState::Committed { update_add_htlc } => {
78867886
if htlc_resolution_in_holding_cell(htlc.htlc_id) {
78877887
return None;
78887888
}
7889-
Some(update_add_htlc.clone())
7889+
Some((htlc.payment_hash, update_add_htlc.clone()))
78907890
},
78917891
_ => None,
78927892
})
@@ -7944,6 +7944,12 @@ where
79447944
debug_assert!(false, "If we go to prune an inbound HTLC it should be present")
79457945
}
79467946

7947+
/// Useful for testing crash scenarios where the holding cell is not persisted.
7948+
#[cfg(test)]
7949+
pub(super) fn test_clear_holding_cell(&mut self) {
7950+
self.context.holding_cell_htlc_updates.clear()
7951+
}
7952+
79477953
/// Marks an outbound HTLC which we have received update_fail/fulfill/malformed
79487954
#[inline]
79497955
fn mark_outbound_htlc_removed(

lightning/src/ln/channelmanager.rs

Lines changed: 121 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ use crate::ln::chan_utils::selected_commitment_sat_per_1000_weight;
5858
use crate::ln::channel::QuiescentAction;
5959
use crate::ln::channel::{
6060
self, hold_time_since, Channel, ChannelError, ChannelUpdateStatus, DisconnectResult,
61-
FundedChannel, FundingTxSigned, InboundV1Channel, OutboundV1Channel, PendingV2Channel,
62-
ReconnectionMsg, ShutdownResult, SpliceFundingFailed, StfuResponse, UpdateFulfillCommitFetch,
63-
WithChannelContext,
61+
FundedChannel, FundingTxSigned, InboundUpdateAdd, InboundV1Channel, OutboundV1Channel,
62+
PendingV2Channel, ReconnectionMsg, ShutdownResult, SpliceFundingFailed, StfuResponse,
63+
UpdateFulfillCommitFetch, WithChannelContext,
6464
};
6565
use crate::ln::channel_state::ChannelDetails;
6666
use crate::ln::funding::SpliceContribution;
@@ -10145,7 +10145,20 @@ This indicates a bug inside LDK. Please report this error at https://github.com/
1014510145
let per_peer_state = self.per_peer_state.read().unwrap();
1014610146
let peer_state = per_peer_state.get(&cp_id).map(|state| state.lock().unwrap()).unwrap();
1014710147
let chan = peer_state.channel_by_id.get(&chan_id).and_then(|c| c.as_funded()).unwrap();
10148-
chan.inbound_committed_unresolved_htlcs().len()
10148+
chan.inbound_committed_unresolved_htlcs()
10149+
.iter()
10150+
.filter(|(_, htlc)| matches!(htlc, InboundUpdateAdd::WithOnion { .. }))
10151+
.count()
10152+
}
10153+
10154+
#[cfg(test)]
10155+
/// Useful for testing crash scenarios where the holding cell of a channel is not persisted.
10156+
pub(crate) fn test_clear_channel_holding_cell(&self, cp_id: PublicKey, chan_id: ChannelId) {
10157+
let per_peer_state = self.per_peer_state.read().unwrap();
10158+
let mut peer_state = per_peer_state.get(&cp_id).map(|state| state.lock().unwrap()).unwrap();
10159+
let chan =
10160+
peer_state.channel_by_id.get_mut(&chan_id).and_then(|c| c.as_funded_mut()).unwrap();
10161+
chan.test_clear_holding_cell();
1014910162
}
1015010163

1015110164
/// Completes channel resumption after locks have been released.
@@ -18158,7 +18171,7 @@ impl<
1815818171
}
1815918172

1816018173
// Post-deserialization processing
18161-
let mut decode_update_add_htlcs = new_hash_map();
18174+
let mut decode_update_add_htlcs: HashMap<u64, Vec<msgs::UpdateAddHTLC>> = new_hash_map();
1816218175
if fake_scid_rand_bytes.is_none() {
1816318176
fake_scid_rand_bytes = Some(args.entropy_source.get_secure_random_bytes());
1816418177
}
@@ -18450,6 +18463,22 @@ impl<
1845018463
// have a fully-constructed `ChannelManager` at the end.
1845118464
let mut pending_claims_to_replay = Vec::new();
1845218465

18466+
// If we find an inbound HTLC that claims to already be forwarded to the outbound edge, we
18467+
// store an identifier for it here and verify that it is either (a) present in the outbound
18468+
// edge or (b) removed from the outbound edge via claim. If it's in neither of these states, we
18469+
// infer that it was removed from the outbound edge via fail, and fail it backwards to ensure
18470+
// that it is handled.
18471+
let mut already_forwarded_htlcs = Vec::new();
18472+
let prune_forwarded_htlc =
18473+
|already_forwarded_htlcs: &mut Vec<(PaymentHash, HTLCPreviousHopData, u64)>,
18474+
prev_hop: &HTLCPreviousHopData| {
18475+
if let Some(idx) = already_forwarded_htlcs.iter().position(|(_, htlc, _)| {
18476+
prev_hop.htlc_id == htlc.htlc_id
18477+
&& prev_hop.prev_outbound_scid_alias == htlc.prev_outbound_scid_alias
18478+
}) {
18479+
already_forwarded_htlcs.swap_remove(idx);
18480+
}
18481+
};
1845318482
{
1845418483
// If we're tracking pending payments, ensure we haven't lost any by looking at the
1845518484
// ChannelMonitor data for any channels for which we do not have authorative state
@@ -18472,16 +18501,38 @@ impl<
1847218501
if reconstruct_manager_from_monitors {
1847318502
if let Some(chan) = peer_state.channel_by_id.get(channel_id) {
1847418503
if let Some(funded_chan) = chan.as_funded() {
18504+
let scid_alias = funded_chan.context.outbound_scid_alias();
1847518505
let inbound_committed_update_adds =
1847618506
funded_chan.inbound_committed_unresolved_htlcs();
18477-
if !inbound_committed_update_adds.is_empty() {
18478-
// Reconstruct `ChannelManager::decode_update_add_htlcs` from the serialized
18479-
// `Channel`, as part of removing the requirement to regularly persist the
18480-
// `ChannelManager`.
18481-
decode_update_add_htlcs.insert(
18482-
funded_chan.context.outbound_scid_alias(),
18483-
inbound_committed_update_adds,
18484-
);
18507+
for (payment_hash, htlc) in inbound_committed_update_adds {
18508+
match htlc {
18509+
InboundUpdateAdd::WithOnion { update_add_htlc } => {
18510+
// Reconstruct `ChannelManager::decode_update_add_htlcs` from the serialized
18511+
// `Channel` as part of removing the requirement to regularly persist the
18512+
// `ChannelManager`.
18513+
match decode_update_add_htlcs.entry(scid_alias) {
18514+
hash_map::Entry::Occupied(mut entry) => {
18515+
entry.get_mut().push(update_add_htlc);
18516+
},
18517+
hash_map::Entry::Vacant(entry) => {
18518+
entry.insert(vec![update_add_htlc]);
18519+
},
18520+
}
18521+
},
18522+
InboundUpdateAdd::Forwarded {
18523+
hop_data,
18524+
outbound_amt_msat,
18525+
} => {
18526+
already_forwarded_htlcs.push((
18527+
payment_hash,
18528+
hop_data,
18529+
outbound_amt_msat,
18530+
));
18531+
},
18532+
InboundUpdateAdd::Legacy => {
18533+
return Err(DecodeError::InvalidValue)
18534+
},
18535+
}
1848518536
}
1848618537
}
1848718538
}
@@ -18535,6 +18586,7 @@ impl<
1853518586
"HTLC already forwarded to the outbound edge",
1853618587
&args.logger,
1853718588
);
18589+
prune_forwarded_htlc(&mut already_forwarded_htlcs, &prev_hop);
1853818590
}
1853918591
}
1854018592
}
@@ -18569,6 +18621,10 @@ impl<
1856918621
"HTLC already forwarded to the outbound edge",
1857018622
&&logger,
1857118623
);
18624+
prune_forwarded_htlc(
18625+
&mut already_forwarded_htlcs,
18626+
&prev_hop_data,
18627+
);
1857218628
}
1857318629

1857418630
// The ChannelMonitor is now responsible for this HTLC's
@@ -19093,6 +19149,7 @@ impl<
1909319149
"HTLC was failed backwards during manager read",
1909419150
&args.logger,
1909519151
);
19152+
prune_forwarded_htlc(&mut already_forwarded_htlcs, prev_hop_data);
1909619153
}
1909719154
}
1909819155

@@ -19238,9 +19295,47 @@ impl<
1923819295
};
1923919296

1924019297
let mut processed_claims: HashSet<Vec<MPPClaimHTLCSource>> = new_hash_set();
19241-
for (_, monitor) in args.channel_monitors.iter() {
19298+
for (channel_id, monitor) in args.channel_monitors.iter() {
1924219299
for (payment_hash, (payment_preimage, payment_claims)) in monitor.get_stored_preimages()
1924319300
{
19301+
// If we have unresolved inbound committed HTLCs that were already forwarded to the
19302+
// outbound edge and removed via claim, we need to make sure to claim them backwards via
19303+
// adding them to `pending_claims_to_replay`.
19304+
for (hash, hop_data, outbound_amt_msat) in
19305+
mem::take(&mut already_forwarded_htlcs).drain(..)
19306+
{
19307+
if hash != payment_hash {
19308+
already_forwarded_htlcs.push((hash, hop_data, outbound_amt_msat));
19309+
continue;
19310+
}
19311+
let new_pending_claim = !pending_claims_to_replay.iter().any(|(src, _, _, _, _, _, _)| {
19312+
matches!(src, HTLCSource::PreviousHopData(hop) if hop.htlc_id == hop_data.htlc_id && hop.prev_outbound_scid_alias == hop_data.prev_outbound_scid_alias)
19313+
});
19314+
if new_pending_claim {
19315+
let counterparty_node_id = monitor.get_counterparty_node_id();
19316+
let is_channel_closed = channel_manager
19317+
.per_peer_state
19318+
.read()
19319+
.unwrap()
19320+
.get(&counterparty_node_id)
19321+
.map_or(true, |peer_state_mtx| {
19322+
!peer_state_mtx
19323+
.lock()
19324+
.unwrap()
19325+
.channel_by_id
19326+
.contains_key(channel_id)
19327+
});
19328+
pending_claims_to_replay.push((
19329+
HTLCSource::PreviousHopData(hop_data),
19330+
payment_preimage,
19331+
outbound_amt_msat,
19332+
is_channel_closed,
19333+
counterparty_node_id,
19334+
monitor.get_funding_txo(),
19335+
*channel_id,
19336+
));
19337+
}
19338+
}
1924419339
if !payment_claims.is_empty() {
1924519340
for payment_claim in payment_claims {
1924619341
if processed_claims.contains(&payment_claim.mpp_parts) {
@@ -19482,6 +19577,18 @@ impl<
1948219577
channel_manager
1948319578
.fail_htlc_backwards_internal(&source, &hash, &reason, receiver, ev_action);
1948419579
}
19580+
for (hash, htlc, _) in already_forwarded_htlcs {
19581+
let channel_id = htlc.channel_id;
19582+
let node_id = htlc.counterparty_node_id;
19583+
let source = HTLCSource::PreviousHopData(htlc);
19584+
let failure_reason = LocalHTLCFailureReason::TemporaryChannelFailure;
19585+
let failure_data = channel_manager.get_htlc_inbound_temp_fail_data(failure_reason);
19586+
let reason = HTLCFailReason::reason(failure_reason, failure_data);
19587+
let receiver = HTLCHandlingFailureType::Forward { node_id, channel_id };
19588+
// The event completion action is only relevant for HTLCs that originate from our node, not
19589+
// forwarded HTLCs.
19590+
channel_manager.fail_htlc_backwards_internal(&source, &hash, &reason, receiver, None);
19591+
}
1948519592

1948619593
for (
1948719594
source,

lightning/src/ln/functional_test_utils.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1423,6 +1423,23 @@ macro_rules! reload_node {
14231423
None
14241424
);
14251425
};
1426+
// Reload the node and have the `ChannelManager` use new codepaths that reconstruct its set of
1427+
// pending HTLCs from `Channel{Monitor}` data.
1428+
($node: expr, $chanman_encoded: expr, $monitors_encoded: expr, $persister:
1429+
ident, $new_chain_monitor: ident, $new_channelmanager: ident, $reconstruct_pending_htlcs: expr
1430+
) => {
1431+
let config = $node.node.get_current_config();
1432+
_reload_node_inner!(
1433+
$node,
1434+
config,
1435+
$chanman_encoded,
1436+
$monitors_encoded,
1437+
$persister,
1438+
$new_chain_monitor,
1439+
$new_channelmanager,
1440+
$reconstruct_pending_htlcs
1441+
);
1442+
};
14261443
}
14271444

14281445
pub fn create_funding_transaction<'a, 'b, 'c>(

0 commit comments

Comments
 (0)