Skip to content

Commit dce31b7

Browse files
committed
Avoid re-locking same UTXO future
UtxoLookup implementations may cache and return the same async future for repeated requests for a short channel id. When a replacement channel announcement arrives while that future is in-flight, the pending-entry comparison may point back to the future state already held by the async path. Detect that case with Arc::ptr_eq inside check_replace_previous_entry and compare against the held messages instead of taking the mutex again. This keeps duplicate-announcement filtering intact while letting replacement announcements update the pending entry without re-entering the lock. Co-Authored-By: HAL 9000 This finding was discovered by Project Loupe
1 parent c9260ee commit dce31b7

1 file changed

Lines changed: 105 additions & 28 deletions

File tree

lightning/src/routing/utxo.rs

Lines changed: 105 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -293,42 +293,69 @@ impl PendingChecks {
293293
Ok(())
294294
}
295295

296+
fn pending_channel_announcement_matches(
297+
msg: &msgs::UnsignedChannelAnnouncement, full_msg: Option<&msgs::ChannelAnnouncement>,
298+
pending_state: &UtxoMessages,
299+
) -> bool {
300+
match &pending_state.channel_announce {
301+
Some(ChannelAnnouncement::Full(pending_msg)) => Some(pending_msg) == full_msg,
302+
Some(ChannelAnnouncement::Unsigned(pending_msg)) => pending_msg == msg,
303+
None => {
304+
// This can be reached if `resolve_single_future` has already consumed
305+
// `channel_announce` via `.take()` while the `Arc<Mutex<UtxoMessages>>` is still
306+
// alive (e.g. held on the stack of `check_resolved_futures`). In that case,
307+
// `complete` should also have been taken. Treat it as non-matching and let the
308+
// new request fly.
309+
debug_assert!(
310+
pending_state.complete.is_none(),
311+
"channel_announce is None but complete is still pending"
312+
);
313+
false
314+
},
315+
}
316+
}
317+
296318
fn check_replace_previous_entry(
297319
msg: &msgs::UnsignedChannelAnnouncement, full_msg: Option<&msgs::ChannelAnnouncement>,
298-
replacement: Option<Weak<Mutex<UtxoMessages>>>,
320+
replacement: Option<(&Arc<Mutex<UtxoMessages>>, &UtxoMessages)>,
299321
pending_channels: &mut HashMap<u64, Weak<Mutex<UtxoMessages>>>,
300322
) -> Result<(), msgs::LightningError> {
323+
let replacement_state = replacement.map(|(state, _)| state);
301324
match pending_channels.entry(msg.short_channel_id) {
302325
hash_map::Entry::Occupied(mut e) => {
303326
// There's already a pending lookup for the given SCID. Check if the messages
304327
// are the same and, if so, return immediately (don't bother spawning another
305328
// lookup if we haven't gotten that far yet).
306329
match Weak::upgrade(&e.get()) {
307330
Some(pending_msgs) => {
308-
// This may be called with the mutex held on a different UtxoMessages
309-
// struct, however in that case we have a global lockorder of new messages
310-
// -> old messages, which makes this safe.
311-
let pending_state = pending_msgs.unsafe_well_ordered_double_lock_self();
312-
let pending_matches = match &pending_state.channel_announce {
313-
Some(ChannelAnnouncement::Full(pending_msg)) => {
314-
Some(pending_msg) == full_msg
331+
let pending_matches = match replacement {
332+
Some((replacement, replacement_messages))
333+
if Arc::ptr_eq(&pending_msgs, replacement) =>
334+
{
335+
// The pending entry points to the state whose mutex the caller
336+
// already holds. Compare through the held guard instead of locking
337+
// it again.
338+
Self::pending_channel_announcement_matches(
339+
msg,
340+
full_msg,
341+
replacement_messages,
342+
)
315343
},
316-
Some(ChannelAnnouncement::Unsigned(pending_msg)) => pending_msg == msg,
317-
None => {
318-
// This can be reached if `resolve_single_future` has already
319-
// consumed `channel_announce` via `.take()` while the
320-
// `Arc<Mutex<UtxoMessages>>` is still alive (e.g. held on
321-
// the stack of `check_resolved_futures`). In that case,
322-
// `complete` should also have been taken. Treat it as
323-
// non-matching and let the new request fly.
324-
debug_assert!(
325-
pending_state.complete.is_none(),
326-
"channel_announce is None but complete is still pending"
344+
_ => {
345+
// This may be called with the mutex held on a different
346+
// UtxoMessages struct, however in that case we have a global
347+
// lockorder of new messages -> old messages, which makes this safe.
348+
let pending_state =
349+
pending_msgs.unsafe_well_ordered_double_lock_self();
350+
let matches = Self::pending_channel_announcement_matches(
351+
msg,
352+
full_msg,
353+
&pending_state,
327354
);
328-
false
355+
drop(pending_state);
356+
matches
329357
},
330358
};
331-
drop(pending_state);
332359
if pending_matches {
333360
return Err(LightningError {
334361
err: "Channel announcement is already being checked".to_owned(),
@@ -340,25 +367,25 @@ impl PendingChecks {
340367
// Note that in the replace case whether to replace is somewhat
341368
// arbitrary - both results will be handled, we're just updating the
342369
// value that will be compared to future lookups with the same SCID.
343-
if let Some(item) = replacement {
344-
*e.get_mut() = item;
370+
if let Some(item) = replacement_state {
371+
*e.get_mut() = Arc::downgrade(item);
345372
}
346373
}
347374
},
348375
None => {
349376
// The earlier lookup already resolved. We can't be sure its the same
350377
// so just remove/replace it and move on.
351-
if let Some(item) = replacement {
352-
*e.get_mut() = item;
378+
if let Some(item) = replacement_state {
379+
*e.get_mut() = Arc::downgrade(item);
353380
} else {
354381
e.remove();
355382
}
356383
},
357384
}
358385
},
359386
hash_map::Entry::Vacant(v) => {
360-
if let Some(item) = replacement {
361-
v.insert(item);
387+
if let Some(item) = replacement_state {
388+
v.insert(Arc::downgrade(item));
362389
}
363390
},
364391
}
@@ -442,7 +469,7 @@ impl PendingChecks {
442469
Self::check_replace_previous_entry(
443470
msg,
444471
full_msg,
445-
Some(Arc::downgrade(&future.state)),
472+
Some((&future.state, &async_messages)),
446473
&mut pending_checks.channels,
447474
)?;
448475
async_messages.channel_announce = Some(if let Some(msg) = full_msg {
@@ -1028,6 +1055,56 @@ mod tests {
10281055
assert!(!is_test_feature_set);
10291056
}
10301057

1058+
#[test]
1059+
fn test_no_deadlock_same_future_different_announcement() {
1060+
// A user's UtxoLookup may return the same UtxoFuture for repeated lookups for a
1061+
// given SCID. A different channel_announcement with that SCID should replace the
1062+
// pending message without re-locking the already-held future state.
1063+
let (valid_announcement, chain_source, network_graph, good_script, ..) = get_test_objects();
1064+
let scid = valid_announcement.contents.short_channel_id;
1065+
1066+
let notifier = Arc::new(Notifier::new());
1067+
let future = UtxoFuture::new(Arc::clone(&notifier));
1068+
*chain_source.utxo_ret.lock().unwrap() = UtxoResult::Async(future.clone());
1069+
1070+
assert_eq!(
1071+
network_graph
1072+
.update_channel_from_announcement(&valid_announcement, &Some(&chain_source))
1073+
.unwrap_err()
1074+
.err,
1075+
"Channel being checked async"
1076+
);
1077+
assert_eq!(chain_source.get_utxo_call_count.load(Ordering::Relaxed), 1);
1078+
1079+
let secp_ctx = Secp256k1::new();
1080+
let replacement_pk_1 = &SecretKey::from_slice(&[99; 32]).unwrap();
1081+
let replacement_pk_2 = &SecretKey::from_slice(&[98; 32]).unwrap();
1082+
let replacement_announcement = get_signed_channel_announcement(
1083+
|msg| msg.features.set_unknown_feature_optional(),
1084+
replacement_pk_1,
1085+
replacement_pk_2,
1086+
&secp_ctx,
1087+
);
1088+
assert_eq!(
1089+
network_graph
1090+
.update_channel_from_announcement(&replacement_announcement, &Some(&chain_source))
1091+
.unwrap_err()
1092+
.err,
1093+
"Channel being checked async"
1094+
);
1095+
assert_eq!(chain_source.get_utxo_call_count.load(Ordering::Relaxed), 2);
1096+
1097+
future
1098+
.resolve(Ok(TxOut { value: Amount::from_sat(1_000_000), script_pubkey: good_script }));
1099+
assert!(notifier.notify_pending());
1100+
network_graph.pending_checks.check_resolved_futures(&network_graph);
1101+
#[rustfmt::skip]
1102+
let is_replacement_feature_set =
1103+
network_graph.read_only().channels().get(&scid).unwrap().announcement_message
1104+
.as_ref().unwrap().contents.features.supports_unknown_test_feature();
1105+
assert!(is_replacement_feature_set);
1106+
}
1107+
10311108
#[test]
10321109
fn test_checks_backpressure() {
10331110
// Test that too_many_checks_pending returns true when there are many checks pending, and

0 commit comments

Comments
 (0)