Skip to content

Commit 2c0c24e

Browse files
authored
Rework channel reestablish (#2036)
In an "outdated commitment" scenario where we are on the up-to-date side, we always react by force-closing the channel immediately, not giving our peer a chance to fix their data and restart. On top of that, we consider this a commitment sync error, instead of clearly logging that our counterparty is using outdated data. Addressing this turned out to be rabbit-holey: our sync code is quite complicated and is a bit redundant because we separate between: - checking whether we are late - deciding what messages we need to retransmit Also, discovered a missing corner case when syncing in SHUTDOWN state.
1 parent 2e9f8d9 commit 2c0c24e

5 files changed

Lines changed: 208 additions & 176 deletions

File tree

eclair-core/src/main/scala/fr/acinq/eclair/channel/Channel.scala

Lines changed: 71 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ import fr.acinq.eclair.blockchain.bitcoind.ZmqWatcher
3232
import fr.acinq.eclair.blockchain.bitcoind.ZmqWatcher._
3333
import fr.acinq.eclair.blockchain.bitcoind.rpc.BitcoinCoreClient
3434
import fr.acinq.eclair.channel.Commitments.PostRevocationAction
35-
import fr.acinq.eclair.channel.Helpers.{Closing, Funding, getRelayFees}
35+
import fr.acinq.eclair.channel.Helpers.Syncing.SyncResult
36+
import fr.acinq.eclair.channel.Helpers.{Closing, Funding, Syncing, getRelayFees}
3637
import fr.acinq.eclair.channel.Monitoring.Metrics.ProcessMessage
3738
import fr.acinq.eclair.channel.Monitoring.{Metrics, Tags}
3839
import fr.acinq.eclair.channel.publish.TxPublisher
@@ -1662,44 +1663,38 @@ class Channel(val nodeParams: NodeParams, val wallet: OnChainChannelFunder, remo
16621663
goto(WAIT_FOR_FUNDING_LOCKED) sending fundingLocked
16631664

16641665
case Event(channelReestablish: ChannelReestablish, d: DATA_NORMAL) =>
1665-
var sendQueue = Queue.empty[LightningMessage]
1666-
val channelKeyPath = keyManager.keyPath(d.commitments.localParams, d.commitments.channelConfig)
1667-
channelReestablish match {
1668-
case ChannelReestablish(_, _, nextRemoteRevocationNumber, yourLastPerCommitmentSecret, _, _) if !Helpers.checkLocalCommit(d, nextRemoteRevocationNumber) =>
1669-
// if next_remote_revocation_number is greater than our local commitment index, it means that either we are using an outdated commitment, or they are lying
1670-
// but first we need to make sure that the last per_commitment_secret that they claim to have received from us is correct for that next_remote_revocation_number minus 1
1671-
if (keyManager.commitmentSecret(channelKeyPath, nextRemoteRevocationNumber - 1) == yourLastPerCommitmentSecret) {
1672-
log.warning(s"counterparty proved that we have an outdated (revoked) local commitment!!! ourCommitmentNumber=${d.commitments.localCommit.index} theirCommitmentNumber=$nextRemoteRevocationNumber")
1673-
// their data checks out, we indeed seem to be using an old revoked commitment, and must absolutely *NOT* publish it, because that would be a cheating attempt and they
1674-
// would punish us by taking all the funds in the channel
1675-
val exc = PleasePublishYourCommitment(d.channelId)
1676-
val error = Error(d.channelId, exc.getMessage)
1677-
goto(WAIT_FOR_REMOTE_PUBLISH_FUTURE_COMMITMENT) using DATA_WAIT_FOR_REMOTE_PUBLISH_FUTURE_COMMITMENT(d.commitments, channelReestablish) storing() sending error
1678-
} else {
1679-
// they lied! the last per_commitment_secret they claimed to have received from us is invalid
1680-
throw InvalidRevokedCommitProof(d.channelId, d.commitments.localCommit.index, nextRemoteRevocationNumber, yourLastPerCommitmentSecret)
1681-
}
1682-
case ChannelReestablish(_, nextLocalCommitmentNumber, _, _, _, _) if !Helpers.checkRemoteCommit(d, nextLocalCommitmentNumber) =>
1683-
// if next_local_commit_number is more than one more our remote commitment index, it means that either we are using an outdated commitment, or they are lying
1684-
log.warning(s"counterparty says that they have a more recent commitment than the one we know of!!! ourCommitmentNumber=${d.commitments.remoteNextCommitInfo.left.toOption.map(_.nextRemoteCommit.index).getOrElse(d.commitments.remoteCommit.index)} theirCommitmentNumber=$nextLocalCommitmentNumber")
1685-
// there is no way to make sure that they are saying the truth, the best thing to do is ask them to publish their commitment right now
1686-
// maybe they will publish their commitment, in that case we need to remember their commitment point in order to be able to claim our outputs
1687-
// not that if they don't comply, we could publish our own commitment (it is not stale, otherwise we would be in the case above)
1688-
val exc = PleasePublishYourCommitment(d.channelId)
1689-
val error = Error(d.channelId, exc.getMessage)
1690-
goto(WAIT_FOR_REMOTE_PUBLISH_FUTURE_COMMITMENT) using DATA_WAIT_FOR_REMOTE_PUBLISH_FUTURE_COMMITMENT(d.commitments, channelReestablish) storing() sending error
1691-
case _ =>
1666+
Syncing.checkSync(keyManager, d, channelReestablish) match {
1667+
case syncFailure: SyncResult.Failure =>
1668+
handleSyncFailure(channelReestablish, syncFailure, d)
1669+
case syncSuccess: SyncResult.Success =>
1670+
var sendQueue = Queue.empty[LightningMessage]
16921671
// normal case, our data is up-to-date
16931672
if (channelReestablish.nextLocalCommitmentNumber == 1 && d.commitments.localCommit.index == 0) {
16941673
// If next_local_commitment_number is 1 in both the channel_reestablish it sent and received, then the node MUST retransmit funding_locked, otherwise it MUST NOT
16951674
log.debug("re-sending fundingLocked")
1675+
val channelKeyPath = keyManager.keyPath(d.commitments.localParams, d.commitments.channelConfig)
16961676
val nextPerCommitmentPoint = keyManager.commitmentPoint(channelKeyPath, 1)
16971677
val fundingLocked = FundingLocked(d.commitments.channelId, nextPerCommitmentPoint)
16981678
sendQueue = sendQueue :+ fundingLocked
16991679
}
17001680

1701-
val (commitments1, sendQueue1) = handleSync(channelReestablish, d)
1702-
sendQueue = sendQueue ++ sendQueue1
1681+
// we may need to retransmit updates and/or commit_sig and/or revocation
1682+
sendQueue = sendQueue ++ syncSuccess.retransmit
1683+
1684+
// then we clean up unsigned updates
1685+
val commitments1 = Commitments.discardUnsignedUpdates(d.commitments)
1686+
1687+
commitments1.remoteNextCommitInfo match {
1688+
case Left(_) =>
1689+
// we expect them to (re-)send the revocation immediately
1690+
startSingleTimer(RevocationTimeout.toString, RevocationTimeout(commitments1.remoteCommit.index, peer), nodeParams.revocationTimeout)
1691+
case _ => ()
1692+
}
1693+
1694+
// do I have something to sign?
1695+
if (Commitments.localHasChanges(commitments1)) {
1696+
self ! CMD_SIGN()
1697+
}
17031698

17041699
// BOLT 2: A node if it has sent a previous shutdown MUST retransmit shutdown.
17051700
d.localShutdown.foreach {
@@ -1756,11 +1751,15 @@ class Channel(val nodeParams: NodeParams, val wallet: OnChainChannelFunder, remo
17561751
case Event(c: CMD_ADD_HTLC, d: DATA_NORMAL) => handleAddDisconnected(c, d)
17571752

17581753
case Event(channelReestablish: ChannelReestablish, d: DATA_SHUTDOWN) =>
1759-
var sendQueue = Queue.empty[LightningMessage]
1760-
val (commitments1, sendQueue1) = handleSync(channelReestablish, d)
1761-
sendQueue = sendQueue ++ sendQueue1 :+ d.localShutdown
1762-
// BOLT 2: A node if it has sent a previous shutdown MUST retransmit shutdown.
1763-
goto(SHUTDOWN) using d.copy(commitments = commitments1) sending sendQueue
1754+
Syncing.checkSync(keyManager, d, channelReestablish) match {
1755+
case syncFailure: SyncResult.Failure =>
1756+
handleSyncFailure(channelReestablish, syncFailure, d)
1757+
case syncSuccess: SyncResult.Success =>
1758+
val commitments1 = Commitments.discardUnsignedUpdates(d.commitments)
1759+
val sendQueue = Queue.empty[LightningMessage] ++ syncSuccess.retransmit :+ d.localShutdown
1760+
// BOLT 2: A node if it has sent a previous shutdown MUST retransmit shutdown.
1761+
goto(SHUTDOWN) using d.copy(commitments = commitments1) sending sendQueue
1762+
}
17641763

17651764
case Event(_: ChannelReestablish, d: DATA_NEGOTIATING) =>
17661765
// BOLT 2: A node if it has sent a previous shutdown MUST retransmit shutdown.
@@ -2229,6 +2228,29 @@ class Channel(val nodeParams: NodeParams, val wallet: OnChainChannelFunder, remo
22292228
stay() using d.copy(channelUpdate = channelUpdate1) storing()
22302229
}
22312230

2231+
private def handleSyncFailure(channelReestablish: ChannelReestablish, syncFailure: SyncResult.Failure, d: HasCommitments) = {
2232+
syncFailure match {
2233+
case res: SyncResult.LocalLateProven =>
2234+
log.error(s"counterparty proved that we have an outdated (revoked) local commitment!!! ourLocalCommitmentNumber=${res.ourLocalCommitmentNumber} theirRemoteCommitmentNumber=${res.theirRemoteCommitmentNumber}")
2235+
// their data checks out, we indeed seem to be using an old revoked commitment, and must absolutely *NOT* publish it, because that would be a cheating attempt and they
2236+
// would punish us by taking all the funds in the channel
2237+
handleOutdatedCommitment(channelReestablish, d)
2238+
case res: Syncing.SyncResult.LocalLateUnproven =>
2239+
log.error(s"our local commitment is in sync, but counterparty says that they have a more recent remote commitment than the one we know of (they could be lying)!!! ourRemoteCommitmentNumber=${res.ourRemoteCommitmentNumber} theirCommitmentNumber=${res.theirLocalCommitmentNumber}")
2240+
// there is no way to make sure that they are saying the truth, the best thing to do is "call their bluff" and
2241+
// ask them to publish their commitment right now. If they weren't lying and they do publish their commitment,
2242+
// we need to remember their commitment point in order to be able to claim our outputs
2243+
handleOutdatedCommitment(channelReestablish, d)
2244+
case res: Syncing.SyncResult.RemoteLying =>
2245+
log.error(s"counterparty is lying about us having an outdated commitment!!! ourLocalCommitmentNumber=${res.ourLocalCommitmentNumber} theirRemoteCommitmentNumber=${res.theirRemoteCommitmentNumber}")
2246+
// they are deliberately trying to fool us into thinking we have a late commitment
2247+
handleLocalError(InvalidRevokedCommitProof(d.channelId, res.ourLocalCommitmentNumber, res.theirRemoteCommitmentNumber, res.invalidPerCommitmentSecret), d, Some(channelReestablish))
2248+
case SyncResult.RemoteLate =>
2249+
log.error("counterparty appears to be using an outdated commitment, they may request a force-close, standing by...")
2250+
stay()
2251+
}
2252+
}
2253+
22322254
private def maybeEmitChannelUpdateChangedEvent(newUpdate: ChannelUpdate, oldUpdate_opt: Option[ChannelUpdate], d: DATA_NORMAL): Unit = {
22332255
if (oldUpdate_opt.isEmpty || !Announcements.areSameIgnoreFlags(newUpdate, oldUpdate_opt.get)) {
22342256
context.system.eventStream.publish(ChannelUpdateParametersChanged(self, d.channelId, newUpdate.shortChannelId, d.commitments.remoteNodeId, newUpdate))
@@ -2267,13 +2289,16 @@ class Channel(val nodeParams: NodeParams, val wallet: OnChainChannelFunder, remo
22672289
private def handleLocalError(cause: Throwable, d: ChannelData, msg: Option[Any]) = {
22682290
cause match {
22692291
case _: ForcedLocalCommit => log.warning(s"force-closing channel at user request")
2270-
case _ if stateName == WAIT_FOR_OPEN_CHANNEL => log.warning(s"${cause.getMessage} while processing msg=${msg.getOrElse("n/a").getClass.getSimpleName} in state=$stateName")
2271-
case _ => log.error(s"${cause.getMessage} while processing msg=${msg.getOrElse("n/a").getClass.getSimpleName} in state=$stateName")
2272-
}
2273-
cause match {
2274-
case _: ChannelException => ()
2275-
case _ => log.error(cause, s"msg=${msg.getOrElse("n/a")} stateData=$stateData")
2292+
case _ if msg.exists(_.isInstanceOf[OpenChannel]) || msg.exists(_.isInstanceOf[AcceptChannel]) =>
2293+
// invalid remote channel parameters are logged as warning
2294+
log.warning(s"${cause.getMessage} while processing msg=${msg.getOrElse("n/a").getClass.getSimpleName} in state=$stateName")
2295+
case _: ChannelException =>
2296+
log.error(s"${cause.getMessage} while processing msg=${msg.getOrElse("n/a").getClass.getSimpleName} in state=$stateName")
2297+
case _ =>
2298+
// unhandled error: we dump the channel data, and print the stack trace
2299+
log.error(cause, s"msg=${msg.getOrElse("n/a")} stateData=$stateData:")
22762300
}
2301+
22772302
val error = Error(d.channelId, cause.getMessage)
22782303
context.system.eventStream.publish(ChannelErrorOccurred(self, stateData.channelId, remoteNodeId, stateData, LocalError(cause), isFatal = true))
22792304

@@ -2535,79 +2560,10 @@ class Channel(val nodeParams: NodeParams, val wallet: OnChainChannelFunder, remo
25352560
goto(ERR_INFORMATION_LEAK) calling doPublish(localCommitPublished, d.commitments) sending error
25362561
}
25372562

2538-
private def handleSync(channelReestablish: ChannelReestablish, d: HasCommitments): (Commitments, Queue[LightningMessage]) = {
2539-
var sendQueue = Queue.empty[LightningMessage]
2540-
// first we clean up unacknowledged updates
2541-
log.debug("discarding proposed OUT: {}", d.commitments.localChanges.proposed.map(Commitments.msg2String(_)).mkString(","))
2542-
log.debug("discarding proposed IN: {}", d.commitments.remoteChanges.proposed.map(Commitments.msg2String(_)).mkString(","))
2543-
val commitments1 = d.commitments.copy(
2544-
localChanges = d.commitments.localChanges.copy(proposed = Nil),
2545-
remoteChanges = d.commitments.remoteChanges.copy(proposed = Nil),
2546-
localNextHtlcId = d.commitments.localNextHtlcId - d.commitments.localChanges.proposed.collect { case u: UpdateAddHtlc => u }.size,
2547-
remoteNextHtlcId = d.commitments.remoteNextHtlcId - d.commitments.remoteChanges.proposed.collect { case u: UpdateAddHtlc => u }.size)
2548-
log.debug(s"localNextHtlcId=${d.commitments.localNextHtlcId}->${commitments1.localNextHtlcId}")
2549-
log.debug(s"remoteNextHtlcId=${d.commitments.remoteNextHtlcId}->${commitments1.remoteNextHtlcId}")
2550-
2551-
def resendRevocation(): Unit = {
2552-
// let's see the state of remote sigs
2553-
if (commitments1.localCommit.index == channelReestablish.nextRemoteRevocationNumber) {
2554-
// nothing to do
2555-
} else if (commitments1.localCommit.index == channelReestablish.nextRemoteRevocationNumber + 1) {
2556-
// our last revocation got lost, let's resend it
2557-
log.debug("re-sending last revocation")
2558-
val channelKeyPath = keyManager.keyPath(d.commitments.localParams, d.commitments.channelConfig)
2559-
val localPerCommitmentSecret = keyManager.commitmentSecret(channelKeyPath, d.commitments.localCommit.index - 1)
2560-
val localNextPerCommitmentPoint = keyManager.commitmentPoint(channelKeyPath, d.commitments.localCommit.index + 1)
2561-
val revocation = RevokeAndAck(
2562-
channelId = commitments1.channelId,
2563-
perCommitmentSecret = localPerCommitmentSecret,
2564-
nextPerCommitmentPoint = localNextPerCommitmentPoint
2565-
)
2566-
sendQueue = sendQueue :+ revocation
2567-
} else throw RevocationSyncError(d.channelId)
2568-
}
2569-
2570-
// re-sending sig/rev (in the right order)
2571-
commitments1.remoteNextCommitInfo match {
2572-
case Left(waitingForRevocation) if waitingForRevocation.nextRemoteCommit.index + 1 == channelReestablish.nextLocalCommitmentNumber =>
2573-
// we had sent a new sig and were waiting for their revocation
2574-
// they had received the new sig but their revocation was lost during the disconnection
2575-
// they will send us the revocation, nothing to do here
2576-
log.debug("waiting for them to re-send their last revocation")
2577-
resendRevocation()
2578-
case Left(waitingForRevocation) if waitingForRevocation.nextRemoteCommit.index == channelReestablish.nextLocalCommitmentNumber =>
2579-
// we had sent a new sig and were waiting for their revocation
2580-
// they didn't receive the new sig because of the disconnection
2581-
// we just resend the same updates and the same sig
2582-
2583-
val revWasSentLast = commitments1.localCommit.index > waitingForRevocation.sentAfterLocalCommitIndex
2584-
if (!revWasSentLast) resendRevocation()
2585-
2586-
log.debug("re-sending previously local signed changes: {}", commitments1.localChanges.signed.map(Commitments.msg2String(_)).mkString(","))
2587-
commitments1.localChanges.signed.foreach(revocation => sendQueue = sendQueue :+ revocation)
2588-
log.debug("re-sending the exact same previous sig")
2589-
sendQueue = sendQueue :+ waitingForRevocation.sent
2590-
2591-
if (revWasSentLast) resendRevocation()
2592-
case Right(_) if commitments1.remoteCommit.index + 1 == channelReestablish.nextLocalCommitmentNumber =>
2593-
// there wasn't any sig in-flight when the disconnection occurred
2594-
resendRevocation()
2595-
case _ => throw CommitmentSyncError(d.channelId)
2596-
}
2597-
2598-
commitments1.remoteNextCommitInfo match {
2599-
case Left(_) =>
2600-
// we expect them to (re-)send the revocation immediately
2601-
startSingleTimer(RevocationTimeout.toString, RevocationTimeout(commitments1.remoteCommit.index, peer), nodeParams.revocationTimeout)
2602-
case _ => ()
2603-
}
2604-
2605-
// have I something to sign?
2606-
if (Commitments.localHasChanges(commitments1)) {
2607-
self ! CMD_SIGN()
2608-
}
2609-
2610-
(commitments1, sendQueue)
2563+
private def handleOutdatedCommitment(channelReestablish: ChannelReestablish, d: HasCommitments) = {
2564+
val exc = PleasePublishYourCommitment(d.channelId)
2565+
val error = Error(d.channelId, exc.getMessage)
2566+
goto(WAIT_FOR_REMOTE_PUBLISH_FUTURE_COMMITMENT) using DATA_WAIT_FOR_REMOTE_PUBLISH_FUTURE_COMMITMENT(d.commitments, channelReestablish) storing() sending error
26112567
}
26122568

26132569
/**
@@ -2696,7 +2652,7 @@ class Channel(val nodeParams: NodeParams, val wallet: OnChainChannelFunder, remo
26962652
}
26972653

26982654
// we let the peer decide what to do
2699-
override val supervisorStrategy = OneForOneStrategy(loggingEnabled = true) { case _ => SupervisorStrategy.Escalate }
2655+
override val supervisorStrategy: OneForOneStrategy = OneForOneStrategy(loggingEnabled = true) { case _ => SupervisorStrategy.Escalate }
27002656

27012657
override def aroundReceive(receive: Actor.Receive, msg: Any): Unit = {
27022658
KamonExt.time(ProcessMessage.withTag("MessageType", msg.getClass.getSimpleName)) {

eclair-core/src/main/scala/fr/acinq/eclair/channel/ChannelExceptions.scala

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,7 @@ case class CannotSignWithoutChanges (override val channelId: Byte
9191
case class CannotSignBeforeRevocation (override val channelId: ByteVector32) extends ChannelException(channelId, "cannot sign until next revocation hash is received")
9292
case class UnexpectedRevocation (override val channelId: ByteVector32) extends ChannelException(channelId, "received unexpected RevokeAndAck message")
9393
case class InvalidRevocation (override val channelId: ByteVector32) extends ChannelException(channelId, "invalid revocation")
94-
case class InvalidRevokedCommitProof (override val channelId: ByteVector32, ourCommitmentNumber: Long, theirCommitmentNumber: Long, perCommitmentSecret: PrivateKey) extends ChannelException(channelId, s"counterparty claimed that we have a revoked commit but their proof doesn't check out: ourCommitmentNumber=$ourCommitmentNumber theirCommitmentNumber=$theirCommitmentNumber perCommitmentSecret=$perCommitmentSecret")
95-
case class CommitmentSyncError (override val channelId: ByteVector32) extends ChannelException(channelId, "commitment sync error")
96-
case class RevocationSyncError (override val channelId: ByteVector32) extends ChannelException(channelId, "revocation sync error")
94+
case class InvalidRevokedCommitProof (override val channelId: ByteVector32, ourLocalCommitmentNumber: Long, theirRemoteCommitmentNumber: Long, invalidPerCommitmentSecret: PrivateKey) extends ChannelException(channelId, s"counterparty claimed that we have a revoked commit but their proof doesn't check out: ourCommitmentNumber=$ourLocalCommitmentNumber theirCommitmentNumber=$theirRemoteCommitmentNumber perCommitmentSecret=$invalidPerCommitmentSecret")
9795
case class InvalidFailureCode (override val channelId: ByteVector32) extends ChannelException(channelId, "UpdateFailMalformedHtlc message doesn't have BADONION bit set")
9896
case class PleasePublishYourCommitment (override val channelId: ByteVector32) extends ChannelException(channelId, "please publish your local commitment")
9997
case class CommandUnavailableInThisState (override val channelId: ByteVector32, command: String, state: ChannelState) extends ChannelException(channelId, s"cannot execute command=$command in state=$state")

0 commit comments

Comments
 (0)