Skip to content

Commit 3502b0d

Browse files
committed
Error handling for not enough majority nodes to sign
1 parent f87446c commit 3502b0d

4 files changed

Lines changed: 92 additions & 23 deletions

File tree

cmd/mpcium/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ func runNode(ctx context.Context, c *cli.Command) error {
194194
timeoutConsumer.Run()
195195
defer timeoutConsumer.Close()
196196
keygenConsumer := eventconsumer.NewKeygenConsumer(natsConn, keygenBroker, pubsub, peerRegistry)
197-
signingConsumer := eventconsumer.NewSigningConsumer(natsConn, signingBroker, pubsub, peerRegistry)
197+
signingConsumer := eventconsumer.NewSigningConsumer(natsConn, signingBroker, pubsub, peerRegistry, singingResultQueue)
198198

199199
// Make the node ready before starting the signing consumer
200200
if err := peerRegistry.Ready(); err != nil {

pkg/event/types.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ const (
9191
// Context and cancellation errors
9292
ErrorCodeContextCancelled ErrorCode = "ERROR_CONTEXT_CANCELLED"
9393
ErrorCodeOperationAborted ErrorCode = "ERROR_OPERATION_ABORTED"
94+
ErrorCodeNotMajority ErrorCode = "ERROR_NOT_MAJORITY"
9495
)
9596

9697
// GetErrorCodeFromError attempts to categorize a generic error into a specific error code

pkg/eventconsumer/sign_consumer.go

Lines changed: 71 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@ package eventconsumer
22

33
import (
44
"context"
5+
"encoding/json"
56
"fmt"
67
"time"
78

89
"github.com/fystack/mpcium/pkg/event"
910
"github.com/fystack/mpcium/pkg/logger"
1011
"github.com/fystack/mpcium/pkg/messaging"
1112
"github.com/fystack/mpcium/pkg/mpc"
13+
"github.com/fystack/mpcium/pkg/types"
1214
"github.com/google/uuid"
1315
"github.com/nats-io/nats.go"
1416
"github.com/nats-io/nats.go/jetstream"
@@ -34,25 +36,27 @@ type SigningConsumer interface {
3436

3537
// signingConsumer implements SigningConsumer.
3638
type signingConsumer struct {
37-
natsConn *nats.Conn
38-
pubsub messaging.PubSub
39-
jsBroker messaging.MessageBroker
40-
peerRegistry mpc.PeerRegistry
41-
mpcThreshold int
39+
natsConn *nats.Conn
40+
pubsub messaging.PubSub
41+
jsBroker messaging.MessageBroker
42+
peerRegistry mpc.PeerRegistry
43+
mpcThreshold int
44+
signingResultQueue messaging.MessageQueue
4245

4346
// jsSub holds the JetStream subscription, so it can be cleaned up during Close().
4447
jsSub messaging.Subscription
4548
}
4649

4750
// NewSigningConsumer returns a new instance of SigningConsumer.
48-
func NewSigningConsumer(natsConn *nats.Conn, jsBroker messaging.MessageBroker, pubsub messaging.PubSub, peerRegistry mpc.PeerRegistry) SigningConsumer {
51+
func NewSigningConsumer(natsConn *nats.Conn, jsBroker messaging.MessageBroker, pubsub messaging.PubSub, peerRegistry mpc.PeerRegistry, signingResultQueue messaging.MessageQueue) SigningConsumer {
4952
mpcThreshold := viper.GetInt("mpc_threshold")
5053
return &signingConsumer{
51-
natsConn: natsConn,
52-
pubsub: pubsub,
53-
jsBroker: jsBroker,
54-
peerRegistry: peerRegistry,
55-
mpcThreshold: mpcThreshold,
54+
natsConn: natsConn,
55+
pubsub: pubsub,
56+
jsBroker: jsBroker,
57+
peerRegistry: peerRegistry,
58+
mpcThreshold: mpcThreshold,
59+
signingResultQueue: signingResultQueue,
5660
}
5761
}
5862

@@ -136,18 +140,25 @@ func (sc *signingConsumer) Run(ctx context.Context) error {
136140
// When signing completes, the session publishes the result to a queue and calls the onSuccess callback, which sends a reply to the inbox that the SigningConsumer is monitoring.
137141
// The reply signals completion, allowing the SigningConsumer to acknowledge the original message.
138142
func (sc *signingConsumer) handleSigningEvent(msg jetstream.Msg) {
139-
// Check if we still have enough peers before processing the message
140-
requiredPeers := int64(sc.mpcThreshold + 1)
141-
readyPeers := sc.peerRegistry.GetReadyPeersCount()
142-
143-
if readyPeers < requiredPeers {
144-
logger.Warn("SigningConsumer: Not enough peers to process signing request, rejecting message",
145-
"ready", readyPeers,
146-
"required", requiredPeers)
147-
// Immediately return and let nats redeliver the message with backoff
143+
// Parse the signing request message to extract transaction details
144+
raw := msg.Data()
145+
var signingMsg types.SignTxMessage
146+
sessionID := msg.Headers().Get("SessionID")
147+
148+
err := json.Unmarshal(raw, &signingMsg)
149+
if err != nil {
150+
logger.Error("SigningConsumer: Failed to unmarshal signing message", err)
151+
sc.handleSigningError(signingMsg, event.ErrorCodeUnmarshalFailure, err, sessionID)
152+
_ = msg.Nak()
148153
return
149154
}
150155

156+
if !sc.peerRegistry.AreMajorityReady() {
157+
requiredPeers := int64(sc.mpcThreshold + 1)
158+
err := fmt.Errorf("not enough peers to process signing request: ready=%d, required=%d", sc.peerRegistry.GetReadyPeersCount(), requiredPeers)
159+
sc.handleSigningError(signingMsg, event.ErrorCodeNotMajority, err, sessionID)
160+
return
161+
}
151162
// Create a reply inbox to receive the signing event response.
152163
replyInbox := nats.NewInbox()
153164

@@ -199,6 +210,36 @@ func (sc *signingConsumer) handleSigningEvent(msg jetstream.Msg) {
199210
_ = msg.Nak()
200211
}
201212

213+
func (sc *signingConsumer) handleSigningError(signMsg types.SignTxMessage, errorCode event.ErrorCode, err error, sessionID string) {
214+
signingResult := event.SigningResultEvent{
215+
ResultType: event.ResultTypeError,
216+
ErrorCode: errorCode,
217+
NetworkInternalCode: signMsg.NetworkInternalCode,
218+
WalletID: signMsg.WalletID,
219+
TxID: signMsg.TxID,
220+
ErrorReason: err.Error(),
221+
}
222+
223+
signingResultBytes, err := json.Marshal(signingResult)
224+
if err != nil {
225+
logger.Error("Failed to marshal signing result event", err,
226+
"walletID", signMsg.WalletID,
227+
"txID", signMsg.TxID,
228+
)
229+
return
230+
}
231+
232+
err = sc.signingResultQueue.Enqueue(event.SigningResultCompleteTopic, signingResultBytes, &messaging.EnqueueOptions{
233+
IdempotententKey: buildSigningIdempotentKey(signMsg.TxID, sessionID, mpc.TypeSigningResultFmt),
234+
})
235+
if err != nil {
236+
logger.Error("Failed to enqueue signing result event", err,
237+
"walletID", signMsg.WalletID,
238+
"txID", signMsg.TxID,
239+
)
240+
}
241+
}
242+
202243
// Close unsubscribes from the JetStream subject and cleans up resources.
203244
func (sc *signingConsumer) Close() error {
204245
if sc.jsSub != nil {
@@ -210,3 +251,13 @@ func (sc *signingConsumer) Close() error {
210251
}
211252
return nil
212253
}
254+
255+
// buildSigningIdempotentKey renders formatTemplate with a key derived from
// baseID. The key is "baseID:sessionID" when sessionID is non-empty and
// baseID alone otherwise.
func buildSigningIdempotentKey(baseID string, sessionID string, formatTemplate string) string {
	key := baseID
	if sessionID != "" {
		key = baseID + ":" + sessionID
	}
	return fmt.Sprintf(formatTemplate, key)
}

pkg/mpc/registry.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/fystack/mpcium/pkg/messaging"
1515
"github.com/hashicorp/consul/api"
1616
"github.com/samber/lo"
17+
"github.com/spf13/viper"
1718
)
1819

1920
const (
@@ -23,6 +24,7 @@ const (
2324
type PeerRegistry interface {
2425
Ready() error
2526
ArePeersReady() bool
27+
AreMajorityReady() bool
2628
WatchPeersReady()
2729
// Resign is called by the node when it is going to shutdown
2830
Resign() error
@@ -49,6 +51,7 @@ type registry struct {
4951
pubSub messaging.PubSub
5052
identityStore identity.Store
5153
ecdhSession ECDHSession
54+
mpcThreshold int
5255

5356
onPeerConnected func(peerID string)
5457
onPeerDisconnected func(peerID string)
@@ -64,6 +67,10 @@ func NewRegistry(
6467
identityStore identity.Store,
6568
) *registry {
6669
ecdhSession := NewECDHSession(nodeID, peerNodeIDs, pubSub, identityStore)
70+
mpcThreshold := viper.GetInt("mpc_threshold")
71+
if mpcThreshold <= 2 {
72+
logger.Fatal("mpc_threshold must be greater than 2", nil)
73+
}
6774

6875
return &registry{
6976
consulKV: consulKV,
@@ -75,6 +82,7 @@ func NewRegistry(
7582
pubSub: pubSub,
7683
identityStore: identityStore,
7784
ecdhSession: ecdhSession,
85+
mpcThreshold: mpcThreshold,
7886
}
7987
}
8088

@@ -286,6 +294,15 @@ func (r *registry) ArePeersReady() bool {
286294
return r.ready && r.isECDHReady()
287295
}
288296

297+
// AreMajorityReady checks if a majority of peers are ready.
298+
// Returns true only if:
299+
// 1. The number of ready peers (including self) is greater than mpcThreshold+1
300+
// 2. Symmetric keys are fully established among all ready peers (excluding self).
301+
func (r *registry) AreMajorityReady() bool {
302+
readyCount := r.GetReadyPeersCount()
303+
return int(readyCount) >= r.mpcThreshold+1 && r.isECDHReady()
304+
}
305+
289306
func (r *registry) GetTotalPeersCount() int64 {
290307
var self int64 = 1
291308
return int64(len(r.peerNodeIDs)) + self
@@ -332,8 +349,8 @@ func (r *registry) GetReadyPeersCountExcludeSelf() int64 {
332349
}
333350

334351
func (r *registry) isECDHReady() bool {
335-
requiredKeyCount := int(r.GetReadyPeersCount()) - 1
336-
return r.identityStore.CheckSymmetricKeyComplete(requiredKeyCount)
352+
requiredKeyCount := r.GetReadyPeersCountExcludeSelf()
353+
return r.identityStore.CheckSymmetricKeyComplete(int(requiredKeyCount))
337354
}
338355

339356
func (r *registry) composeHealthCheckTopic(nodeID string) string {

0 commit comments

Comments
 (0)