diff --git a/pkg/tbtc/signing.go b/pkg/tbtc/signing.go index c609afe73e..1c5e9437a2 100644 --- a/pkg/tbtc/signing.go +++ b/pkg/tbtc/signing.go @@ -373,6 +373,7 @@ func (se *signingExecutor) signWithTaprootMerkleRoot( doneCheck := newSigningDoneCheck( se.groupParameters.GroupSize, + se.groupParameters.HonestThreshold, se.broadcastChannel, se.membershipValidator, ) diff --git a/pkg/tbtc/signing_done.go b/pkg/tbtc/signing_done.go index f14426d87f..7b3b717054 100644 --- a/pkg/tbtc/signing_done.go +++ b/pkg/tbtc/signing_done.go @@ -47,23 +47,42 @@ func (sdm *signingDoneMessage) Type() string { // successful signature calculation across all signing group members. type signingDoneCheck struct { groupSize int + honestThreshold int broadcastChannel net.BroadcastChannel membershipValidator *group.MembershipValidator - receiveCtx context.Context - cancelReceiveCtx context.CancelFunc - expectedSignersCount int - doneSigners map[group.MemberIndex]*signingDoneMessage - doneSignersMutex sync.RWMutex + receiveCtx context.Context + cancelReceiveCtx context.CancelFunc + // attemptMemberCount is len(attemptMembersIndexes) for the live attempt - the + // number of confirmations the legacy (non-oversized) rule waits for. + attemptMemberCount int + // oversized is true when the attempt's included set is larger than the honest + // threshold: the RFC-21 Phase 7.3 t-of-included case, where the attempt is + // signed by a t-subset so the non-subset / offline members never report done + // and the legacy all-members rule would hang. It selects the + // quorum-by-signature completion rule. When false (included == threshold, the + // pre-oversizing selector output and the whole coarse path) the legacy rule is + // used UNCHANGED. + oversized bool + // attemptTimeoutBlock is the deterministic block the attempt concludes by + // (announcementEndBlock + signingAttemptMaximumProtocolBlocks). On the oversized + // path it is returned as the result end block instead of a network-order- + // dependent max over done messages, so every honest node feeds signBatch the + // same next-signature start block (signingStartBlock = prev endBlock + interlude). + attemptTimeoutBlock uint64 + doneSigners map[group.MemberIndex]*signingDoneMessage + doneSignersMutex sync.RWMutex } func newSigningDoneCheck( groupSize int, + honestThreshold int, broadcastChannel net.BroadcastChannel, membershipValidator *group.MembershipValidator, ) *signingDoneCheck { return &signingDoneCheck{ groupSize: groupSize, + honestThreshold: honestThreshold, broadcastChannel: broadcastChannel, membershipValidator: membershipValidator, } @@ -91,7 +110,17 @@ func (sdc *signingDoneCheck) listen( sdc.receiveCtx, sdc.cancelReceiveCtx = context.WithCancel(ctx) sdc.doneSignersMutex.Lock() - sdc.expectedSignersCount = len(attemptMembersIndexes) + sdc.attemptMemberCount = len(attemptMembersIndexes) + // An included set larger than the honest threshold is the RFC-21 Phase 7.3 + // t-of-included case: the attempt is signed by a t-subset and the non-subset / + // offline members never report done, so the all-members rule would hang. The + // oversized path concludes on a quorum of >= honestThreshold matching + // signatures instead, with a deterministic end block. included == threshold + // (today's selector output and the whole coarse path) keeps the legacy rule + // byte-for-byte, so behavior is unchanged until participant selection oversizes + // the set. + sdc.oversized = len(attemptMembersIndexes) > sdc.honestThreshold + sdc.attemptTimeoutBlock = attemptTimeoutBlock sdc.doneSigners = make(map[group.MemberIndex]*signingDoneMessage) sdc.doneSignersMutex.Unlock() @@ -149,12 +178,12 @@ func (sdc *signingDoneCheck) signalDone( }, net.BackoffRetransmissionStrategy) } -// waitUntilAllDone blocks until it receives all the required done checks from -// members or until the passed context is done. In the first case, it returns -// the signature computed by the signing members and the block at which the -// slowest signer completed the signature computation process. If the expected -// done checks are not received on time, the function returns an error. If at -// least one signature is different from others, the function returns an error. +// waitUntilAllDone blocks until the attempt's completion rule is met or the +// passed context is done. On success it returns the agreed signature and a +// deterministic end block (the same value on every honest node): on the legacy +// path the block at which the slowest attempt member completed, on the oversized +// path the attempt timeout block. It returns errWaitDoneTimedOut if the rule is +// not met on time, and a non-nil error on a fatal divergence (legacy path only). func (sdc *signingDoneCheck) waitUntilAllDone(ctx context.Context) ( *signing.Result, uint64, @@ -171,35 +200,123 @@ func (sdc *signingDoneCheck) waitUntilAllDone(ctx context.Context) ( return nil, 0, errWaitDoneTimedOut case <-ticker.C: - expectedSignersCount, doneSigners := sdc.snapshotDoneSigners() - if expectedSignersCount == len(doneSigners) { - var signature *frost.Signature - var latestEndBlock uint64 - - for _, doneMessage := range doneSigners { - if signature == nil { - signature = doneMessage.signature - } else { - if !signature.Equals(doneMessage.signature) { - return nil, 0, fmt.Errorf( - "not matching signatures detected: [%v] and [%v]", - signature, - doneMessage.signature, - ) - } - } - - if doneMessage.endBlock > latestEndBlock { - latestEndBlock = doneMessage.endBlock - } - } - - return &signing.Result{Signature: signature}, latestEndBlock, nil + result, endBlock, concluded, err := sdc.evaluateDone() + if err != nil { + return nil, 0, err + } + if concluded { + return result, endBlock, nil } } } } +// evaluateDone snapshots the done checks collected so far and applies the +// attempt's completion rule. It returns (result, endBlock, true, nil) once the +// attempt can conclude, (nil, 0, false, nil) while still waiting, or +// (nil, 0, false, err) on a fatal divergence. The legacy (non-oversized) rule +// and the oversized t-of-included rule are kept fully separate so the legacy / +// coarse path is byte-for-byte unchanged. +func (sdc *signingDoneCheck) evaluateDone() (*signing.Result, uint64, bool, error) { + sdc.doneSignersMutex.RLock() + oversized := sdc.oversized + attemptMemberCount := sdc.attemptMemberCount + attemptTimeoutBlock := sdc.attemptTimeoutBlock + honestThreshold := sdc.honestThreshold + doneSigners := make([]*signingDoneMessage, 0, len(sdc.doneSigners)) + for _, doneMessage := range sdc.doneSigners { + doneSigners = append(doneSigners, doneMessage.clone()) + } + sdc.doneSignersMutex.RUnlock() + + if oversized { + return concludeOversizedDone(doneSigners, honestThreshold, attemptTimeoutBlock) + } + return concludeLegacyDone(doneSigners, attemptMemberCount) +} + +// concludeLegacyDone is the pre-7.3 rule, UNCHANGED: conclude once every attempt +// member confirmed, require all signatures equal, and return the max end block. +// The attemptMemberCount > 0 guard rejects the pre-listen state (no attempt +// configured) so an empty done set is never read as success. +func concludeLegacyDone( + doneSigners []*signingDoneMessage, + attemptMemberCount int, +) (*signing.Result, uint64, bool, error) { + if attemptMemberCount == 0 || len(doneSigners) != attemptMemberCount { + return nil, 0, false, nil + } + + var signature *frost.Signature + var latestEndBlock uint64 + for _, doneMessage := range doneSigners { + if signature == nil { + signature = doneMessage.signature + } else if !signature.Equals(doneMessage.signature) { + return nil, 0, false, fmt.Errorf( + "not matching signatures detected: [%v] and [%v]", + signature, + doneMessage.signature, + ) + } + + if doneMessage.endBlock > latestEndBlock { + latestEndBlock = doneMessage.endBlock + } + } + + return &signing.Result{Signature: signature}, latestEndBlock, true, nil +} + +// concludeOversizedDone is the RFC-21 Phase 7.3 t-of-included rule: bucket the +// done checks by signature (one done message per sender, so each member is in +// exactly one bucket) and conclude once a bucket holds >= honestThreshold +// distinct senders - the minimum that proves a valid threshold signature. +// Minority buckets (divergent or adversarial signatures) are IGNORED, never +// fatal, so a single bad done message cannot fracture the group. The end block +// is the deterministic attempt timeout block, not a network-order-dependent max, +// so every honest node returns the same value for batch scheduling. +func concludeOversizedDone( + doneSigners []*signingDoneMessage, + honestThreshold int, + attemptTimeoutBlock uint64, +) (*signing.Result, uint64, bool, error) { + if honestThreshold <= 0 { + return nil, 0, false, nil + } + + bucketSig := map[string]*frost.Signature{} + bucketCount := map[string]int{} + for _, doneMessage := range doneSigners { + serialized := doneMessage.signature.Serialize() + key := string(serialized[:]) + if _, ok := bucketSig[key]; !ok { + bucketSig[key] = doneMessage.signature + } + bucketCount[key]++ + } + + var quorumSig *frost.Signature + quorums := 0 + for key, count := range bucketCount { + if count >= honestThreshold { + quorums++ + quorumSig = bucketSig[key] + } + } + + // Exactly one >= t bucket is the only reachable outcome under honest majority + // (honestThreshold > groupSize/2 means two disjoint >= t buckets cannot + // coexist). It carries the one valid signature and concludes deterministically. + // quorums == 0 keeps waiting; quorums > 1 is unreachable and intentionally NOT + // concluded (the attempt fails via the ctx timeout rather than picking a bucket + // nondeterministically - a bare done-message split is not coordinator blame). + if quorums == 1 { + return &signing.Result{Signature: quorumSig}, attemptTimeoutBlock, true, nil + } + return nil, 0, false, nil +} + // isValidDoneMessage validates the given signingDoneMessage in the context // of the given signing attempt. func (sdc *signingDoneCheck) isValidDoneMessage( @@ -250,21 +367,6 @@ func (sdc *signingDoneCheck) recordDoneMessage( return true } -func (sdc *signingDoneCheck) snapshotDoneSigners() ( - int, - []*signingDoneMessage, -) { - sdc.doneSignersMutex.RLock() - defer sdc.doneSignersMutex.RUnlock() - - result := make([]*signingDoneMessage, 0, len(sdc.doneSigners)) - for _, doneMessage := range sdc.doneSigners { - result = append(result, doneMessage.clone()) - } - - return sdc.expectedSignersCount, result -} - func (sdm *signingDoneMessage) clone() *signingDoneMessage { if sdm == nil { return nil diff --git a/pkg/tbtc/signing_done_test.go b/pkg/tbtc/signing_done_test.go index 911cf9c69d..6a08dece7c 100644 --- a/pkg/tbtc/signing_done_test.go +++ b/pkg/tbtc/signing_done_test.go @@ -281,16 +281,198 @@ func TestSigningDoneCheck_AnotherSignature(t *testing.T) { } } +// TestSigningDoneCheck_ThresholdSubsetConcludes covers RFC-21 Phase 7.3 +// t-of-included finalize: the included set is oversized (larger than the honest +// threshold) and only the t-subset that actually signed reports done - the rest +// are offline. The check must conclude on a quorum of t matching signatures +// rather than hanging for the absent members, and must return the DETERMINISTIC +// attempt timeout block as the end block (not a network-order-dependent max over +// the done messages, which would desync batch scheduling across nodes). +func TestSigningDoneCheck_ThresholdSubsetConcludes(t *testing.T) { + groupParameters := &GroupParameters{ + GroupSize: 5, + GroupQuorum: 4, + HonestThreshold: 3, + } + + doneCheck := setupSigningDoneCheck(t, groupParameters) + + memberIndexes := make([]group.MemberIndex, doneCheck.groupSize) + for i := range memberIndexes { + memberIndexes[i] = group.MemberIndex(i + 1) + } + + ctx, cancelCtx := context.WithTimeout(context.Background(), 5*time.Second) + defer cancelCtx() + + message := big.NewInt(100) + attemptNumber := uint64(1) + attemptTimeoutBlock := uint64(1000) + // Oversized included set: ALL five members are included, but only the first + // three (the chosen signing subset) report done; members 4 and 5 are offline. + attemptMemberIndexes := memberIndexes + result := &signing.Result{ + Signature: mustFrostSignatureFromBigInts(big.NewInt(200), big.NewInt(300)), + } + + doneCheck.listen( + ctx, + message, + attemptNumber, + attemptTimeoutBlock, + attemptMemberIndexes, + ) + + // The three reporters carry DIFFERENT end blocks (501, 502, 503); the result + // must still be the deterministic attempt timeout block, proving the returned + // end block does not depend on which/how-many done messages arrived. + for i := 1; i <= groupParameters.HonestThreshold; i++ { + err := doneCheck.signalDone( + ctx, + uint8(i), + message, + attemptNumber, + result, + 500+uint64(i), + ) + if err != nil { + t.Fatal(err) + } + } + + returnedResult, endBlock, err := doneCheck.waitUntilAllDone(ctx) + if err != nil { + t.Fatalf("expected conclusion on the t-subset, got error: [%v]", err) + } + if returnedResult == nil || !result.Signature.Equals(returnedResult.Signature) { + t.Fatalf("unexpected result: [%v]", returnedResult) + } + testutils.AssertIntsEqual(t, "end block", int(attemptTimeoutBlock), int(endBlock)) +} + +// TestSigningDoneCheck_OversizedIgnoresMinorityDivergentSignature proves the +// oversized rule is robust to a minority adversary: a single done message +// carrying a DIFFERENT signature must not fail the check or fracture the group. +// t members report the correct signature (a quorum) while one reports a +// divergent one; the check concludes on the quorum and ignores the minority, +// rather than erroring the way the legacy all-members match-all rule would. +func TestSigningDoneCheck_OversizedIgnoresMinorityDivergentSignature(t *testing.T) { + groupParameters := &GroupParameters{ + GroupSize: 5, + GroupQuorum: 4, + HonestThreshold: 3, + } + + doneCheck := setupSigningDoneCheck(t, groupParameters) + + memberIndexes := make([]group.MemberIndex, doneCheck.groupSize) + for i := range memberIndexes { + memberIndexes[i] = group.MemberIndex(i + 1) + } + + ctx, cancelCtx := context.WithTimeout(context.Background(), 5*time.Second) + defer cancelCtx() + + message := big.NewInt(100) + attemptNumber := uint64(1) + attemptTimeoutBlock := uint64(1000) + attemptMemberIndexes := memberIndexes // oversized (5 > threshold 3) + correctResult := &signing.Result{ + Signature: mustFrostSignatureFromBigInts(big.NewInt(200), big.NewInt(300)), + } + divergentResult := &signing.Result{ + Signature: mustFrostSignatureFromBigInts(big.NewInt(201), big.NewInt(300)), + } + + doneCheck.listen(ctx, message, attemptNumber, attemptTimeoutBlock, attemptMemberIndexes) + + // Members 1..3 report the correct signature (a quorum); member 4 reports a + // divergent one. + for i := 1; i <= groupParameters.HonestThreshold; i++ { + if err := doneCheck.signalDone(ctx, uint8(i), message, attemptNumber, correctResult, 100); err != nil { + t.Fatal(err) + } + } + if err := doneCheck.signalDone(ctx, uint8(4), message, attemptNumber, divergentResult, 100); err != nil { + t.Fatal(err) + } + + returnedResult, endBlock, err := doneCheck.waitUntilAllDone(ctx) + if err != nil { + t.Fatalf("a minority divergent signature must be ignored, not fatal; got error: [%v]", err) + } + if returnedResult == nil || !correctResult.Signature.Equals(returnedResult.Signature) { + t.Fatalf("expected the quorum signature, got: [%v]", returnedResult) + } + testutils.AssertIntsEqual(t, "end block", int(attemptTimeoutBlock), int(endBlock)) +} + +// TestSigningDoneCheck_OversizedSplitBelowQuorumTimesOut covers the no-quorum +// case (e.g. a coordinator equivocation splitting honest nodes across two +// signatures, plus offline members): with the reporters split below the +// threshold in every signature bucket, no bucket reaches t, so the check must +// time out rather than conclude on a non-quorum subset. +func TestSigningDoneCheck_OversizedSplitBelowQuorumTimesOut(t *testing.T) { + groupParameters := &GroupParameters{ + GroupSize: 5, + GroupQuorum: 4, + HonestThreshold: 3, + } + + doneCheck := setupSigningDoneCheck(t, groupParameters) + + memberIndexes := make([]group.MemberIndex, doneCheck.groupSize) + for i := range memberIndexes { + memberIndexes[i] = group.MemberIndex(i + 1) + } + + ctx, cancelCtx := context.WithTimeout(context.Background(), 1*time.Second) + defer cancelCtx() + + message := big.NewInt(100) + attemptNumber := uint64(1) + attemptTimeoutBlock := uint64(1000) + attemptMemberIndexes := memberIndexes // oversized (5 > threshold 3) + resultA := &signing.Result{ + Signature: mustFrostSignatureFromBigInts(big.NewInt(200), big.NewInt(300)), + } + resultB := &signing.Result{ + Signature: mustFrostSignatureFromBigInts(big.NewInt(201), big.NewInt(300)), + } + + doneCheck.listen(ctx, message, attemptNumber, attemptTimeoutBlock, attemptMemberIndexes) + + // 2 report signature A, 2 report signature B, 1 offline: no bucket reaches t=3. + for _, i := range []uint8{1, 2} { + if err := doneCheck.signalDone(ctx, i, message, attemptNumber, resultA, 100); err != nil { + t.Fatal(err) + } + } + for _, i := range []uint8{3, 4} { + if err := doneCheck.signalDone(ctx, i, message, attemptNumber, resultB, 100); err != nil { + t.Fatal(err) + } + } + + returnedResult, endBlock, err := doneCheck.waitUntilAllDone(ctx) + if returnedResult != nil { + t.Errorf("expected nil result on a below-quorum split, has [%v]", returnedResult) + } + testutils.AssertIntsEqual(t, "end block", 0, int(endBlock)) + testutils.AssertErrorsSame(t, errWaitDoneTimedOut, err) +} + // signingDoneCheckComponents holds the shared state used to construct one or // more signingDoneCheck instances that communicate over the same channel. type signingDoneCheckComponents struct { groupSize int + honestThreshold int broadcastChannel net.BroadcastChannel membershipValidator *group.MembershipValidator } func (c *signingDoneCheckComponents) newCheck() *signingDoneCheck { - return newSigningDoneCheck(c.groupSize, c.broadcastChannel, c.membershipValidator) + return newSigningDoneCheck(c.groupSize, c.honestThreshold, c.broadcastChannel, c.membershipValidator) } // setupSigningDoneCheckComponents builds the shared channel and validator @@ -341,6 +523,7 @@ func setupSigningDoneCheckComponents( return &signingDoneCheckComponents{ groupSize: groupParameters.GroupSize, + honestThreshold: groupParameters.HonestThreshold, broadcastChannel: broadcastChannel, membershipValidator: membershipValidator, }