diff --git a/pkg/frost/signing/roast_retry_attempt_handle_frost_roast_retry.go b/pkg/frost/signing/roast_retry_attempt_handle_frost_roast_retry.go index 1651d1d9e1..3921ffe9d0 100644 --- a/pkg/frost/signing/roast_retry_attempt_handle_frost_roast_retry.go +++ b/pkg/frost/signing/roast_retry_attempt_handle_frost_roast_retry.go @@ -161,6 +161,10 @@ func sessionHandleSweepLoop(stop <-chan struct{}) { // anything past the TTL. evictStaleObservedAttempts(ObservedAttemptRegistryTTL) evictStaleRoastTransitions(RoastTransitionRegistryTTL) + // Stashed coarse-path evidence is normally consumed by + // BroadcastForcedSnapshot or cleared on success/session-end (RFC-21 + // Phase 7.3 PR2b-2 step 2); sweep any orphaned by an abnormal end. + evictStalePendingEvidence(PendingEvidenceRegistryTTL) } } } diff --git a/pkg/frost/signing/roast_retry_evidence_stash_default_build.go b/pkg/frost/signing/roast_retry_evidence_stash_default_build.go new file mode 100644 index 0000000000..61c10087d2 --- /dev/null +++ b/pkg/frost/signing/roast_retry_evidence_stash_default_build.go @@ -0,0 +1,20 @@ +//go:build !frost_roast_retry + +package signing + +import ( + "github.com/keep-network/keep-core/pkg/frost/roast/attempt" + "github.com/keep-network/keep-core/pkg/protocol/group" +) + +// stashPendingEvidence is a no-op in the default build: with no ROAST-retry +// orchestration there is never a session-handle binding, so submitSnapshotIfActive +// returns before reaching this call. The build-tagged implementation does the real +// stashing the transition exchange's BroadcastForcedSnapshot consumes. +func stashPendingEvidence( + _ string, + _ group.MemberIndex, + _ [attempt.MessageDigestLength]byte, + _ attempt.Evidence, +) { +} diff --git a/pkg/frost/signing/roast_retry_evidence_stash_frost_roast_retry.go b/pkg/frost/signing/roast_retry_evidence_stash_frost_roast_retry.go new file mode 100644 index 0000000000..ae77698b96 --- /dev/null +++ b/pkg/frost/signing/roast_retry_evidence_stash_frost_roast_retry.go @@ -0,0 +1,204 @@ +//go:build frost_roast_retry + +package signing + +import ( + "sync" + "time" + + "github.com/keep-network/keep-core/pkg/frost/roast/attempt" + "github.com/keep-network/keep-core/pkg/protocol/group" +) + +// PendingEvidenceRegistryTTL is how long a stashed evidence entry is retained +// before the background sweeper evicts it. Matches the observe-binding TTL: a +// stashed snapshot is useless once the session it tracks is archived. +const PendingEvidenceRegistryTTL = SessionHandleBindingTTL + +// pendingEvidenceKey scopes one local seat's captured coarse-path evidence to a +// specific attempt of a ROAST session. +// +// RFC-21 Phase 7.3 PR2b-2 step 2 (the blame bridge): the coarse receive loop +// captures real rejects/overflows/conflicts into an EvidenceRecorder and, at +// end-of-collect, stashes the raw snapshot here (submitSnapshotIfActive). The +// transition exchange's BroadcastForcedSnapshot then reads it so the seat's +// broadcast snapshot -- and therefore the elected coordinator's aggregated bundle +// -- carries real evidence instead of a forced-empty proof-of-attendance one, so +// NextAttempt's f+1 accuser tally can finally fire. +// +// The key is (RoastSessionID, member, attemptHash): the same coordinate space the +// observe binding uses (attemptHash == ctx.Hash() is namespace-independent, and +// the stashing ctx.SessionID is the STABLE RoastSessionID == the exchange's +// e.roastSessionID). The member component isolates a multi-seat operator's sibling +// seats: they share a RoastSessionID and attemptHash but each stashes -- and +// broadcasts -- its OWN evidence, so neither overwrites the other. +type pendingEvidenceKey struct { + sessionID string + member group.MemberIndex + attemptHash [attempt.MessageDigestLength]byte +} + +type pendingEvidenceEntry struct { + evidence attempt.Evidence + createdAt time.Time +} + +var ( + pendingEvidenceMu sync.Mutex + pendingEvidenceRegistry = map[pendingEvidenceKey]pendingEvidenceEntry{} +) + +// stashPendingEvidence stores a deep COPY of the captured evidence for +// (sessionID, member, attemptHash). A later call for the same key overwrites the +// earlier one (a re-driven attempt re-stashes). The deep copy means a later +// mutation of the caller's recorder snapshot cannot race the exchange's read. +func stashPendingEvidence( + sessionID string, + member group.MemberIndex, + attemptHash [attempt.MessageDigestLength]byte, + evidence attempt.Evidence, +) { + key := pendingEvidenceKey{sessionID, member, attemptHash} + pendingEvidenceMu.Lock() + defer pendingEvidenceMu.Unlock() + pendingEvidenceRegistry[key] = pendingEvidenceEntry{ + evidence: copyEvidence(evidence), + createdAt: time.Now(), + } +} + +// takePendingEvidence returns the stashed evidence for (sessionID, member, +// attemptHash) and a presence flag, REMOVING it (consume-on-read). The returned +// value is the sole reference -- the entry is deleted from the registry -- so the +// caller owns it exclusively without a further copy. BroadcastForcedSnapshot calls +// it: a present entry means the coarse receive loop observed real evidence for the +// attempt; absent means none was captured (the broadcast then carries an empty +// proof-of-attendance snapshot). +func takePendingEvidence( + sessionID string, + member group.MemberIndex, + attemptHash [attempt.MessageDigestLength]byte, +) (attempt.Evidence, bool) { + key := pendingEvidenceKey{sessionID, member, attemptHash} + pendingEvidenceMu.Lock() + defer pendingEvidenceMu.Unlock() + entry, ok := pendingEvidenceRegistry[key] + if !ok { + return attempt.Evidence{}, false + } + delete(pendingEvidenceRegistry, key) + return entry.evidence, true +} + +// clearPendingEvidence removes any stashed evidence for (sessionID, member, +// attemptHash). +func clearPendingEvidence( + sessionID string, + member group.MemberIndex, + attemptHash [attempt.MessageDigestLength]byte, +) { + key := pendingEvidenceKey{sessionID, member, attemptHash} + pendingEvidenceMu.Lock() + defer pendingEvidenceMu.Unlock() + delete(pendingEvidenceRegistry, key) +} + +// ClearPendingEvidenceOnLocalSuccess removes the stashed evidence for an attempt +// this seat completed successfully. A succeeded attempt produces no transition +// bundle, so BroadcastForcedSnapshot never consumes the stash; clearing here +// prevents a leak until the TTL backstop. Exported so the pkg/tbtc transition +// controller can call it alongside ClearObservedAttemptOnLocalSuccess (RFC-21 +// Phase 7.3 PR2b-2). +func ClearPendingEvidenceOnLocalSuccess( + sessionID string, + member group.MemberIndex, + attemptHash [attempt.MessageDigestLength]byte, +) { + clearPendingEvidence(sessionID, member, attemptHash) +} + +// clearPendingEvidenceForSession removes every stashed evidence entry for +// (sessionID, member), regardless of attempt hash. The transition exchange calls +// it when the session ends (its listener context is done), mirroring +// clearObservedAttemptsForSession, so a signing whose attempts succeeded -- and +// therefore never consumed the stash via BroadcastForcedSnapshot -- does not leave +// entries behind. +func clearPendingEvidenceForSession(sessionID string, member group.MemberIndex) { + pendingEvidenceMu.Lock() + defer pendingEvidenceMu.Unlock() + for key := range pendingEvidenceRegistry { + if key.sessionID == sessionID && key.member == member { + delete(pendingEvidenceRegistry, key) + } + } +} + +// evictStalePendingEvidence sweeps the registry and removes entries older than +// maxAge. Exposed at the package level so tests can invoke it directly with small +// maxAge values and so the shared session-handle sweeper can fold it into one +// background goroutine: a session that ends abnormally must not orphan a stash +// entry past its backstop TTL. +func evictStalePendingEvidence(maxAge time.Duration) int { + cutoff := time.Now().Add(-maxAge) + pendingEvidenceMu.Lock() + defer pendingEvidenceMu.Unlock() + evicted := 0 + for key, entry := range pendingEvidenceRegistry { + if entry.createdAt.Before(cutoff) { + delete(pendingEvidenceRegistry, key) + evicted++ + } + } + return evicted +} + +// ResetPendingEvidenceRegistryForTest clears every stashed entry. Test-only seam; +// not for production code paths. +func ResetPendingEvidenceRegistryForTest() { + pendingEvidenceMu.Lock() + defer pendingEvidenceMu.Unlock() + pendingEvidenceRegistry = map[pendingEvidenceKey]pendingEvidenceEntry{} +} + +// PendingEvidenceStashedForTest reports whether any stash entry exists for +// (sessionID, member), regardless of attempt hash. Exported test seam so +// downstream-package tests can assert the stash without reaching into the +// unexported registry. +func PendingEvidenceStashedForTest(sessionID string, member group.MemberIndex) bool { + pendingEvidenceMu.Lock() + defer pendingEvidenceMu.Unlock() + for key := range pendingEvidenceRegistry { + if key.sessionID == sessionID && key.member == member { + return true + } + } + return false +} + +// copyEvidence returns a deep copy of an attempt.Evidence: the three maps are +// re-allocated and the per-sender RejectEntry slices are cloned. RejectEntry is a +// flat value type (Reason string, Count uint), so a slice copy is a full deep +// copy. nil maps stay nil (a nil category means "did not fire", which +// NewLocalEvidenceSnapshot and NextAttempt both treat as empty). +func copyEvidence(e attempt.Evidence) attempt.Evidence { + out := attempt.Evidence{} + if e.Overflows != nil { + out.Overflows = make(map[group.MemberIndex]uint, len(e.Overflows)) + for k, v := range e.Overflows { + out.Overflows[k] = v + } + } + if e.Rejects != nil { + out.Rejects = make(map[group.MemberIndex][]attempt.RejectEntry, len(e.Rejects)) + for k, v := range e.Rejects { + out.Rejects[k] = append([]attempt.RejectEntry(nil), v...) + } + } + if e.Conflicts != nil { + out.Conflicts = make(map[group.MemberIndex]uint, len(e.Conflicts)) + for k, v := range e.Conflicts { + out.Conflicts[k] = v + } + } + return out +} diff --git a/pkg/frost/signing/roast_retry_evidence_stash_frost_roast_retry_test.go b/pkg/frost/signing/roast_retry_evidence_stash_frost_roast_retry_test.go new file mode 100644 index 0000000000..c88a54aad1 --- /dev/null +++ b/pkg/frost/signing/roast_retry_evidence_stash_frost_roast_retry_test.go @@ -0,0 +1,165 @@ +//go:build frost_roast_retry + +package signing + +import ( + "testing" + "time" + + "github.com/keep-network/keep-core/pkg/frost/roast/attempt" + "github.com/keep-network/keep-core/pkg/protocol/group" +) + +func stashTestHash(b byte) [attempt.MessageDigestLength]byte { + var h [attempt.MessageDigestLength]byte + h[0] = b + return h +} + +func stashTestEvidence() attempt.Evidence { + return attempt.Evidence{ + Overflows: map[group.MemberIndex]uint{2: 1}, + Rejects: map[group.MemberIndex][]attempt.RejectEntry{ + 3: {{Reason: "r", Count: 1}}, + }, + Conflicts: map[group.MemberIndex]uint{4: 1}, + } +} + +func TestPendingEvidenceStash_StoreTakeConsumes(t *testing.T) { + ResetPendingEvidenceRegistryForTest() + t.Cleanup(ResetPendingEvidenceRegistryForTest) + + hash := stashTestHash(0x11) + stashPendingEvidence("s", 1, hash, stashTestEvidence()) + + if !PendingEvidenceStashedForTest("s", 1) { + t.Fatal("entry must be present after store") + } + got, ok := takePendingEvidence("s", 1, hash) + if !ok { + t.Fatal("take must find the stored entry") + } + if got.Overflows[2] != 1 || got.Conflicts[4] != 1 || len(got.Rejects[3]) != 1 { + t.Fatalf("taken evidence does not match stored: %+v", got) + } + if _, ok := takePendingEvidence("s", 1, hash); ok { + t.Fatal("take must consume: a second take finds nothing") + } + if PendingEvidenceStashedForTest("s", 1) { + t.Fatal("entry must be gone after consume") + } +} + +// TestPendingEvidenceStash_DeepCopyIsolatesCallerMutation proves copyEvidence +// deep-copies on store: mutating the caller's Evidence (maps AND the per-sender +// reject slice) after stashing must not change what the exchange later reads. +func TestPendingEvidenceStash_DeepCopyIsolatesCallerMutation(t *testing.T) { + ResetPendingEvidenceRegistryForTest() + t.Cleanup(ResetPendingEvidenceRegistryForTest) + + hash := stashTestHash(0x22) + src := stashTestEvidence() + stashPendingEvidence("s", 1, hash, src) + + // Mutate every layer of the source after the store. + src.Overflows[2] = 99 + src.Overflows[5] = 7 // new key + src.Rejects[3][0].Count = 99 + src.Conflicts[4] = 99 + + got, ok := takePendingEvidence("s", 1, hash) + if !ok { + t.Fatal("take must find the stored entry") + } + if got.Overflows[2] != 1 { + t.Fatalf("overflow count must reflect store-time value 1; got %d", got.Overflows[2]) + } + if _, present := got.Overflows[5]; present { + t.Fatal("a key added to the source after store must not appear in the stash") + } + if got.Rejects[3][0].Count != 1 { + t.Fatalf("reject slice element must reflect store-time value 1; got %d", got.Rejects[3][0].Count) + } + if got.Conflicts[4] != 1 { + t.Fatalf("conflict count must reflect store-time value 1; got %d", got.Conflicts[4]) + } +} + +// TestPendingEvidenceStash_MemberKeyedIsolation asserts two seats sharing the same +// (sessionID, attemptHash) keep separate entries -- the member-keying that prevents +// a sibling seat from overwriting another's evidence. +func TestPendingEvidenceStash_MemberKeyedIsolation(t *testing.T) { + ResetPendingEvidenceRegistryForTest() + t.Cleanup(ResetPendingEvidenceRegistryForTest) + + hash := stashTestHash(0x33) + stashPendingEvidence("s", 1, hash, attempt.Evidence{Overflows: map[group.MemberIndex]uint{7: 1}}) + stashPendingEvidence("s", 2, hash, attempt.Evidence{Overflows: map[group.MemberIndex]uint{8: 1}}) + + got1, ok1 := takePendingEvidence("s", 1, hash) + got2, ok2 := takePendingEvidence("s", 2, hash) + if !ok1 || !ok2 { + t.Fatalf("both member entries must exist; ok1=%v ok2=%v", ok1, ok2) + } + if got1.Overflows[7] != 1 || got1.Overflows[8] != 0 { + t.Fatalf("seat 1 entry bled; got %+v", got1.Overflows) + } + if got2.Overflows[8] != 1 || got2.Overflows[7] != 0 { + t.Fatalf("seat 2 entry bled; got %+v", got2.Overflows) + } +} + +func TestPendingEvidenceStash_ClearForSession(t *testing.T) { + ResetPendingEvidenceRegistryForTest() + t.Cleanup(ResetPendingEvidenceRegistryForTest) + + stashPendingEvidence("s", 1, stashTestHash(0x01), stashTestEvidence()) + stashPendingEvidence("s", 1, stashTestHash(0x02), stashTestEvidence()) + stashPendingEvidence("other", 1, stashTestHash(0x03), stashTestEvidence()) + + clearPendingEvidenceForSession("s", 1) + + if PendingEvidenceStashedForTest("s", 1) { + t.Fatal("clearForSession must remove every attempt of (s,1)") + } + if !PendingEvidenceStashedForTest("other", 1) { + t.Fatal("clearForSession must not touch a different session") + } +} + +func TestClearPendingEvidenceOnLocalSuccess(t *testing.T) { + ResetPendingEvidenceRegistryForTest() + t.Cleanup(ResetPendingEvidenceRegistryForTest) + + hash := stashTestHash(0x44) + stashPendingEvidence("s", 1, hash, stashTestEvidence()) + ClearPendingEvidenceOnLocalSuccess("s", 1, hash) + if PendingEvidenceStashedForTest("s", 1) { + t.Fatal("success clear must remove the attempt's stash entry") + } +} + +// TestEvictStalePendingEvidence asserts the TTL backstop: a fresh entry survives a +// long TTL and is swept once the cutoff passes its creation time. A negative +// max-age sets the cutoff in the future, so the entry is deterministically stale +// without sleeping. +func TestEvictStalePendingEvidence(t *testing.T) { + ResetPendingEvidenceRegistryForTest() + t.Cleanup(ResetPendingEvidenceRegistryForTest) + + stashPendingEvidence("s", 1, stashTestHash(0x55), stashTestEvidence()) + + if n := evictStalePendingEvidence(time.Hour); n != 0 { + t.Fatalf("a fresh entry must survive a long TTL; evicted %d", n) + } + if !PendingEvidenceStashedForTest("s", 1) { + t.Fatal("entry must remain after a no-op sweep") + } + if n := evictStalePendingEvidence(-time.Hour); n != 1 { + t.Fatalf("a future cutoff must evict the entry; evicted %d", n) + } + if PendingEvidenceStashedForTest("s", 1) { + t.Fatal("entry must be gone after the sweep") + } +} diff --git a/pkg/frost/signing/roast_retry_submit.go b/pkg/frost/signing/roast_retry_submit.go index f4d7157b84..bdd659f9c9 100644 --- a/pkg/frost/signing/roast_retry_submit.go +++ b/pkg/frost/signing/roast_retry_submit.go @@ -1,43 +1,39 @@ package signing import ( - "github.com/ipfs/go-log/v2" - "github.com/keep-network/keep-core/pkg/frost/roast" "github.com/keep-network/keep-core/pkg/frost/roast/attempt" "github.com/keep-network/keep-core/pkg/protocol/group" ) -// roastRetryLogger is the logger the snapshot-submission path uses -// for non-fatal diagnostics (submission failures, signature errors). -// A submission failure does not propagate to the signing flow: -// Phase 4 ships the submission code path unused in production, and -// even when wired (Phase 5+) a transient submission failure is -// recoverable by the next attempt's evidence flow. -var roastRetryLogger = log.Logger("keep-frost-roast-retry") - -// submitSnapshotIfActive is invoked at end-of-collect to push the -// receive loop's accumulated evidence into the ROAST coordinator's -// RecordEvidence pipeline. member is the local seat whose receive loop -// is submitting (request.MemberIndex). The path is fully member-aware -// (RFC-21 Phase 7.3 PR2b-2): a multi-seat operator's sibling seats each -// submit their own evidence against their own coordinator and attempt -// handle, so they no longer mis-attribute or collide (which is why the -// PR2b-1.5 multi-seat no-op guard is gone). The function is a no-op when -// any of the following is true: +// submitSnapshotIfActive is invoked at end-of-collect to capture the receive +// loop's accumulated evidence for the ROAST blame pipeline. member is the local +// seat whose receive loop is submitting (request.MemberIndex). The path is fully +// member-aware (RFC-21 Phase 7.3 PR2b-2): a multi-seat operator's sibling seats +// each capture their own evidence against their own attempt binding, so they never +// mis-attribute or collide. +// +// RFC-21 Phase 7.3 PR2b-2 step 2 (the blame bridge): the captured evidence is +// STASHED keyed by the attempt's (RoastSessionID, member, attemptHash), NOT +// recorded against the drive handle. The drive handle is never aggregated -- the +// transition exchange is the sole bundle producer and aggregates the OBSERVE +// handle -- so a RecordEvidence here was a write-only dead end. Stashing instead +// lets the exchange's BroadcastForcedSnapshot build + sign ONE snapshot carrying +// this evidence, so the elected coordinator's AggregateBundle includes it and +// NextAttempt's f+1 accuser tally can finally fire. // -// - this member has no registered ROAST-retry coordinator (default -// build where RegisterRoastRetryCoordinator is a no-op, or a -// partially-registered operator where only a sibling seat is wired); -// - no session-handle binding exists for (sessionID, member) (the -// typical Phase-4 state, where the orchestration layer that calls -// SetCurrentAttemptHandleForSession is not yet implemented); -// - the recorder is nil / a NoOp (no events were captured). +// The function is a no-op when any of the following holds: // -// Otherwise the function builds a LocalEvidenceSnapshot, signs it with -// this member's registered Signer, and submits it via -// Coordinator.RecordEvidence. Errors at any step are logged at WARN -// level and otherwise swallowed -- snapshot submission must not break -// the receive loop's primary signing behaviour. +// - no session-handle binding exists for (sessionID, member): the default build +// (where currentAttemptHandleForCollect always returns ok=false), or a state +// where the orchestration layer that calls SetCurrentAttemptHandleForSession +// has not run; +// - the recorder is nil / a NoOp (no events were captured); +// - the captured evidence is empty across all three categories: the exchange +// still broadcasts an empty proof-of-attendance snapshot for the attempt, so +// skipping the stash here does not silence-park the seat. +// +// Capturing must never break the receive loop's primary signing behaviour, so the +// function returns silently on every skip condition. func submitSnapshotIfActive( sessionID string, member group.MemberIndex, @@ -46,14 +42,12 @@ func submitSnapshotIfActive( if recorder == nil { return } - // Member-aware lookup: a multi-seat operator's elected seat aggregates with - // its OWN coordinator and the snapshot is attributed to deps.SelfMember == - // member. A seat with no coordinator (partial registration) submits nothing. - deps, ok := RegisteredRoastRetryCoordinatorForMember(member) - if !ok { - return - } - handle, ctx, ok := currentAttemptHandleForCollect(sessionID, member) + // The drive binding (set by BeginOrchestrationForSession) signals this seat is + // driving a ROAST attempt and carries its AttemptContext. ctx.SessionID is the + // STABLE RoastSessionID and ctx.Hash() the attempt hash -- the + // namespace-independent coordinate the transition exchange keys its observe + // binding (and this stash) by, so the broadcast resolves the same entry. + _, ctx, ok := currentAttemptHandleForCollect(sessionID, member) if !ok { return } @@ -61,61 +55,14 @@ func submitSnapshotIfActive( if len(evidence.Overflows) == 0 && len(evidence.Rejects) == 0 && len(evidence.Conflicts) == 0 { - // Truly nothing observed worth submitting; emitting an empty - // snapshot is still meaningful in the ROAST protocol - // (proof-of-attendance) but adds noise to the bundle, so we - // skip it. The emptiness test MUST consider all three evidence - // categories, not just overflows: a validation-blamable Reject - // (e.g. an attempt-context-hash mismatch) or a first-write-wins - // Conflict populates Rejects/Conflicts WITHOUT any Overflow, and - // NextAttempt's exclusion path consumes snapshot.Rejects - // (next_attempt.go). Dropping a reject/conflict-only snapshot - // here would silently starve the blame pipeline of exactly the - // validation evidence it needs. + // Truly nothing observed worth carrying. The emptiness test MUST consider + // all three categories, not just overflows: a validation-blamable Reject + // (e.g. an attempt-context-hash mismatch) or a first-write-wins Conflict + // populates Rejects/Conflicts WITHOUT any Overflow, and NextAttempt's + // exclusion path consumes snapshot.Rejects (next_attempt.go). Dropping a + // reject/conflict-only snapshot here would silently starve the blame + // pipeline of exactly the validation evidence it needs. return } - snap := buildSignedSnapshot(deps, ctx, evidence) - if snap == nil { - return - } - if err := deps.Coordinator.RecordEvidence(handle, snap); err != nil { - roastRetryLogger.Warnf( - "roast-retry: RecordEvidence failed for session %q: %v", - sessionID, - err, - ) - } -} - -// buildSignedSnapshot constructs and signs a LocalEvidenceSnapshot -// from the captured evidence. Returns nil and logs on signature -// failure; callers treat nil as "skip submission" and continue. -func buildSignedSnapshot( - deps RoastRetryDeps, - ctx attempt.AttemptContext, - evidence attempt.Evidence, -) *roast.LocalEvidenceSnapshot { - snap := roast.NewLocalEvidenceSnapshot( - group.MemberIndex(deps.SelfMember), - ctx.Hash(), - evidence, - ) - payload, err := snap.SignableBytes() - if err != nil { - roastRetryLogger.Warnf( - "roast-retry: canonicalising snapshot failed: %v", - err, - ) - return nil - } - sig, err := deps.Signer.Sign(payload) - if err != nil { - roastRetryLogger.Warnf( - "roast-retry: signing snapshot failed: %v", - err, - ) - return nil - } - snap.OperatorSignature = sig - return snap + stashPendingEvidence(ctx.SessionID, member, ctx.Hash(), evidence) } diff --git a/pkg/frost/signing/roast_retry_submit_frost_roast_retry_test.go b/pkg/frost/signing/roast_retry_submit_frost_roast_retry_test.go index b813f02000..3c97c7111d 100644 --- a/pkg/frost/signing/roast_retry_submit_frost_roast_retry_test.go +++ b/pkg/frost/signing/roast_retry_submit_frost_roast_retry_test.go @@ -3,8 +3,6 @@ package signing import ( - "errors" - "sync" "testing" "github.com/keep-network/keep-core/pkg/frost/roast" @@ -12,87 +10,6 @@ import ( "github.com/keep-network/keep-core/pkg/protocol/group" ) -// captureCoordinator is a roast.Coordinator wrapper that records -// every RecordEvidence call so tests can assert what was submitted. -// It delegates everything else to an embedded real coordinator. -type captureCoordinator struct { - inner roast.Coordinator - mu sync.Mutex - recordedFor []roast.AttemptHandle - recordedSnp []*roast.LocalEvidenceSnapshot - recordErr error -} - -func newCaptureCoordinator(inner roast.Coordinator) *captureCoordinator { - return &captureCoordinator{inner: inner} -} - -func (c *captureCoordinator) BeginAttempt(ctx attempt.AttemptContext) (roast.AttemptHandle, error) { - return c.inner.BeginAttempt(ctx) -} -func (c *captureCoordinator) State(h roast.AttemptHandle) (roast.AttemptState, error) { - return c.inner.State(h) -} -func (c *captureCoordinator) SelectedCoordinator(h roast.AttemptHandle) (group.MemberIndex, error) { - return c.inner.SelectedCoordinator(h) -} -func (c *captureCoordinator) RecordEvidence(h roast.AttemptHandle, s *roast.LocalEvidenceSnapshot) error { - c.mu.Lock() - defer c.mu.Unlock() - if c.recordErr != nil { - return c.recordErr - } - c.recordedFor = append(c.recordedFor, h) - c.recordedSnp = append(c.recordedSnp, s) - return c.inner.RecordEvidence(h, s) -} -func (c *captureCoordinator) AggregateBundle(h roast.AttemptHandle) (*roast.TransitionMessage, error) { - return c.inner.AggregateBundle(h) -} -func (c *captureCoordinator) MarkSucceeded(h roast.AttemptHandle) error { - return c.inner.MarkSucceeded(h) -} -func (c *captureCoordinator) VerifyBundle(h roast.AttemptHandle, m *roast.TransitionMessage) error { - return c.inner.VerifyBundle(h, m) -} -func (c *captureCoordinator) NextAttempt( - h roast.AttemptHandle, m *roast.TransitionMessage, t uint, pk []byte, -) (attempt.AttemptContext, error) { - return c.inner.NextAttempt(h, m, t, pk) -} - -// deterministicSigner produces SHA256(memberID || payload)-style -// signatures the captureSignatureVerifier accepts. -type deterministicSigner struct { - id group.MemberIndex -} - -func (d *deterministicSigner) Sign(payload []byte) ([]byte, error) { - out := make([]byte, len(payload)+1) - out[0] = byte(d.id) - copy(out[1:], payload) - return out, nil -} - -type deterministicVerifier struct{} - -func (deterministicVerifier) Verify( - payload []byte, signature []byte, signer group.MemberIndex, -) error { - if len(signature) != len(payload)+1 { - return errors.New("deterministicVerifier: length mismatch") - } - if signature[0] != byte(signer) { - return errors.New("deterministicVerifier: signer byte mismatch") - } - for i, b := range payload { - if signature[i+1] != b { - return errors.New("deterministicVerifier: payload byte mismatch") - } - } - return nil -} - func newTestContextForSubmit(t *testing.T, sessionID string) attempt.AttemptContext { t.Helper() ctx, err := attempt.NewAttemptContext( @@ -110,111 +27,76 @@ func newTestContextForSubmit(t *testing.T, sessionID string) attempt.AttemptCont return ctx } -func TestSubmitSnapshotIfActive_NoOpWhenRegistryEmpty(t *testing.T) { - ResetRoastRetryRegistrationForTest() +// TestSubmitSnapshotIfActive_NilRecorderIsNoOp guards the cheap nil guard: the +// receive loop falls back to a nil/NoOp recorder when ROAST retry is inactive, and +// submit must not panic or stash. +func TestSubmitSnapshotIfActive_NilRecorderIsNoOp(t *testing.T) { ResetSessionHandleRegistryForTest() - t.Cleanup(ResetRoastRetryRegistrationForTest) + ResetPendingEvidenceRegistryForTest() t.Cleanup(ResetSessionHandleRegistryForTest) + t.Cleanup(ResetPendingEvidenceRegistryForTest) - // No registration, no binding. submit should be a no-op. - recorder := attempt.NewBoundedRecorder() - recorder.RecordOverflow(7) - submitSnapshotIfActive("session-x", 1, recorder) - // Nothing to assert observably: success is the absence of a - // panic and no calls to a non-existent coordinator. + submitSnapshotIfActive("session-nil", 1, nil) + + if PendingEvidenceStashedForTest("session-nil", 1) { + t.Fatal("nil recorder must not stash") + } } +// TestSubmitSnapshotIfActive_NoOpWhenSessionUnbound asserts that without a +// session-handle binding (the orchestration layer has not run, or the default +// build), submit stashes nothing -- there is no attempt to attribute evidence to. func TestSubmitSnapshotIfActive_NoOpWhenSessionUnbound(t *testing.T) { - ResetRoastRetryRegistrationForTest() ResetSessionHandleRegistryForTest() - t.Cleanup(ResetRoastRetryRegistrationForTest) + ResetPendingEvidenceRegistryForTest() t.Cleanup(ResetSessionHandleRegistryForTest) - - innerCoord := roast.NewInMemoryCoordinator() - cap := newCaptureCoordinator(innerCoord) - RegisterRoastRetryCoordinator(RoastRetryDeps{ - Coordinator: cap, - Signer: &deterministicSigner{id: 1}, - Verifier: deterministicVerifier{}, - SelfMember: 1, - }) + t.Cleanup(ResetPendingEvidenceRegistryForTest) recorder := attempt.NewBoundedRecorder() recorder.RecordOverflow(7) submitSnapshotIfActive("session-with-no-binding", 1, recorder) - if len(cap.recordedFor) != 0 { - t.Fatalf( - "expected no RecordEvidence calls when session unbound; got %d", - len(cap.recordedFor), - ) + if PendingEvidenceStashedForTest("session-with-no-binding", 1) { + t.Fatal("expected no stash when session unbound") } } +// TestSubmitSnapshotIfActive_NoOpWhenRecorderEmpty asserts a bound attempt whose +// recorder captured zero events stashes nothing: the exchange still broadcasts an +// empty proof-of-attendance snapshot, so skipping the stash does not silence-park +// the seat. func TestSubmitSnapshotIfActive_NoOpWhenRecorderEmpty(t *testing.T) { - ResetRoastRetryRegistrationForTest() ResetSessionHandleRegistryForTest() - t.Cleanup(ResetRoastRetryRegistrationForTest) + ResetPendingEvidenceRegistryForTest() t.Cleanup(ResetSessionHandleRegistryForTest) + t.Cleanup(ResetPendingEvidenceRegistryForTest) - innerCoord := roast.NewInMemoryCoordinatorWithSigning( - 1, - &deterministicSigner{id: 1}, - deterministicVerifier{}, - ) - cap := newCaptureCoordinator(innerCoord) - RegisterRoastRetryCoordinator(RoastRetryDeps{ - Coordinator: cap, - Signer: &deterministicSigner{id: 1}, - Verifier: deterministicVerifier{}, - SelfMember: 1, - }) - + const selfMember group.MemberIndex = 1 ctx := newTestContextForSubmit(t, "session-empty") - handle, err := cap.BeginAttempt(ctx) - if err != nil { - t.Fatalf("begin: %v", err) - } - SetCurrentAttemptHandleForSession("session-empty", 1, handle, ctx) + SetCurrentAttemptHandleForSession("session-empty", selfMember, roast.AttemptHandle{}, ctx) - // Recorder is bounded but has captured zero events. recorder := attempt.NewBoundedRecorder() - submitSnapshotIfActive("session-empty", 1, recorder) + submitSnapshotIfActive("session-empty", selfMember, recorder) - if len(cap.recordedFor) != 0 { - t.Fatalf( - "expected no RecordEvidence for empty snapshot; got %d", - len(cap.recordedFor), - ) + if PendingEvidenceStashedForTest("session-empty", selfMember) { + t.Fatal("expected no stash for an empty snapshot") } } -func TestSubmitSnapshotIfActive_SubmitsSignedSnapshotWhenBoundAndPopulated(t *testing.T) { - ResetRoastRetryRegistrationForTest() +// TestSubmitSnapshotIfActive_StashesEvidenceWhenBoundAndPopulated is the core +// blame-bridge wiring (RFC-21 Phase 7.3 PR2b-2 step 2): a bound attempt with +// captured evidence stashes the RAW evidence keyed by the attempt's +// (RoastSessionID==ctx.SessionID, member, attemptHash==ctx.Hash()) so the +// transition exchange's BroadcastForcedSnapshot can carry it into the bundle. +func TestSubmitSnapshotIfActive_StashesEvidenceWhenBoundAndPopulated(t *testing.T) { ResetSessionHandleRegistryForTest() - t.Cleanup(ResetRoastRetryRegistrationForTest) + ResetPendingEvidenceRegistryForTest() t.Cleanup(ResetSessionHandleRegistryForTest) + t.Cleanup(ResetPendingEvidenceRegistryForTest) const selfMember group.MemberIndex = 1 - innerCoord := roast.NewInMemoryCoordinatorWithSigning( - selfMember, - &deterministicSigner{id: selfMember}, - deterministicVerifier{}, - ) - cap := newCaptureCoordinator(innerCoord) - RegisterRoastRetryCoordinator(RoastRetryDeps{ - Coordinator: cap, - Signer: &deterministicSigner{id: selfMember}, - Verifier: deterministicVerifier{}, - SelfMember: uint32(selfMember), - }) - ctx := newTestContextForSubmit(t, "session-real") - handle, err := cap.BeginAttempt(ctx) - if err != nil { - t.Fatalf("begin: %v", err) - } - SetCurrentAttemptHandleForSession("session-real", selfMember, handle, ctx) + SetCurrentAttemptHandleForSession("session-real", selfMember, roast.AttemptHandle{}, ctx) recorder := attempt.NewBoundedRecorder() recorder.RecordOverflow(3) @@ -222,135 +104,86 @@ func TestSubmitSnapshotIfActive_SubmitsSignedSnapshotWhenBoundAndPopulated(t *te recorder.RecordOverflow(5) submitSnapshotIfActive("session-real", selfMember, recorder) - if len(cap.recordedFor) != 1 { - t.Fatalf("expected 1 RecordEvidence; got %d", len(cap.recordedFor)) - } - if cap.recordedFor[0] != handle { - t.Fatal("RecordEvidence handle mismatch") + evidence, ok := takePendingEvidence(ctx.SessionID, selfMember, ctx.Hash()) + if !ok { + t.Fatal("expected stashed evidence after a populated submit") } - snap := cap.recordedSnp[0] - if snap.SenderID() != selfMember { - t.Fatalf("snapshot sender: got %d want %d", snap.SenderID(), selfMember) + // 2 distinct senders observed (3 twice, 5 once). + if len(evidence.Overflows) != 2 { + t.Fatalf("expected 2 overflow senders stashed; got %d", len(evidence.Overflows)) } - if len(snap.OperatorSignature) == 0 { - t.Fatal("snapshot must be signed") + if evidence.Overflows[3] != 2 || evidence.Overflows[5] != 1 { + t.Fatalf("stashed overflow counts wrong: %+v", evidence.Overflows) } - // 2 distinct senders observed. - if len(snap.Overflows) != 2 { - t.Fatalf("expected 2 overflow entries; got %d", len(snap.Overflows)) + // take consumes: a second take finds nothing. + if _, ok := takePendingEvidence(ctx.SessionID, selfMember, ctx.Hash()); ok { + t.Fatal("take must consume the stash entry") } } -// TestSubmitSnapshotIfActive_MultiSeatSubmitsPerSeat asserts the PR2b-2 member- -// aware submit path: with two local seats registered, each seat submits its own -// evidence against ITS OWN coordinator and attempt handle. The PR2b-1.5 count>1 -// no-op guard is gone, so both submissions land -- and neither lands on the -// sibling's coordinator. -func TestSubmitSnapshotIfActive_MultiSeatSubmitsPerSeat(t *testing.T) { - ResetRoastRetryRegistrationForTest() +// TestSubmitSnapshotIfActive_StashesRejectOnlySnapshot guards the all-categories +// emptiness test: a snapshot carrying ONLY reject evidence -- no overflow, no +// conflict -- must still be stashed. A validation-blamable Reject populates +// Evidence.Rejects without any Overflow, and NextAttempt's exclusion path consumes +// snapshot.Rejects; an overflow-only emptiness check would starve the blame +// pipeline. +func TestSubmitSnapshotIfActive_StashesRejectOnlySnapshot(t *testing.T) { ResetSessionHandleRegistryForTest() - t.Cleanup(ResetRoastRetryRegistrationForTest) + ResetPendingEvidenceRegistryForTest() t.Cleanup(ResetSessionHandleRegistryForTest) + t.Cleanup(ResetPendingEvidenceRegistryForTest) - cap1 := newCaptureCoordinator(roast.NewInMemoryCoordinatorWithSigning( - 1, &deterministicSigner{id: 1}, deterministicVerifier{}, - )) - cap2 := newCaptureCoordinator(roast.NewInMemoryCoordinatorWithSigning( - 2, &deterministicSigner{id: 2}, deterministicVerifier{}, - )) - RegisterRoastRetryCoordinatorForMember(1, RoastRetryDeps{ - Coordinator: cap1, - Signer: &deterministicSigner{id: 1}, - Verifier: deterministicVerifier{}, - SelfMember: 1, - }) - RegisterRoastRetryCoordinatorForMember(2, RoastRetryDeps{ - Coordinator: cap2, - Signer: &deterministicSigner{id: 2}, - Verifier: deterministicVerifier{}, - SelfMember: 2, - }) - - ctx := newTestContextForSubmit(t, "session-ms") - handle1, err := cap1.BeginAttempt(ctx) - if err != nil { - t.Fatalf("seat 1 begin: %v", err) - } - handle2, err := cap2.BeginAttempt(ctx) - if err != nil { - t.Fatalf("seat 2 begin: %v", err) - } - SetCurrentAttemptHandleForSession("session-ms", 1, handle1, ctx) - SetCurrentAttemptHandleForSession("session-ms", 2, handle2, ctx) - - rec1 := attempt.NewBoundedRecorder() - rec1.RecordOverflow(3) - rec2 := attempt.NewBoundedRecorder() - rec2.RecordOverflow(4) + const selfMember group.MemberIndex = 1 + ctx := newTestContextForSubmit(t, "session-reject-only") + SetCurrentAttemptHandleForSession("session-reject-only", selfMember, roast.AttemptHandle{}, ctx) - submitSnapshotIfActive("session-ms", 1, rec1) - submitSnapshotIfActive("session-ms", 2, rec2) + recorder := attempt.NewBoundedRecorder() + recorder.RecordReject(2, "attempt_context_hash_mismatch") + submitSnapshotIfActive("session-reject-only", selfMember, recorder) - // Each seat's evidence landed on ITS OWN coordinator (no mis-attribution), - // each against its own handle and signed as its own member. - if len(cap1.recordedFor) != 1 || cap1.recordedFor[0] != handle1 { - t.Fatalf("seat 1 must record once against its own handle; got %+v", cap1.recordedFor) - } - if len(cap2.recordedFor) != 1 || cap2.recordedFor[0] != handle2 { - t.Fatalf("seat 2 must record once against its own handle; got %+v", cap2.recordedFor) - } - if cap1.recordedSnp[0].SenderID() != 1 { - t.Fatalf("seat 1 snapshot sender: got %d want 1", cap1.recordedSnp[0].SenderID()) + evidence, ok := takePendingEvidence(ctx.SessionID, selfMember, ctx.Hash()) + if !ok { + t.Fatal("reject-only snapshot must be stashed") } - if cap2.recordedSnp[0].SenderID() != 2 { - t.Fatalf("seat 2 snapshot sender: got %d want 2", cap2.recordedSnp[0].SenderID()) + if len(evidence.Rejects[2]) == 0 { + t.Fatalf("stashed evidence must carry the reject for sender 2; got %+v", evidence.Rejects) } } -// TestSubmitSnapshotIfActive_SubmitsRejectOnlySnapshot guards the fold of Codex's -// PR2b-2 P2: a snapshot carrying ONLY reject evidence -- no overflow, no conflict -// -- must still be signed and submitted. A validation-blamable Reject (e.g. an -// attempt-context-hash mismatch) populates Evidence.Rejects without any Overflow, -// and NextAttempt's exclusion path consumes snapshot.Rejects; the old overflow-only -// emptiness check silently dropped such snapshots, starving the blame pipeline. -func TestSubmitSnapshotIfActive_SubmitsRejectOnlySnapshot(t *testing.T) { - ResetRoastRetryRegistrationForTest() +// TestSubmitSnapshotIfActive_MultiSeatStashesPerSeat asserts the PR2b-2 +// member-aware path: with two local seats bound to the SAME attempt (same +// RoastSessionID + attemptHash), each seat stashes its OWN evidence under its own +// member key -- neither overwrites the other (the member-keying that fixes the +// sibling-collision hazard). +func TestSubmitSnapshotIfActive_MultiSeatStashesPerSeat(t *testing.T) { ResetSessionHandleRegistryForTest() - t.Cleanup(ResetRoastRetryRegistrationForTest) + ResetPendingEvidenceRegistryForTest() t.Cleanup(ResetSessionHandleRegistryForTest) + t.Cleanup(ResetPendingEvidenceRegistryForTest) - const selfMember group.MemberIndex = 1 - cap := newCaptureCoordinator(roast.NewInMemoryCoordinatorWithSigning( - selfMember, &deterministicSigner{id: selfMember}, deterministicVerifier{}, - )) - RegisterRoastRetryCoordinator(RoastRetryDeps{ - Coordinator: cap, - Signer: &deterministicSigner{id: selfMember}, - Verifier: deterministicVerifier{}, - SelfMember: uint32(selfMember), - }) + ctx := newTestContextForSubmit(t, "session-ms") + SetCurrentAttemptHandleForSession("session-ms", 1, roast.AttemptHandle{}, ctx) + SetCurrentAttemptHandleForSession("session-ms", 2, roast.AttemptHandle{}, ctx) - ctx := newTestContextForSubmit(t, "session-reject-only") - handle, err := cap.BeginAttempt(ctx) - if err != nil { - t.Fatalf("begin: %v", err) - } - SetCurrentAttemptHandleForSession("session-reject-only", selfMember, handle, ctx) + rec1 := attempt.NewBoundedRecorder() + rec1.RecordOverflow(3) + rec2 := attempt.NewBoundedRecorder() + rec2.RecordOverflow(4) - // Only a reject -- no overflow, no conflict. - recorder := attempt.NewBoundedRecorder() - recorder.RecordReject(2, "attempt_context_hash_mismatch") - submitSnapshotIfActive("session-reject-only", selfMember, recorder) + submitSnapshotIfActive("session-ms", 1, rec1) + submitSnapshotIfActive("session-ms", 2, rec2) - if len(cap.recordedFor) != 1 { - t.Fatalf("reject-only snapshot must be submitted; got %d RecordEvidence calls", len(cap.recordedFor)) + ev1, ok1 := takePendingEvidence(ctx.SessionID, 1, ctx.Hash()) + ev2, ok2 := takePendingEvidence(ctx.SessionID, 2, ctx.Hash()) + if !ok1 || !ok2 { + t.Fatalf("both seats must stash their own evidence; got ok1=%v ok2=%v", ok1, ok2) } - snap := cap.recordedSnp[0] - if len(snap.Rejects) == 0 { - t.Fatal("submitted snapshot must carry the reject evidence") + // Seat 1 observed sender 3 only; seat 2 observed sender 4 only -- no bleed. + if ev1.Overflows[3] != 1 || ev1.Overflows[4] != 0 { + t.Fatalf("seat 1 stash must isolate its own evidence; got %+v", ev1.Overflows) } - if len(snap.OperatorSignature) == 0 { - t.Fatal("reject-only snapshot must be signed") + if ev2.Overflows[4] != 1 || ev2.Overflows[3] != 0 { + t.Fatalf("seat 2 stash must isolate its own evidence; got %+v", ev2.Overflows) } } @@ -403,32 +236,3 @@ func TestClearCurrentAttemptHandleForSession_RemovesBinding(t *testing.T) { t.Fatal("binding must be cleared") } } - -func TestSubmitSnapshotIfActive_RecordEvidenceFailureIsLoggedNotPropagated(t *testing.T) { - ResetRoastRetryRegistrationForTest() - ResetSessionHandleRegistryForTest() - t.Cleanup(ResetRoastRetryRegistrationForTest) - t.Cleanup(ResetSessionHandleRegistryForTest) - - innerCoord := roast.NewInMemoryCoordinatorWithSigning( - 1, &deterministicSigner{id: 1}, deterministicVerifier{}, - ) - cap := newCaptureCoordinator(innerCoord) - cap.recordErr = errors.New("synthetic RecordEvidence failure") - RegisterRoastRetryCoordinator(RoastRetryDeps{ - Coordinator: cap, - Signer: &deterministicSigner{id: 1}, - Verifier: deterministicVerifier{}, - SelfMember: 1, - }) - - ctx := newTestContextForSubmit(t, "session-failure") - handle, _ := cap.BeginAttempt(ctx) - SetCurrentAttemptHandleForSession("session-failure", 1, handle, ctx) - - recorder := attempt.NewBoundedRecorder() - recorder.RecordOverflow(3) - - // Must not panic. Caller is unaffected. - submitSnapshotIfActive("session-failure", 1, recorder) -} diff --git a/pkg/frost/signing/roast_transition_exchange_frost_native_roast_retry.go b/pkg/frost/signing/roast_transition_exchange_frost_native_roast_retry.go index 43171150fb..61908b980f 100644 --- a/pkg/frost/signing/roast_transition_exchange_frost_native_roast_retry.go +++ b/pkg/frost/signing/roast_transition_exchange_frost_native_roast_retry.go @@ -22,9 +22,10 @@ import ( // coordinator's local observe handle (only the elected seat collects, for // aggregation); peers' transition bundles are verified against this seat's // own observe handle and, when valid, stored as the next-attempt record. -// - BroadcastForcedSnapshot: a participating seat publishes a forced (empty) -// proof-of-attendance snapshot so it appears in the aggregated bundle and is -// not silence-parked by NextAttempt. It records its OWN snapshot before +// - BroadcastForcedSnapshot: a participating seat publishes its proof-of- +// attendance snapshot -- carrying the real evidence the coarse receive loop +// stashed for the attempt, if any -- so it appears in the aggregated bundle +// and is not silence-parked by NextAttempt. It records its OWN snapshot before // broadcasting, so VerifyBundle's censorship check is meaningful. // - AggregateAndBroadcast: the elected seat aggregates the collected snapshots // into a coordinator-signed bundle, stores it locally via the same @@ -83,6 +84,10 @@ func (e *RoastTransitionExchange) listen() { // produced a transition record to clear, so its bindings would otherwise // linger until the TTL sweep. defer clearObservedAttemptsForSession(e.roastSessionID, e.member) + // Likewise drop any stashed coarse-path evidence this seat captured but never + // broadcast -- a succeeded attempt never reaches BroadcastForcedSnapshot, which + // is what consumes the stash (RFC-21 Phase 7.3 PR2b-2 step 2). + defer clearPendingEvidenceForSession(e.roastSessionID, e.member) for { select { case <-e.ctx.Done(): @@ -180,10 +185,19 @@ func (e *RoastTransitionExchange) HasLostSync() bool { return e.lostSync.Load() } -// BroadcastForcedSnapshot publishes this seat's forced (empty) proof-of-attendance -// snapshot for the attempt, recording it locally BEFORE the broadcast so the -// censorship check on the returned bundle is meaningful. A no-op when the seat -// has no observe binding for the attempt or signing fails. +// BroadcastForcedSnapshot publishes this seat's proof-of-attendance snapshot for +// the attempt, recording it locally BEFORE the broadcast so the censorship check +// on the returned bundle is meaningful. A no-op when the seat has no observe +// binding for the attempt or signing fails. +// +// RFC-21 Phase 7.3 PR2b-2 step 2 (the blame bridge): the snapshot carries the REAL +// evidence the coarse receive loop captured for this attempt, if any. The coarse +// path stashes its recorder snapshot keyed by the same (RoastSessionID, member, +// attemptHash); this consumes it so the seat's broadcast -- and therefore the +// elected coordinator's aggregated bundle -- carries real rejects/overflows/ +// conflicts and NextAttempt's f+1 tally can fire. When the stash is empty (the +// seat observed nothing) the snapshot is the empty proof-of-attendance one, which +// must still be broadcast so the seat is not silence-parked. func (e *RoastTransitionExchange) BroadcastForcedSnapshot( attemptHash [attempt.MessageDigestLength]byte, ) { @@ -191,7 +205,11 @@ func (e *RoastTransitionExchange) BroadcastForcedSnapshot( if !ok { return } - snapshot := roast.NewLocalEvidenceSnapshot(e.member, attemptHash, attempt.Evidence{}) + // takePendingEvidence returns the zero Evidence on a miss, which + // NewLocalEvidenceSnapshot renders as the empty proof-of-attendance snapshot -- + // still broadcast so the seat is not silence-parked. + evidence, _ := takePendingEvidence(e.roastSessionID, e.member, attemptHash) + snapshot := roast.NewLocalEvidenceSnapshot(e.member, attemptHash, evidence) payload, err := snapshot.SignableBytes() if err != nil { e.logger.Warnf("roast transition: forced snapshot signable bytes: [%v]", err) diff --git a/pkg/frost/signing/roast_transition_exchange_frost_native_roast_retry_test.go b/pkg/frost/signing/roast_transition_exchange_frost_native_roast_retry_test.go index 5a73a209fc..9e74d3283b 100644 --- a/pkg/frost/signing/roast_transition_exchange_frost_native_roast_retry_test.go +++ b/pkg/frost/signing/roast_transition_exchange_frost_native_roast_retry_test.go @@ -633,3 +633,99 @@ func TestRoastTransitionExchange_NonElectedDoesNotAggregate(t *testing.T) { t.Fatal("non-elected seat must not store a transition record from AggregateAndBroadcast") } } + +// TestRoastTransitionExchange_StashedEvidenceDrivesExclusion is the end-to-end +// blame-bridge proof (RFC-21 Phase 7.3 PR2b-2 step 2): coarse-path evidence stashed +// by the receive loop is carried by BroadcastForcedSnapshot into the aggregated +// bundle, so NextAttempt's f+1 accuser tally excludes the blamed member -- the +// exclusion the pre-bridge forced-EMPTY snapshots could never trigger. +func TestRoastTransitionExchange_StashedEvidenceDrivesExclusion(t *testing.T) { + ResetObservedAttemptRegistryForTest() + ResetRoastTransitionRegistryForTest() + ResetPendingEvidenceRegistryForTest() + t.Cleanup(ResetObservedAttemptRegistryForTest) + t.Cleanup(ResetRoastTransitionRegistryForTest) + t.Cleanup(ResetPendingEvidenceRegistryForTest) + + roastSessionID := "exchange-blame-bridge-session" + included := []group.MemberIndex{1, 2, 3} + dkgKey := []byte{0x09, 0x0a} + // original group size 3; quorum = ExclusionAccuserQuorum(3, threshold) = + // 3 - 2 + 1 = 2, so two accusers establish a permanent reject exclusion. + const threshold uint = 2 + + ctx := newExchangeTestContext(t, roastSessionID, included, dkgKey) + hash := ctx.Hash() + nodes := newExchangeTestNodes(t, roastSessionID, ctx, dkgKey) + + // Members 1 and 2 each observed a validation reject against member 3 during the + // coarse receive loop; their submit stashed it. Member 3 observed nothing and + // broadcasts only an empty proof-of-attendance snapshot. + rejectAgainst3 := attempt.Evidence{ + Rejects: map[group.MemberIndex][]attempt.RejectEntry{ + 3: {{Reason: "attempt_context_hash_mismatch", Count: 1}}, + }, + } + stashPendingEvidence(roastSessionID, 1, hash, rejectAgainst3) + stashPendingEvidence(roastSessionID, 2, hash, rejectAgainst3) + + bundleMsg, elected := produceTransitionBundleForTest(t, roastSessionID, ctx, nodes) + + // The aggregated bundle must carry real reject evidence from >=2 accusers, not + // the forced-empty snapshots of the pre-bridge design. + bundle := &roast.TransitionMessage{} + if err := bundle.Unmarshal(bundleMsg.Payload); err != nil { + t.Fatalf("unmarshal bundle: %v", err) + } + accusers := 0 + for i := range bundle.Bundle { + if len(bundle.Bundle[i].Rejects) > 0 { + accusers++ + } + } + if accusers < 2 { + t.Fatalf("bundle must carry reject evidence from >=2 accusers; got %d", accusers) + } + + // BroadcastForcedSnapshot consumed the stash. + if PendingEvidenceStashedForTest(roastSessionID, 1) || + PendingEvidenceStashedForTest(roastSessionID, 2) { + t.Fatal("BroadcastForcedSnapshot must consume the stash") + } + + // A non-elected accuser verifies the bundle against its own observe handle and + // computes the next attempt: member 3 meets the f+1 reject quorum -> excluded. + var verifier group.MemberIndex + for _, m := range included { + if m != elected { + verifier = m + break + } + } + binding, ok := observedAttempt(roastSessionID, verifier, hash) + if !ok { + t.Fatalf("non-elected seat %d must still hold its observe binding", verifier) + } + if err := nodes[verifier].coord.VerifyBundle(binding.handle, bundle); err != nil { + t.Fatalf("verify bundle: %v", err) + } + next, err := nodes[verifier].coord.NextAttempt(binding.handle, bundle, threshold, dkgKey) + if err != nil { + t.Fatalf("next attempt: %v", err) + } + + excluded3 := false + for _, m := range next.ExcludedSet { + if m == 3 { + excluded3 = true + } + } + if !excluded3 { + t.Fatalf("member 3 must be excluded by the f+1 reject quorum; excluded=%v", next.ExcludedSet) + } + for _, m := range next.IncludedSet { + if m == 3 { + t.Fatalf("excluded member 3 must not be in the next included set; included=%v", next.IncludedSet) + } + } +} diff --git a/pkg/tbtc/signing_loop.go b/pkg/tbtc/signing_loop.go index 57e7feb1ce..754f932424 100644 --- a/pkg/tbtc/signing_loop.go +++ b/pkg/tbtc/signing_loop.go @@ -119,10 +119,12 @@ type signingRetryLoop struct { doneCheck signingDoneCheckStrategy - // participantSelector dispatches qualified-operator selection. - // Default: legacy retry shuffle. Phase 7 may install a - // ROAST-driven implementation behind the frost_roast_retry - // build tag once AggregateBundle production is wired upstream. + // participantSelector dispatches qualified-operator selection. The default + // build uses the legacy retry shuffle; the frost_roast_retry build installs the + // ROAST-driven selector (signing_loop_selector_frost_roast_retry.go), which + // consumes the transition record the exchange produces -- now carrying the real + // blame evidence the coarse path captures (RFC-21 Phase 7.3 PR2b-2 step 2) -- + // and falls back to the shuffle only on the uniform initial/inactive condition. participantSelector signingParticipantSelector // transitionController, when non-nil, owns the session-scoped ROAST diff --git a/pkg/tbtc/signing_loop_roast_transition_controller_frost_native_roast_retry.go b/pkg/tbtc/signing_loop_roast_transition_controller_frost_native_roast_retry.go index 5ad533407e..c0c2380974 100644 --- a/pkg/tbtc/signing_loop_roast_transition_controller_frost_native_roast_retry.go +++ b/pkg/tbtc/signing_loop_roast_transition_controller_frost_native_roast_retry.go @@ -186,6 +186,15 @@ func (c *roastTransitionControllerImpl) OnAttemptSucceeded() { c.requestTemplate.MemberIndex, hash, ) + // Also drop any coarse-path evidence stashed for this attempt: a succeeded + // attempt never reaches OnAttemptFailed -> BroadcastForcedSnapshot (which is + // what consumes the stash), so without this the entry would leak until the TTL + // sweep (RFC-21 Phase 7.3 PR2b-2 step 2, the blame bridge). + signing.ClearPendingEvidenceOnLocalSuccess( + c.requestTemplate.RoastSessionID, + c.requestTemplate.MemberIndex, + hash, + ) } func (c *roastTransitionControllerImpl) HasLostSync() bool {