diff --git a/crates/hotshot/new-protocol/src/consensus.rs b/crates/hotshot/new-protocol/src/consensus.rs index e5d0f2304d7..d51662dc50e 100644 --- a/crates/hotshot/new-protocol/src/consensus.rs +++ b/crates/hotshot/new-protocol/src/consensus.rs @@ -903,6 +903,14 @@ impl Consensus { epoch: EpochNumber, outbox: &mut Outbox>, ) -> Protocol { + if view < self.current_view { + debug!( + %view, + current_view = %self.current_view, + "ignoring timeout for stale view" + ); + return Protocol::Abort; + } let we_were_leader = self.is_leader(view, epoch); if we_were_leader { if self.proposed_views.contains(&view) { @@ -1011,6 +1019,14 @@ impl Consensus { outbox: &mut Outbox>, ) -> Protocol { let view = certificate.view_number() + 1; + if view < self.current_view { + debug!( + %view, + current_view = %self.current_view, + "ignoring stale timeout certificate" + ); + return Protocol::Abort; + } if self.timeout_certs.contains_key(&view) { return Protocol::Continue; } @@ -1064,6 +1080,20 @@ impl Consensus { cert2, proposal, } = epoch_change; + // Compare epochs (not views) so a node that timed out past a boundary + // it never saw can still recover via a genuinely new epoch change. + if self + .current_epoch + .is_some_and(|current| cert2.data.epoch < current) + { + debug!( + view = %cert2.view_number(), + epoch = %cert2.data.epoch, + current_epoch = ?self.current_epoch.map(|e| *e), + "ignoring stale epoch change for an epoch we have already entered" + ); + return Protocol::Abort; + } // Check if this epoch change is new if self .locked_cert diff --git a/crates/hotshot/new-protocol/src/coordinator.rs b/crates/hotshot/new-protocol/src/coordinator.rs index f41f92617dc..9f6c3a4d676 100644 --- a/crates/hotshot/new-protocol/src/coordinator.rs +++ b/crates/hotshot/new-protocol/src/coordinator.rs @@ -751,6 +751,14 @@ where ); }, ConsensusOutput::ViewChanged(view, epoch) => { + let current_view = self.consensus.current_view(); + if view < current_view { + warn!( + %node, %view, %epoch, %current_view, + "ignoring view change to stale view" + ); + return Ok(()); + } info!(%node, %view, %epoch, "view changed"); self.consensus.set_view(view, epoch); self.timer.reset_with_epoch(view, epoch); @@ -894,6 +902,14 @@ where }, ConsensusMessage::TimeoutVote(timeout_msg) => { let view = timeout_msg.vote.view_number(); + let current_view = self.consensus.current_view(); + if view < current_view { + debug!( + %node, %sender, %view, %current_view, + "ignoring timeout vote for stale view" + ); + return None; + } debug!( %node, %sender, %view, has_lock = timeout_msg.lock.is_some(), @@ -1203,6 +1219,16 @@ where let _ = respond.send(result); }, ClientRequest::SeedPreCutover { seed, respond } => { + let current_view = self.consensus.current_view(); + if seed.cutover_view > ViewNumber::genesis() && current_view >= seed.cutover_view { + tracing::info!( + %current_view, + cutover_view = *seed.cutover_view, + "coordinator: ignoring pre-cutover seed; already past the cutover", + ); + let _ = respond.send(()); + return Ok(()); + } tracing::info!( undecided = seed.undecided.len(), anchor_view = *seed.decided_anchor.view_number(), @@ -1276,6 +1302,16 @@ where let _ = respond.send(()); }, ClientRequest::SubmitTimeoutVote { vote, respond } => { + let view = vote.view_number(); + let current_view = self.consensus.current_view(); + if view < current_view { + debug!( + %view, %current_view, + "ignoring bridged timeout vote for stale view" + ); + let _ = respond.send(()); + return Ok(()); + } self.timeout_collector.accumulate_vote(vote.clone()).await; self.timeout_one_honest_collector .accumulate_vote(vote.clone()) diff --git a/crates/hotshot/new-protocol/src/tests/consensus.rs b/crates/hotshot/new-protocol/src/tests/consensus.rs index 33c960fd2ab..06a99972712 100644 --- a/crates/hotshot/new-protocol/src/tests/consensus.rs +++ b/crates/hotshot/new-protocol/src/tests/consensus.rs @@ -21,8 +21,8 @@ use crate::{ tests::common::{ assertions::{ any, count_matching, is_leaf_decided, is_proposal, is_request_block_and_header, - is_request_state, is_send_timeout_cert, is_send_timeout_vote, is_vote1, is_vote2, - node_index_for_key, + is_request_state, is_send_timeout_cert, is_send_timeout_vote, is_view_changed, + is_vote1, is_vote2, node_index_for_key, }, utils::{ConsensusHarness, MockBlock}, }, @@ -821,3 +821,78 @@ async fn test_decide_not_repeated_for_same_view() { assert_eq!(1, count_matching(harness.outputs(), is_leaf_decided)); } + +/// A timeout for a view below `current_view` is ignored entirely: no +/// timeout vote is signed or broadcast. Regression test for restarted +/// nodes being dragged back to long-past views (e.g. the protocol-upgrade +/// cutover) by joining stale timeouts via `TimeoutOneHonest`. +#[tokio::test] +async fn test_stale_timeout_ignored() { + let mut harness = ConsensusHarness::new(0).await; + + harness + .consensus + .set_view(ViewNumber::new(5), EpochNumber::genesis()); + + // Timeout for view 2 (< current view 5) must not produce a vote. + harness + .apply(ConsensusInput::Timeout( + ViewNumber::new(2), + EpochNumber::genesis(), + )) + .await; + assert!( + !any(harness.outputs(), is_send_timeout_vote), + "must not sign a timeout vote for a view below the current view" + ); + + // Timeout at the current view still produces a vote. + harness + .apply(ConsensusInput::Timeout( + ViewNumber::new(5), + EpochNumber::genesis(), + )) + .await; + assert!( + any(harness.outputs(), is_send_timeout_vote), + "timeout at the current view must still produce a vote" + ); +} + +/// A timeout certificate advancing into a view below `current_view` is +/// ignored: no `ViewChanged` is emitted and the certificate is not +/// rebroadcast. Regression test: stale TCs (e.g. formed around the +/// protocol-upgrade cutover by restarted nodes) must not drag a caught-up +/// node's view backwards. +#[tokio::test] +async fn test_stale_timeout_certificate_ignored() { + let mut harness = ConsensusHarness::new(0).await; + let test_data = TestData::new(6).await; + + harness + .consensus + .set_view(ViewNumber::new(5), EpochNumber::genesis()); + + // TC certifying view 2 advances into view 3 (< current view 5) — ignored. + harness.apply(test_data.views[1].timeout_cert_input()).await; + + assert!( + !any(harness.outputs(), is_view_changed), + "stale timeout certificate must not change the view" + ); + assert!( + !any(harness.outputs(), is_send_timeout_cert), + "stale timeout certificate must not be rebroadcast" + ); + + // A TC certifying the current view (advancing into view 6) still works. + harness.apply(test_data.views[4].timeout_cert_input()).await; + assert!( + any(harness.outputs(), is_view_changed), + "timeout certificate over the current view must still advance the view" + ); + assert!( + any(harness.outputs(), is_send_timeout_cert), + "timeout certificate over the current view must still be forwarded" + ); +} diff --git a/crates/hotshot/new-protocol/src/tests/epoch_change.rs b/crates/hotshot/new-protocol/src/tests/epoch_change.rs index cd8841388cd..5f5ad5712f3 100644 --- a/crates/hotshot/new-protocol/src/tests/epoch_change.rs +++ b/crates/hotshot/new-protocol/src/tests/epoch_change.rs @@ -1,7 +1,7 @@ use hotshot::types::BLSPubKey; use hotshot_example_types::node_types::TestTypes; use hotshot_types::{ - data::EpochNumber, + data::{EpochNumber, ViewNumber}, message::Proposal as SignedProposal, traits::{block_contents::BlockHeader, signature_key::SignatureKey}, utils::is_epoch_transition, @@ -247,6 +247,42 @@ async fn test_handle_epoch_change_stale() { ); } +/// An otherwise-valid EpochChangeMessage for an epoch boundary the node has +/// already crossed is rejected, even when the lock does not catch it (here: +/// no locked cert at all). Regression test: replayed boundary messages must +/// not emit a backwards `ViewChanged` and regress a caught-up node to a +/// long-past view. +#[tokio::test] +async fn test_handle_epoch_change_replay_of_crossed_boundary() { + let mut harness = ConsensusHarness::new(0).await; + let test_data = TestData::new_with_epoch_height(11, EPOCH_HEIGHT).await; + + // Simulate a node already deep in epoch 3 — e.g. freshly restarted and + // seeded from its anchor — with no locked cert (genesis safety), so the + // locked-cert staleness check cannot reject the replay. + harness + .consensus + .set_view(ViewNumber::new(25), EpochNumber::new(3)); + + // Replay the epoch 1 → 2 boundary (view 10, block 10). + let epoch_view = &test_data.views[9]; + let proposal: Proposal = epoch_view.proposal.data.clone(); + let epoch_change = EpochChangeMessage { + cert1: epoch_view.cert1.clone(), + cert2: epoch_view.cert2.clone(), + proposal, + }; + + harness + .apply(ConsensusInput::EpochChange(epoch_change)) + .await; + + assert!( + !any(harness.outputs(), is_view_changed), + "replayed epoch change for a crossed boundary must not change the view" + ); +} + /// Verify that exactly one SendEpochChange is emitted when processing /// views through a single epoch boundary. #[tokio::test] diff --git a/crates/hotshot/task-impls/src/consensus/handlers.rs b/crates/hotshot/task-impls/src/consensus/handlers.rs index 997cabc8a76..bfd6c25f870 100644 --- a/crates/hotshot/task-impls/src/consensus/handlers.rs +++ b/crates/hotshot/task-impls/src/consensus/handlers.rs @@ -488,9 +488,14 @@ pub(crate) async fn handle_timeout // Forward the same vote to any external listener so the espresso bridge // can submit it to the new-protocol coordinator at the legacy → 0.8 // upgrade boundary. Only emit when a target upgrade is decided (i.e. - // we know a cutover is coming) to avoid spurious events in normal - // operation. The check is cheap; the event payload is small. - if task_state.upgrade_lock.decided_upgrade_cert().is_some() { + // we know a cutover is coming) and the view is not past the cutover, + // to avoid spurious events in normal operation. The check is cheap; + // the event payload is small. + if task_state + .upgrade_lock + .decided_upgrade_cert() + .is_some_and(|cert| view_number <= cert.data.new_version_first_view) + { broadcast_event( Event { view_number,