Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions crates/hotshot/new-protocol/src/consensus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,14 @@ impl<T: NodeType> Consensus<T> {
epoch: EpochNumber,
outbox: &mut Outbox<ConsensusOutput<T>>,
) -> Protocol {
if view < self.current_view {
debug!(
%view,
current_view = %self.current_view,
"ignoring timeout for stale view"
);
return Protocol::Abort;
}
let we_were_leader = self.is_leader(view, epoch);
if we_were_leader {
if self.proposed_views.contains(&view) {
Expand Down Expand Up @@ -1011,6 +1019,14 @@ impl<T: NodeType> Consensus<T> {
outbox: &mut Outbox<ConsensusOutput<T>>,
) -> Protocol {
let view = certificate.view_number() + 1;
if view < self.current_view {
debug!(
%view,
current_view = %self.current_view,
"ignoring stale timeout certificate"
);
return Protocol::Abort;
}
if self.timeout_certs.contains_key(&view) {
return Protocol::Continue;
}
Expand Down Expand Up @@ -1064,6 +1080,20 @@ impl<T: NodeType> Consensus<T> {
cert2,
proposal,
} = epoch_change;
// Compare epochs (not views) so a node that timed out past a boundary
// it never saw can still recover via a genuinely new epoch change.
if self
.current_epoch
.is_some_and(|current| cert2.data.epoch < current)
{
debug!(
view = %cert2.view_number(),
epoch = %cert2.data.epoch,
current_epoch = ?self.current_epoch.map(|e| *e),
"ignoring stale epoch change for an epoch we have already entered"
);
return Protocol::Abort;
}
// Check if this epoch change is new
if self
.locked_cert
Expand Down
36 changes: 36 additions & 0 deletions crates/hotshot/new-protocol/src/coordinator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,14 @@ where
);
},
ConsensusOutput::ViewChanged(view, epoch) => {
let current_view = self.consensus.current_view();
if view < current_view {
warn!(
%node, %view, %epoch, %current_view,
"ignoring view change to stale view"
);
return Ok(());
}
info!(%node, %view, %epoch, "view changed");
self.consensus.set_view(view, epoch);
self.timer.reset_with_epoch(view, epoch);
Expand Down Expand Up @@ -894,6 +902,14 @@ where
},
ConsensusMessage::TimeoutVote(timeout_msg) => {
let view = timeout_msg.vote.view_number();
let current_view = self.consensus.current_view();
if view < current_view {
debug!(
%node, %sender, %view, %current_view,
"ignoring timeout vote for stale view"
);
return None;
}
debug!(
%node, %sender, %view,
has_lock = timeout_msg.lock.is_some(),
Expand Down Expand Up @@ -1203,6 +1219,16 @@ where
let _ = respond.send(result);
},
ClientRequest::SeedPreCutover { seed, respond } => {
let current_view = self.consensus.current_view();
if seed.cutover_view > ViewNumber::genesis() && current_view >= seed.cutover_view {
tracing::info!(
%current_view,
cutover_view = *seed.cutover_view,
"coordinator: ignoring pre-cutover seed; already past the cutover",
);
let _ = respond.send(());
return Ok(());
}
tracing::info!(
undecided = seed.undecided.len(),
anchor_view = *seed.decided_anchor.view_number(),
Expand Down Expand Up @@ -1276,6 +1302,16 @@ where
let _ = respond.send(());
},
ClientRequest::SubmitTimeoutVote { vote, respond } => {
let view = vote.view_number();
let current_view = self.consensus.current_view();
if view < current_view {
debug!(
%view, %current_view,
"ignoring bridged timeout vote for stale view"
);
let _ = respond.send(());
return Ok(());
}
self.timeout_collector.accumulate_vote(vote.clone()).await;
self.timeout_one_honest_collector
.accumulate_vote(vote.clone())
Expand Down
79 changes: 77 additions & 2 deletions crates/hotshot/new-protocol/src/tests/consensus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ use crate::{
tests::common::{
assertions::{
any, count_matching, is_leaf_decided, is_proposal, is_request_block_and_header,
is_request_state, is_send_timeout_cert, is_send_timeout_vote, is_vote1, is_vote2,
node_index_for_key,
is_request_state, is_send_timeout_cert, is_send_timeout_vote, is_view_changed,
is_vote1, is_vote2, node_index_for_key,
},
utils::{ConsensusHarness, MockBlock},
},
Expand Down Expand Up @@ -821,3 +821,78 @@ async fn test_decide_not_repeated_for_same_view() {

assert_eq!(1, count_matching(harness.outputs(), is_leaf_decided));
}

/// A timeout for a view below `current_view` is ignored entirely: no
/// timeout vote is signed or broadcast. Regression test for restarted
/// nodes being dragged back to long-past views (e.g. the protocol-upgrade
/// cutover) by joining stale timeouts via `TimeoutOneHonest`.
#[tokio::test]
async fn test_stale_timeout_ignored() {
let mut harness = ConsensusHarness::new(0).await;

harness
.consensus
.set_view(ViewNumber::new(5), EpochNumber::genesis());

// Timeout for view 2 (< current view 5) must not produce a vote.
harness
.apply(ConsensusInput::Timeout(
ViewNumber::new(2),
EpochNumber::genesis(),
))
.await;
assert!(
!any(harness.outputs(), is_send_timeout_vote),
"must not sign a timeout vote for a view below the current view"
);

// Timeout at the current view still produces a vote.
harness
.apply(ConsensusInput::Timeout(
ViewNumber::new(5),
EpochNumber::genesis(),
))
.await;
assert!(
any(harness.outputs(), is_send_timeout_vote),
"timeout at the current view must still produce a vote"
);
}

/// A timeout certificate advancing into a view below `current_view` is
/// ignored: no `ViewChanged` is emitted and the certificate is not
/// rebroadcast. Regression test: stale TCs (e.g. formed around the
/// protocol-upgrade cutover by restarted nodes) must not drag a caught-up
/// node's view backwards.
#[tokio::test]
async fn test_stale_timeout_certificate_ignored() {
let mut harness = ConsensusHarness::new(0).await;
let test_data = TestData::new(6).await;

harness
.consensus
.set_view(ViewNumber::new(5), EpochNumber::genesis());

// TC certifying view 2 advances into view 3 (< current view 5) — ignored.
harness.apply(test_data.views[1].timeout_cert_input()).await;

assert!(
!any(harness.outputs(), is_view_changed),
"stale timeout certificate must not change the view"
);
assert!(
!any(harness.outputs(), is_send_timeout_cert),
"stale timeout certificate must not be rebroadcast"
);

// A TC certifying the current view (advancing into view 6) still works.
harness.apply(test_data.views[4].timeout_cert_input()).await;
assert!(
any(harness.outputs(), is_view_changed),
"timeout certificate over the current view must still advance the view"
);
assert!(
any(harness.outputs(), is_send_timeout_cert),
"timeout certificate over the current view must still be forwarded"
);
}
38 changes: 37 additions & 1 deletion crates/hotshot/new-protocol/src/tests/epoch_change.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use hotshot::types::BLSPubKey;
use hotshot_example_types::node_types::TestTypes;
use hotshot_types::{
data::EpochNumber,
data::{EpochNumber, ViewNumber},
message::Proposal as SignedProposal,
traits::{block_contents::BlockHeader, signature_key::SignatureKey},
utils::is_epoch_transition,
Expand Down Expand Up @@ -247,6 +247,42 @@ async fn test_handle_epoch_change_stale() {
);
}

/// An otherwise-valid EpochChangeMessage for an epoch boundary the node has
/// already crossed is rejected, even when the lock does not catch it (here:
/// no locked cert at all). Regression test: replayed boundary messages must
/// not emit a backwards `ViewChanged` and regress a caught-up node to a
/// long-past view.
#[tokio::test]
async fn test_handle_epoch_change_replay_of_crossed_boundary() {
let mut harness = ConsensusHarness::new(0).await;
let test_data = TestData::new_with_epoch_height(11, EPOCH_HEIGHT).await;

// Simulate a node already deep in epoch 3 — e.g. freshly restarted and
// seeded from its anchor — with no locked cert (genesis safety), so the
// locked-cert staleness check cannot reject the replay.
harness
.consensus
.set_view(ViewNumber::new(25), EpochNumber::new(3));

// Replay the epoch 1 → 2 boundary (view 10, block 10).
let epoch_view = &test_data.views[9];
let proposal: Proposal<TestTypes> = epoch_view.proposal.data.clone();
let epoch_change = EpochChangeMessage {
cert1: epoch_view.cert1.clone(),
cert2: epoch_view.cert2.clone(),
proposal,
};

harness
.apply(ConsensusInput::EpochChange(epoch_change))
.await;

assert!(
!any(harness.outputs(), is_view_changed),
"replayed epoch change for a crossed boundary must not change the view"
);
}

/// Verify that exactly one SendEpochChange is emitted when processing
/// views through a single epoch boundary.
#[tokio::test]
Expand Down
11 changes: 8 additions & 3 deletions crates/hotshot/task-impls/src/consensus/handlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -488,9 +488,14 @@ pub(crate) async fn handle_timeout<TYPES: NodeType, I: NodeImplementation<TYPES>
// Forward the same vote to any external listener so the espresso bridge
// can submit it to the new-protocol coordinator at the legacy → 0.8
// upgrade boundary. Only emit when a target upgrade is decided (i.e.
// we know a cutover is coming) to avoid spurious events in normal
// operation. The check is cheap; the event payload is small.
if task_state.upgrade_lock.decided_upgrade_cert().is_some() {
// we know a cutover is coming) and the view is not past the cutover,
// to avoid spurious events in normal operation. The check is cheap;
// the event payload is small.
if task_state
.upgrade_lock
.decided_upgrade_cert()
.is_some_and(|cert| view_number <= cert.data.new_version_first_view)
{
broadcast_event(
Event {
view_number,
Expand Down
Loading