Skip to content

Commit 14a37d0

Browse files
authored
Demo upgrade to New Protocol + Fixes (#4406)
* add upgrade test for new proto
* fix test
* fix cutover
* harden upgrade
* reduce comments
1 parent 284de7a commit 14a37d0

16 files changed

Lines changed: 290 additions & 42 deletions

File tree

.env

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,9 @@ ESPRESSO_BUILDER_WEBSERVER_RESPONSE_TIMEOUT_DURATION=1500ms
170170
ESPRESSO_BUILDER_BUFFER_VIEW_NUM_COUNT=50
171171

172172
# Load generator
173-
ESPRESSO_SUBMIT_TRANSACTIONS_DELAY=2s
173+
# Mean submit delay defaults to 2s (set per-process in process-compose.yaml as
174+
# ${ESPRESSO_SUBMIT_TRANSACTIONS_DELAY:-2s} so tests can override it). Keeping the
175+
# default out of this file avoids a .env-vs-env precedence ambiguity for the override.
174176
ESPRESSO_SUBMIT_TRANSACTIONS_PUBLIC_PORT=24010
175177
ESPRESSO_SUBMIT_TRANSACTIONS_PRIVATE_PORT=24020
176178
ESPRESSO_SUBMIT_TRANSACTIONS_PRIVATE_FALLBACK_PORT=24030

.github/workflows/test.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -360,6 +360,7 @@ jobs:
360360
- test-name: test_native_demo_drb_header_upgrade
361361
- test-name: test_native_demo_fee_to_drb_header_upgrade
362362
- test-name: test_native_demo_epoch_reward_upgrade
363+
- test-name: test_native_demo_new_protocol_upgrade
363364
- test-name: test_native_demo_da_committee
364365
- test-name: test_native_demo_ff_base
365366

crates/espresso/node/src/consensus_handle.rs

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ use hotshot_new_protocol::{
1111
coordinator::{Coordinator, CoordinatorOutput, error::Severity},
1212
cutover::{
1313
CutoverGate, extract_pre_cutover_seed, forward_legacy_epoch_changes,
14-
forward_legacy_timeout_votes,
14+
forward_legacy_high_qc, forward_legacy_timeout_votes,
1515
},
1616
network::Network,
1717
state::UpdateLeaf,
@@ -155,6 +155,10 @@ where
155155
legacy_event_rx.clone(),
156156
client_api.clone(),
157157
));
158+
spawn(forward_legacy_high_qc(
159+
legacy_event_rx.clone(),
160+
client_api.clone(),
161+
));
158162
spawn(forward_legacy_epoch_changes(
159163
legacy_event_rx.clone(),
160164
client_api.clone(),

crates/espresso/node/src/context.rs

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -206,15 +206,7 @@ where
206206
.membership_coordinator(membership_coordinator.clone())
207207
.network(coordinator_network)
208208
.initializer(&initializer_for_coordinator)
209-
.upgrade_lock({
210-
// TODO: The Coordinator and HotShot each create their own UpgradeLock
211-
// from the same inputs. They need to share a single lock so that upgrade
212-
// certificate updates are visible to both.
213-
UpgradeLock::from_certificate(
214-
upgrade,
215-
&initializer_for_coordinator.decided_upgrade_certificate,
216-
)
217-
})
209+
.upgrade_lock(handle.hotshot.upgrade_lock.clone())
218210
.public_key(validator_config.public_key)
219211
.private_key(validator_config.private_key.clone())
220212
.state_private_key(validator_config.state_private_key.clone())

crates/hotshot/new-protocol/src/client.rs

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ use committable::Commitment;
55
use hotshot_types::{
66
data::{EpochNumber, Leaf2, ViewNumber},
77
message::Proposal as SignedProposal,
8+
simple_certificate::QuorumCertificate2,
89
simple_vote::TimeoutVote2,
910
traits::{leaf_fetcher_network::LeafFetcherNetwork, node_implementation::NodeType},
1011
utils::StateAndDelta,
@@ -123,6 +124,14 @@ impl<T: NodeType> ClientApi<T> {
123124
.await
124125
}
125126

127+
/// Forward the last legacy view's QC so the first new-protocol leader can
128+
/// propose on it even if the cutover seed was snapshotted before it formed.
129+
pub async fn submit_legacy_high_qc(&self, qc: QuorumCertificate2<T>) -> Result<(), QueryError> {
130+
let (respond, rx) = oneshot::channel();
131+
self.call(ClientRequest::SubmitLegacyHighQc { qc, respond }, rx)
132+
.await
133+
}
134+
126135
/// Refresh the coordinator network's peer set for `epoch`.
127136
pub async fn bump_network_epoch(&self, epoch: EpochNumber) -> Result<(), QueryError> {
128137
let (respond, rx) = oneshot::channel();
@@ -226,6 +235,10 @@ pub(crate) enum ClientRequest<T: NodeType> {
226235
vote: TimeoutVote2<T>,
227236
respond: oneshot::Sender<()>,
228237
},
238+
SubmitLegacyHighQc {
239+
qc: QuorumCertificate2<T>,
240+
respond: oneshot::Sender<()>,
241+
},
229242
BumpNetworkEpoch {
230243
epoch: EpochNumber,
231244
respond: oneshot::Sender<()>,

crates/hotshot/new-protocol/src/consensus.rs

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -304,16 +304,21 @@ impl<T: NodeType> Consensus<T> {
304304
let view = seed.decided_anchor.view_number();
305305
if view > self.last_decided_view {
306306
self.last_decided_view = view;
307-
self.last_decided_leaf = seed.decided_anchor;
307+
self.last_decided_leaf = seed.decided_anchor.clone();
308308
}
309309

310+
let mut highest_seeded_block: u64 = seed.decided_anchor.block_header().block_number();
311+
310312
for leaf in seed.undecided {
311313
let view = leaf.view_number();
312314
let justify_qc = leaf.justify_qc().clone();
313315
self.register_legacy_qc(&justify_qc);
314316

315317
let block_number = leaf.block_header().block_number();
316318
let epoch = EpochNumber::new(epoch_from_block_number(block_number, *self.epoch_height));
319+
if block_number > highest_seeded_block {
320+
highest_seeded_block = block_number;
321+
}
317322

318323
let view_change_evidence = leaf.view_change_evidence.clone().and_then(|e| match e {
319324
ViewChangeEvidence2::Timeout(tc) => Some(tc),
@@ -355,6 +360,13 @@ impl<T: NodeType> Consensus<T> {
355360
if last_pre_cutover > self.current_view {
356361
self.current_view = last_pre_cutover;
357362
}
363+
let seeded_epoch = EpochNumber::new(epoch_from_block_number(
364+
highest_seeded_block,
365+
*self.epoch_height,
366+
));
367+
if self.current_epoch.is_none_or(|cur| cur < seeded_epoch) {
368+
self.current_epoch = Some(seeded_epoch);
369+
}
358370
}
359371

360372
/// Register `justify_qc` as Cert1 for its parent view (idempotent)

crates/hotshot/new-protocol/src/coordinator.rs

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1315,6 +1315,41 @@ where
13151315
}
13161316
let _ = respond.send(());
13171317
},
1318+
ClientRequest::SubmitLegacyHighQc { qc, respond } => {
1319+
// QC certifies the last legacy view; cutover view is the next.
1320+
// Register idempotently so the smooth-start precondition holds
1321+
// regardless of arrival order vs. the cutover seed.
1322+
let qc_view = qc.view_number();
1323+
let cutover_view = qc_view + 1;
1324+
self.consensus.register_legacy_qc(&qc);
1325+
1326+
// Still parked on the last legacy view (seed landed without this
1327+
// QC, waiting out the timer) and not yet skipped via TC2: propose
1328+
// the cutover view on the real QC now. Self-idempotent — once
1329+
// started, `cur_view` advances past `qc_view` and `maybe_propose`
1330+
// dedups by `proposed_views`.
1331+
let cur_view = self.consensus.current_view();
1332+
if cur_view == qc_view
1333+
&& self.consensus.timeout_cert_at(cutover_view).is_none()
1334+
&& self.consensus.cert1_at(qc_view).is_some()
1335+
&& self.consensus.proposal_at(qc_view).is_some()
1336+
{
1337+
tracing::info!(
1338+
%cutover_view,
1339+
"bridged late legacy high QC; proposing cutover view on it (no timeout)"
1340+
);
1341+
self.start();
1342+
while let Some(output) = self.outbox.pop_front() {
1343+
if let Err(err) = self.process_consensus_output(output) {
1344+
tracing::warn!(
1345+
%err,
1346+
"error processing bridged-high-qc bootstrap output"
1347+
);
1348+
}
1349+
}
1350+
}
1351+
let _ = respond.send(());
1352+
},
13181353
ClientRequest::BumpNetworkEpoch { epoch, respond } => {
13191354
if let Err(err) = self
13201355
.network

crates/hotshot/new-protocol/src/cutover.rs

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -159,6 +159,23 @@ pub async fn forward_legacy_timeout_votes<T: NodeType>(
159159
}
160160
}
161161

162+
/// Forward the last legacy view's QC into the coordinator, so the cutover-view
163+
/// leader can propose on it instead of waiting out a timeout when the cutover
164+
/// seed was snapshotted before the QC formed.
165+
pub async fn forward_legacy_high_qc<T: NodeType>(
166+
legacy_event_rx: InactiveReceiver<Event<T>>,
167+
client_api: ClientApi<T>,
168+
) {
169+
let mut rx = legacy_event_rx.activate_cloned();
170+
while let Some(event) = rx.next().await {
171+
if let EventType::LegacyHighQcFormed { qc } = event.event
172+
&& let Err(err) = client_api.submit_legacy_high_qc(qc).await
173+
{
174+
tracing::warn!(%err, "failed to forward legacy high QC to new-protocol coordinator");
175+
}
176+
}
177+
}
178+
162179
/// Forward legacy epoch transitions into `bump_network_epoch`.
163180
/// `epoch_height == 0` disables forwarding.
164181
pub async fn forward_legacy_epoch_changes<T: NodeType>(

crates/hotshot/new-protocol/src/tests/legacy_cutover.rs

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ use versions::{NEW_PROTOCOL_VERSION, Upgrade, version};
5151
use crate::{
5252
consensus::ConsensusOutput,
5353
coordinator::{Coordinator, CoordinatorOutput, error::Severity, timer::Timer},
54-
cutover::{CutoverGate, forward_legacy_timeout_votes},
54+
cutover::{CutoverGate, forward_legacy_high_qc, forward_legacy_timeout_votes},
5555
helpers::test_upgrade_lock,
5656
network::cliquenet::Cliquenet,
5757
outbox::Outbox,
@@ -421,11 +421,14 @@ async fn spawn_node(
421421
let legacy_event_rx = legacy.read().await.event_stream_known_impl().deactivate();
422422
bg_handles.push(
423423
tokio::spawn(forward_legacy_timeout_votes(
424-
legacy_event_rx,
424+
legacy_event_rx.clone(),
425425
client_api.clone(),
426426
))
427427
.abort_handle(),
428428
);
429+
bg_handles.push(
430+
tokio::spawn(forward_legacy_high_qc(legacy_event_rx, client_api.clone())).abort_handle(),
431+
);
429432

430433
let (decision_tx, decision_rx) = mpsc::unbounded_channel::<DecisionEvent>();
431434
let runner_abort = tokio::spawn(run_cutover_node(
@@ -670,14 +673,16 @@ const PREDICTED_CUTOVER_VIEW: u64 = UPGRADE_VIEW + 20;
670673
/// subset of `{V-2, V-1, V, V+1, V+2}`.
671674
const V: u64 = PREDICTED_CUTOVER_VIEW - 1;
672675

673-
/// Happy path. `cutover_view - 1` reliably has no QC at cutover, so
674-
/// the new protocol skips it via TC2.
676+
/// Happy path. The QC for `cutover_view - 1` forms in the legacy protocol and
677+
/// rides into the new protocol via the cutover seed's `high_qc`, so the first
678+
/// leader proposes directly on it (no TC2 skip, no timer wait) and every view
679+
/// is decided.
675680
#[tokio::test(flavor = "multi_thread")]
676681
async fn legacy_runs_upgrade_then_new_protocol_takes_over() {
677682
run_cutover_test(
678683
4,
679684
6,
680-
views([PREDICTED_CUTOVER_VIEW - 1]),
685+
BTreeSet::new(),
681686
Duration::from_secs(180),
682687
DEFAULT_NEW_PROTO_VIEW_TIMEOUT,
683688
Vec::new(),

crates/hotshot/task-impls/src/consensus/mod.rs

Lines changed: 46 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ use hotshot_types::{
1414
consensus::OuterConsensus,
1515
data::{EpochNumber, ViewNumber},
1616
epoch_membership::EpochMembershipCoordinator,
17-
event::Event,
17+
event::{Event, EventType},
1818
message::UpgradeLock,
1919
simple_certificate::{NextEpochQuorumCertificate2, QuorumCertificate2, TimeoutCertificate2},
2020
simple_vote::{HasEpoch, NextEpochQuorumVote2, QuorumVote2, TimeoutVote2},
@@ -35,7 +35,7 @@ use self::handlers::{
3535
};
3636
use crate::{
3737
events::HotShotEvent,
38-
helpers::{broadcast_view_change, validate_qc_and_next_epoch_qc},
38+
helpers::{broadcast_event, broadcast_view_change, validate_qc_and_next_epoch_qc},
3939
vote_collection::{EpochRootVoteCollectorsMap, VoteCollectorsMap},
4040
};
4141

@@ -261,6 +261,32 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>> ConsensusTaskState<TYPES, I>
261261
.await;
262262
}
263263
},
264+
HotShotEvent::Qc2Formed(either::Left(qc))
265+
if self.upgrade_lock.new_protocol_active(self.cur_view)
266+
&& !self.upgrade_lock.new_protocol_active(qc.view_number()) =>
267+
{
268+
// Cutover boundary only: the gated proposal path won't land this
269+
// last-legacy QC in `high_qc`, so capture it here for
270+
// `extract_pre_cutover_seed` to carry across. `update_high_qc` is
271+
// monotone, so this is a no-op outside the cutover window.
272+
let mut consensus_writer = self.consensus.write().await;
273+
let _ = consensus_writer.update_high_qc(qc.clone());
274+
drop(consensus_writer);
275+
if let Err(e) = self.storage.update_high_qc2(qc.clone()).await {
276+
tracing::warn!("Failed to persist boundary high QC: {e}");
277+
}
278+
// Forward to the espresso bridge -> new-protocol coordinator, in case the
279+
// cutover seed was snapshotted before this QC finished assembling. Only the
280+
// cutover-view leader reaches this arm, so it lands exactly where needed.
281+
broadcast_event(
282+
Event {
283+
view_number: qc.view_number(),
284+
event: EventType::LegacyHighQcFormed { qc: qc.clone() },
285+
},
286+
&self.output_event_stream,
287+
)
288+
.await;
289+
},
264290
_ => {},
265291
}
266292

@@ -279,7 +305,24 @@ impl<TYPES: NodeType, I: NodeImplementation<TYPES>> TaskState for ConsensusTaskS
279305
_receiver: &Receiver<Arc<Self::Event>>,
280306
) -> Result<()> {
281307
if self.upgrade_lock.new_protocol_active(self.cur_view) {
282-
return Ok(());
308+
// Past cutover: still admit votes/QCs for strictly-pre-cutover views so
309+
// the cutover leader can finish the last legacy QC; everything else
310+
// (proposing, voting, view changes) stays shut down.
311+
let admit = match event.as_ref() {
312+
HotShotEvent::QuorumVoteRecv(vote) => {
313+
!self.upgrade_lock.new_protocol_active(vote.view_number())
314+
},
315+
HotShotEvent::EpochRootQuorumVoteRecv(vote) => {
316+
!self.upgrade_lock.new_protocol_active(vote.view_number())
317+
},
318+
HotShotEvent::Qc2Formed(either::Left(qc)) => {
319+
!self.upgrade_lock.new_protocol_active(qc.view_number())
320+
},
321+
_ => false,
322+
};
323+
if !admit {
324+
return Ok(());
325+
}
283326
}
284327
self.handle(event, sender.clone()).await
285328
}

0 commit comments

Comments
 (0)