Skip to content

Commit 1eba31b

Browse files
authored
[new-protocol] garbage collect consensus data for views less than decided view (#4415)
* gc for view < decided view * fix snapshot * gc on every decide * fix test
1 parent a2f322f commit 1eba31b

17 files changed

Lines changed: 43 additions & 162 deletions

crates/espresso/node/src/context.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ use std::{
22
fmt::{Debug, Display},
33
future::Future,
44
marker::PhantomData,
5-
num::NonZeroU64,
65
sync::Arc,
76
time::{Duration, Instant},
87
};
@@ -126,7 +125,6 @@ where
126125
event_consumer: impl PersistenceEventConsumer + 'static,
127126
proposal_fetcher_cfg: ProposalFetcherConfig,
128127
bootstrap_epoch_catchup_timeout: Duration,
129-
new_protocol_consensus_gc_interval: NonZeroU64,
130128
) -> anyhow::Result<Self> {
131129
let config = &network_config.config;
132130
let pub_key = validator_config.public_key;
@@ -213,7 +211,6 @@ where
213211
.stake_table_capacity(stake_table_capacity)
214212
.timeout_duration(Duration::from_secs(10))
215213
.storage(Arc::clone(&persistence))
216-
.garbage_collection_interval(new_protocol_consensus_gc_interval.get())
217214
.metrics(metrics)
218215
.make();
219216

crates/espresso/node/src/lib.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ pub mod state_cert;
1919
pub mod state_signature;
2020
pub mod util;
2121

22-
use std::{fmt::Debug, marker::PhantomData, num::NonZeroU64, sync::Arc, time::Duration};
22+
use std::{fmt::Debug, marker::PhantomData, sync::Arc, time::Duration};
2323

2424
use alloy::primitives::U256;
2525
use anyhow::Context;
@@ -136,8 +136,6 @@ pub struct NetworkParams {
136136
/// Per-step timeout for the startup stake-table catchup walk
137137
/// (`bootstrap_epoch_window`).
138138
pub bootstrap_epoch_catchup_timeout: Duration,
139-
/// Number of blocks between new-protocol consensus garbage collection passes.
140-
pub new_protocol_consensus_gc_interval: NonZeroU64,
141139
/// The address to advertise as our public API's URL
142140
pub public_api_url: Option<Url>,
143141
/// Cliquenet network address.
@@ -837,7 +835,6 @@ where
837835
event_consumer,
838836
proposal_fetcher_config,
839837
network_params.bootstrap_epoch_catchup_timeout,
840-
network_params.new_protocol_consensus_gc_interval,
841838
)
842839
.await?;
843840

@@ -1667,7 +1664,6 @@ pub mod testing {
16671664
event_consumer,
16681665
Default::default(),
16691666
Duration::from_secs(2),
1670-
NonZeroU64::new(100).unwrap(),
16711667
)
16721668
.await
16731669
.unwrap()

crates/espresso/node/src/options.rs

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ use std::{
66
collections::HashSet,
77
fmt::{self, Formatter},
88
iter::once,
9-
num::NonZeroU64,
109
path::PathBuf,
1110
time::Duration,
1211
};
@@ -341,18 +340,6 @@ pub struct Options {
341340
#[clap(long, env = "ESPRESSO_NODE_BOOTSTRAP_EPOCH_CATCHUP_TIMEOUT", default_value = "30s", value_parser = parse_duration)]
342341
pub bootstrap_epoch_catchup_timeout: Duration,
343342

344-
/// Number of blocks between new-protocol garbage collection passes.
345-
///
346-
/// Controls how often the new-protocol coordinator triggers garbage
347-
/// collection of decided views/epochs across consensus state, VID,
348-
/// vote collectors, and storage.
349-
#[clap(
350-
long,
351-
env = "ESPRESSO_NODE_NEW_PROTOCOL_CONSENSUS_GC_INTERVAL",
352-
default_value = "100"
353-
)]
354-
pub new_protocol_consensus_gc_interval: NonZeroU64,
355-
356343
#[clap(flatten)]
357344
pub logging: logging::Config,
358345

@@ -691,7 +678,6 @@ pub struct PublicNodeConfig {
691678
pub catchup_base_timeout: Duration,
692679
pub local_catchup_timeout: Duration,
693680
pub bootstrap_epoch_catchup_timeout: Duration,
694-
pub new_protocol_consensus_gc_interval: NonZeroU64,
695681
pub catchup_backoff: BackoffParams,
696682
pub proposal_fetcher: ProposalFetcherConfig,
697683
pub libp2p: Libp2pTuning,
@@ -1031,7 +1017,6 @@ impl PublicNodeConfig {
10311017
catchup_base_timeout: opt.catchup_base_timeout,
10321018
local_catchup_timeout: opt.local_catchup_timeout,
10331019
bootstrap_epoch_catchup_timeout: opt.bootstrap_epoch_catchup_timeout,
1034-
new_protocol_consensus_gc_interval: opt.new_protocol_consensus_gc_interval,
10351020
catchup_backoff: opt.catchup_backoff,
10361021
proposal_fetcher: opt.proposal_fetcher_config,
10371022
libp2p: Libp2pTuning::from(opt),

crates/espresso/node/src/run.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,6 @@ where
103103
catchup_base_timeout: opt.catchup_base_timeout,
104104
local_catchup_timeout: opt.local_catchup_timeout,
105105
bootstrap_epoch_catchup_timeout: opt.bootstrap_epoch_catchup_timeout,
106-
new_protocol_consensus_gc_interval: opt.new_protocol_consensus_gc_interval,
107106
libp2p_history_gossip: opt.libp2p_history_gossip,
108107
libp2p_history_length: opt.libp2p_history_length,
109108
libp2p_max_ihave_length: opt.libp2p_max_ihave_length,

crates/espresso/node/src/snapshots/espresso_node__options__tests__config_node_response_postgres.snap

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ local_catchup_timeout:
4848
bootstrap_epoch_catchup_timeout:
4949
secs: 30
5050
nanos: 0
51-
new_protocol_consensus_gc_interval: 100
5251
catchup_backoff:
5352
factor: 4
5453
base:

crates/hotshot/new-protocol/bench/src/node.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,14 +131,12 @@ async fn build_coordinator(
131131
upgrade_lock.clone(),
132132
genesis_leaf.clone(),
133133
epoch_height,
134-
100,
135134
);
136135

137136
let vote1_collector = VoteCollector::new(membership.clone(), upgrade_lock.clone());
138137
let vote2_collector = VoteCollector::new(membership.clone(), upgrade_lock.clone());
139138
let timeout_collector = VoteCollector::new(membership.clone(), upgrade_lock.clone());
140139
let timeout_one_honest_collector = VoteCollector::new(membership.clone(), upgrade_lock.clone());
141-
let checkpoint_collector = VoteCollector::new(membership.clone(), upgrade_lock.clone());
142140
let epoch_root_collector =
143141
EpochRootVoteCollector::new(membership.clone(), upgrade_lock.clone());
144142

@@ -198,7 +196,6 @@ async fn build_coordinator(
198196
.vote2_collector(vote2_collector)
199197
.timeout_collector(timeout_collector)
200198
.timeout_one_honest_collector(timeout_one_honest_collector)
201-
.checkpoint_collector(checkpoint_collector)
202199
.epoch_root_collector(epoch_root_collector)
203200
.vid_disperser(vid_disperser)
204201
.vid_reconstructor(vid_reconstructor)

crates/hotshot/new-protocol/src/consensus.rs

Lines changed: 4 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ use hotshot_types::{
2121
check_qc_state_cert_correspondence,
2222
},
2323
simple_vote::{
24-
CheckpointData, HasEpoch, LightClientStateUpdateVote2, QuorumData2, SimpleVote,
25-
TimeoutData2, TimeoutVote2, Vote2Data,
24+
HasEpoch, LightClientStateUpdateVote2, QuorumData2, SimpleVote, TimeoutData2, TimeoutVote2,
25+
Vote2Data,
2626
},
2727
stake_table::{HSStakeTable, StakeTableEntries},
2828
traits::{
@@ -42,8 +42,8 @@ use crate::{
4242
helpers::proposal_commitment,
4343
logging::KeyPrefix,
4444
message::{
45-
Certificate1, Certificate2, CheckpointVote, EpochChangeMessage, Proposal,
46-
ProposalFetchRequest, ProposalMessage, Validated, VidShareMessage, Vote1, Vote2,
45+
Certificate1, Certificate2, EpochChangeMessage, Proposal, ProposalFetchRequest,
46+
ProposalMessage, Validated, VidShareMessage, Vote1, Vote2,
4747
},
4848
outbox::Outbox,
4949
state::{StateRequest, StateResponse},
@@ -115,7 +115,6 @@ pub enum ConsensusOutput<T: NodeType> {
115115
RequestDrbResult(EpochNumber),
116116
SendProposal(SignedProposal<T, Proposal<T>>),
117117
SendVidShares(Vec<VidShareMessage<T>>),
118-
SendCheckpointVote(CheckpointVote<T>),
119118
SendTimeoutVote(TimeoutVote2<T>, Option<Certificate1<T>>),
120119
SendVote1(Vote1<T>),
121120
SendVote2(Vote2<T>),
@@ -199,7 +198,6 @@ pub struct Consensus<T: NodeType> {
199198
node_id: KeyPrefix,
200199
upgrade_lock: UpgradeLock<T>,
201200

202-
garbage_collection_interval: BlockNumber,
203201
pub(crate) epoch_height: BlockNumber,
204202
}
205203

@@ -232,7 +230,6 @@ impl<T: NodeType> Consensus<T> {
232230
upgrade_lock: UpgradeLock<T>,
233231
genesis_leaf: Leaf2<T>,
234232
epoch_height: B,
235-
garbage_collection_interval: B,
236233
) -> Self
237234
where
238235
B: Into<BlockNumber>,
@@ -270,7 +267,6 @@ impl<T: NodeType> Consensus<T> {
270267
state_certs: BTreeMap::new(),
271268
upgrade_lock,
272269
vid_shares: BTreeMap::new(),
273-
garbage_collection_interval: garbage_collection_interval.into(),
274270
epoch_height: epoch_height.into(),
275271
}
276272
}
@@ -1360,10 +1356,6 @@ impl<T: NodeType> Consensus<T> {
13601356
}
13611357
let new_decided_view = max(self.last_decided_view, leaf.view_number());
13621358
let last_decided_leaf = leaf.clone();
1363-
let mut gc = None;
1364-
if leaf.block_header().block_number() % *self.garbage_collection_interval == 0 {
1365-
gc = Some((leaf.view_number(), leaf.justify_qc().epoch()));
1366-
}
13671359
let mut decided = vec![leaf];
13681360
let mut vid_shares = vec![self.signed_vid_share(view)];
13691361

@@ -1381,11 +1373,6 @@ impl<T: NodeType> Consensus<T> {
13811373
if let Some(payload) = self.blocks.get(&parent_view) {
13821374
leaf.fill_block_payload_unchecked(payload.clone());
13831375
}
1384-
if gc.is_none()
1385-
&& leaf.block_header().block_number() % *self.garbage_collection_interval == 0
1386-
{
1387-
gc = Some((leaf.view_number(), leaf.justify_qc().epoch()));
1388-
}
13891376
vid_shares.push(self.signed_vid_share(parent_view));
13901377
decided.push(leaf);
13911378
parent_view = proposal.justify_qc.view_number();
@@ -1403,26 +1390,6 @@ impl<T: NodeType> Consensus<T> {
14031390
cert2: Some(cert2.clone()),
14041391
vid_shares,
14051392
});
1406-
if let Some(gc) = gc {
1407-
let gc_data = CheckpointData {
1408-
view: gc.0,
1409-
epoch: gc.1.unwrap_or_default(),
1410-
};
1411-
let vote = match SimpleVote::create_signed_vote(
1412-
gc_data,
1413-
view,
1414-
&self.public_key,
1415-
&self.private_key,
1416-
&self.upgrade_lock,
1417-
) {
1418-
Ok(vote) => vote,
1419-
Err(err) => {
1420-
warn!(%view, %err, "failed to create signed checkpoint vote");
1421-
return;
1422-
},
1423-
};
1424-
outbox.push_back(ConsensusOutput::SendCheckpointVote(vote));
1425-
}
14261393
}
14271394

14281395
/// Build a `LightClientStateUpdateVote2` for an epoch-root leaf.

crates/hotshot/new-protocol/src/coordinator.rs

Lines changed: 10 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ use crate::{
4242
helpers::proposal_commitment,
4343
logging::KeyPrefix,
4444
message::{
45-
self, BlockMessage, Certificate2, CheckpointCertificate, CheckpointVote, ConsensusMessage,
46-
Message, MessageType, Proposal, ProposalFetchMessage, ProposalMessage, TimeoutOneHonest,
47-
TransactionMessage, Unchecked, Vote2,
45+
self, BlockMessage, Certificate2, ConsensusMessage, Message, MessageType, Proposal,
46+
ProposalFetchMessage, ProposalMessage, TimeoutOneHonest, TransactionMessage, Unchecked,
47+
Vote2,
4848
},
4949
network::Network,
5050
outbox::Outbox,
@@ -78,7 +78,6 @@ pub struct Coordinator<T: NodeType, N, S> {
7878
vote2_collector: VoteCollector<T, Vote2<T>, Certificate2<T>>,
7979
timeout_collector: VoteCollector<T, TimeoutVote2<T>, TimeoutCertificate2<T>>,
8080
timeout_one_honest_collector: VoteCollector<T, TimeoutVote2<T>, TimeoutOneHonest<T>>,
81-
checkpoint_collector: VoteCollector<T, CheckpointVote<T>, CheckpointCertificate<T>>,
8281
epoch_root_collector: EpochRootVoteCollector<T>,
8382
epoch_manager: EpochManager<T>,
8483
block_builder: BlockBuilder<T>,
@@ -122,7 +121,6 @@ where
122121
stake_table_capacity: usize,
123122
timeout_duration: Duration,
124123
storage: S,
125-
garbage_collection_interval: u64,
126124
metrics: &dyn Metrics,
127125
) -> Self {
128126
let mut consensus = Consensus::new(
@@ -134,7 +132,6 @@ where
134132
upgrade_lock.clone(),
135133
initializer.anchor_leaf.clone(),
136134
initializer.epoch_height,
137-
garbage_collection_interval,
138135
);
139136

140137
let genesis_cert1 = initializer.high_qc.clone();
@@ -193,10 +190,6 @@ where
193190
membership_coordinator.clone(),
194191
lock.clone(),
195192
))
196-
.checkpoint_collector(VoteCollector::new(
197-
membership_coordinator.clone(),
198-
lock.clone(),
199-
))
200193
.epoch_root_collector(EpochRootVoteCollector::new(
201194
membership_coordinator.clone(),
202195
lock,
@@ -425,14 +418,6 @@ where
425418
return Err(CoordinatorError::regular(e).context("proposal validation"))
426419
}
427420
},
428-
Some(cert) = self.checkpoint_collector.next() => {
429-
finish_measurement(next_input);
430-
let Some(epoch) = cert.epoch() else {
431-
let msg = format!("missing epoch in view {}", cert.view_number());
432-
return Err(CoordinatorError::critical(msg).context("gc certificate"))
433-
};
434-
self.gc(cert.view_number(), epoch);
435-
}
436421
Some(item) = self.block_builder.next() => match item {
437422
Ok(block) => {
438423
finish_measurement(next_input);
@@ -571,20 +556,6 @@ where
571556
debug!(%node, %epoch, "request drb result");
572557
self.epoch_manager.request_drb_result(epoch);
573558
},
574-
ConsensusOutput::SendCheckpointVote(checkpoint_vote) => {
575-
let view = checkpoint_vote.view_number();
576-
let epoch = checkpoint_vote.data.epoch;
577-
debug!(%node, %view, %epoch, "send checkpoint vote");
578-
let message = Message {
579-
sender: self.public_key.clone(),
580-
message_type: MessageType::Consensus(ConsensusMessage::Checkpoint(
581-
checkpoint_vote,
582-
)),
583-
};
584-
self.network
585-
.broadcast(message.view_number(), &message)
586-
.map_err(|e| CoordinatorError::from(e).context("broadcast checkpoint vote"))?
587-
},
588559
ConsensusOutput::LeafDecided {
589560
leaves,
590561
cert1,
@@ -601,6 +572,13 @@ where
601572
if let Some(cert2) = cert2 {
602573
self.storage.append_cert2(cert2.view_number, cert2.clone());
603574
}
575+
// `leaves` is ordered newest first
576+
// garbage collect the data for views < decided view
577+
if let Some(newest) = leaves.first() {
578+
let gc_view = newest.view_number();
579+
let gc_epoch = newest.justify_qc().epoch().unwrap_or_default();
580+
self.gc(gc_view, gc_epoch);
581+
}
604582
for leaf in leaves {
605583
self.epoch_manager.handle_leaf_decided(leaf);
606584
}
@@ -940,16 +918,6 @@ where
940918
);
941919
Some(ConsensusInput::EpochChange(epoch_change))
942920
},
943-
ConsensusMessage::Checkpoint(checkpoint) => {
944-
debug!(
945-
%node, %sender,
946-
view = %checkpoint.view_number(),
947-
epoch = %checkpoint.data.epoch,
948-
"recv checkpoint vote"
949-
);
950-
self.checkpoint_collector.accumulate_vote(checkpoint).await;
951-
None
952-
},
953921
},
954922
MessageType::Block(msg) => {
955923
match msg {
@@ -1401,7 +1369,6 @@ where
14011369
fn gc(&mut self, view: ViewNumber, epoch: EpochNumber) {
14021370
info!(node = %self.node_id, %view, "garbage collecting");
14031371
self.consensus.gc(view, epoch);
1404-
self.checkpoint_collector.gc(view, epoch);
14051372
let _ = self.network.gc(view); // TODO
14061373
self.state_manager.gc(view);
14071374
self.vid_disperser.gc(view);

0 commit comments

Comments
 (0)