Skip to content

Commit 0a6abc9

Browse files
committed
refactor(starfish-core): simplify misbehavior classification and tighten gauge updates
Address pending review comments on PR #11531: - Drop `ErrorSource` enum and `classify_for_source`. With Provable-stays- Provable across all sources, the only source-specific case was the Subscriber's `UnexpectedAuthority` mapping; fold it into `classify_block_header_error` so every classifier path goes through one function. `record_faulty_block_header` no longer takes a source argument. - Pass `context: &Context` to `record_faulty_block_header` and bump the in_memory gauges (`faulty_blocks_provable_by_authority`, `faulty_blocks_unprovable_by_peer` with `source="in_memory"`) on every recorded fault. `flush_faulty_block_header_buffer` resets them to zero, matching the existing pattern for missing_proposals and equivocations. - Drop the verify_fetched_headers recording in commit_syncer fast/regular paths — those errors are fetch-shape and classify as Untracked today. Replace with TODO pointing at the recording entry point for when per-header faults become observable there. - Move `misbehavior_store.reset()` to step 1 of `reinitialize` alongside the other in-memory cache clears.
1 parent 4de5782 commit 0a6abc9

7 files changed

Lines changed: 97 additions & 171 deletions

File tree

crates/starfish/core/src/authority_service.rs

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ use crate::{
4444
SerializedBlockBundleParts, SerializedHeaderAndTransactions, SerializedTransactionsV1,
4545
SerializedTransactionsV2, TransactionFetchMode,
4646
},
47-
scoring_metrics_store::{ErrorSource, MisbehaviorStore},
47+
scoring_metrics_store::MisbehaviorStore,
4848
shard_reconstructor::TransactionMessage,
4949
stake_aggregator::{QuorumThreshold, StakeAggregator},
5050
storage::Store,
@@ -177,12 +177,8 @@ impl<C: CoreThreadDispatcher> AuthorityService<C> {
177177
let signed_block_header: SignedBlockHeader = bcs::from_bytes(&serialized_block_header)
178178
.map_err(ConsensusError::MalformedHeader)
179179
.inspect_err(|e| {
180-
self.misbehavior_store.record_faulty_block_header(
181-
peer,
182-
peer,
183-
e,
184-
ErrorSource::Subscriber,
185-
);
180+
self.misbehavior_store
181+
.record_faulty_block_header(peer, peer, e, &self.context);
186182
})?;
187183

188184
// Reject blocks not produced by the peer.
@@ -194,12 +190,8 @@ impl<C: CoreThreadDispatcher> AuthorityService<C> {
194190
.bundles_with_invalid_parts
195191
.with_label_values(&[peer_hostname, "header", e.name()])
196192
.inc();
197-
self.misbehavior_store.record_faulty_block_header(
198-
peer,
199-
peer,
200-
&e,
201-
ErrorSource::Subscriber,
202-
);
193+
self.misbehavior_store
194+
.record_faulty_block_header(peer, peer, &e, &self.context);
203195
info!("Block with wrong authority from {}: {}", peer, e);
204196
return Err(e);
205197
}
@@ -217,7 +209,7 @@ impl<C: CoreThreadDispatcher> AuthorityService<C> {
217209
peer,
218210
signed_block_header.author(),
219211
&e,
220-
ErrorSource::Subscriber,
212+
&self.context,
221213
);
222214
info!("Invalid block header from {}: {}", peer, e);
223215
return Err(e);
@@ -299,12 +291,8 @@ impl<C: CoreThreadDispatcher> AuthorityService<C> {
299291
.map_err(ConsensusError::MalformedHeader)
300292
.inspect_err(|e| {
301293
// Author is unknown when deserialization fails — blame the peer.
302-
self.misbehavior_store.record_faulty_block_header(
303-
peer,
304-
peer,
305-
e,
306-
ErrorSource::Subscriber,
307-
);
294+
self.misbehavior_store
295+
.record_faulty_block_header(peer, peer, e, &self.context);
308296
})?;
309297

310298
let header_round = signed_block_header.round();
@@ -342,7 +330,7 @@ impl<C: CoreThreadDispatcher> AuthorityService<C> {
342330
peer,
343331
signed_block_header.author(),
344332
&e,
345-
ErrorSource::Subscriber,
333+
&self.context,
346334
);
347335
info!("Invalid additional block header from {}: {}", peer, e);
348336
return Err(e);

crates/starfish/core/src/commit_syncer/fast.rs

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ use crate::{
3737
error::{ConsensusError, ConsensusResult},
3838
header_synchronizer::HeaderSynchronizerHandle,
3939
network::{NetworkClient, SerializedTransactionsV2},
40-
scoring_metrics_store::{ErrorSource, MisbehaviorStore},
40+
scoring_metrics_store::MisbehaviorStore,
4141
transaction_ref::{GenericTransactionRef, TransactionRef},
4242
};
4343

@@ -793,14 +793,11 @@ impl<C: NetworkClient> FastCommitSyncer<C> {
793793
break;
794794
}
795795
Err(e) => {
796-
// Author is unknown for fetch-shape errors and
797-
// for MalformedHeader — blame the serving peer.
798-
inner.misbehavior_store.record_faulty_block_header(
799-
authority,
800-
authority,
801-
&e,
802-
ErrorSource::CommitSyncer,
803-
);
796+
// TODO: verify_fetched_headers currently only returns
797+
// fetch-shape errors (wrong count/ref) which classify
798+
// as Untracked. When per-header faults become observable
799+
// here, record them as peer misbehavior via
800+
// `inner.misbehavior_store.record_faulty_block_header`.
804801
warn!(
805802
"[{}] Failed to verify headers from {}: {}",
806803
inner.sync_type.as_str(),

crates/starfish/core/src/commit_syncer/mod.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ use crate::{
6262
error::{ConsensusError, ConsensusResult},
6363
header_synchronizer::HeaderSynchronizerHandle,
6464
network::NetworkClient,
65-
scoring_metrics_store::{ErrorSource, MisbehaviorStore},
65+
scoring_metrics_store::MisbehaviorStore,
6666
stake_aggregator::{QuorumThreshold, StakeAggregator},
6767
transaction_ref::{GenericTransactionRef, TransactionRef},
6868
};
@@ -253,20 +253,16 @@ impl<C: NetworkClient> Inner<C> {
253253
.map_err(ConsensusError::MalformedHeader)
254254
.inspect_err(|e| {
255255
// Author is unknown when deserialization fails — blame the peer.
256-
self.misbehavior_store.record_faulty_block_header(
257-
peer,
258-
peer,
259-
e,
260-
ErrorSource::CommitSyncer,
261-
);
256+
self.misbehavior_store
257+
.record_faulty_block_header(peer, peer, e, &self.context);
262258
})?;
263259
// The block signature needs to be verified.
264260
if let Err(e) = self.block_verifier.verify(&signed_block_header) {
265261
self.misbehavior_store.record_faulty_block_header(
266262
peer,
267263
signed_block_header.author(),
268264
&e,
269-
ErrorSource::CommitSyncer,
265+
&self.context,
270266
);
271267
return Err(e);
272268
}

crates/starfish/core/src/commit_syncer/regular.rs

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ use crate::{
4242
error::{ConsensusError, ConsensusResult},
4343
header_synchronizer::HeaderSynchronizerHandle,
4444
network::{NetworkClient, SerializedTransactionsV1, SerializedTransactionsV2},
45-
scoring_metrics_store::{ErrorSource, MisbehaviorStore},
45+
scoring_metrics_store::MisbehaviorStore,
4646
transaction_ref::{GenericTransactionRef, GenericTransactionRefAPI as _},
4747
};
4848

@@ -631,22 +631,16 @@ impl<C: NetworkClient> RegularCommitSyncer<C> {
631631
)
632632
.await?;
633633
// 5. Verify headers: count matches and each reference matches requested.
634+
// TODO: verify_fetched_headers currently only returns fetch-shape
635+
// errors (wrong count/ref) which classify as Untracked. When
636+
// per-header faults become observable here, record them as peer
637+
// misbehavior via
638+
// `inner.misbehavior_store.record_faulty_block_header`.
634639
verify_fetched_headers(
635640
target_authority,
636641
request_block_refs,
637642
serialized_block_headers,
638643
)
639-
.inspect_err(|e| {
640-
// Author unknown for fetch-shape errors and MalformedHeader —
641-
// blame the serving peer. Only MalformedHeader will be counted
642-
// (fetch-shape errors classify as Untracked).
643-
inner.misbehavior_store.record_faulty_block_header(
644-
target_authority,
645-
target_authority,
646-
e,
647-
ErrorSource::CommitSyncer,
648-
);
649-
})
650644
}
651645
})
652646
.collect();

crates/starfish/core/src/dag_state.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ impl DagState {
357357
// from the block refs already loaded into the cache.
358358
let recovered_misbehavior_counts = store
359359
.scan_misbehavior_counts()
360-
.expect("Database error reading scoring metrics");
360+
.expect("reading misbehavior counts from storage should not fail");
361361
state.misbehavior_store.initialize_misbehavior_counts(
362362
recovered_misbehavior_counts,
363363
&state.recent_headers_refs_by_authority,
@@ -440,6 +440,7 @@ impl DagState {
440440
self.tx_ref_to_block_digest_by_authority = vec![BTreeMap::new(); num_authorities];
441441
self.pending_commit_votes.clear();
442442
self.pending_acknowledgments.clear();
443+
self.misbehavior_store.reset();
443444

444445
// 2. Reinitialize threshold_clock with current round
445446
let current_round = self.threshold_clock.get_round();
@@ -456,11 +457,10 @@ impl DagState {
456457
}
457458

458459
// 4. Re-initialize misbehavior counts from storage
459-
self.misbehavior_store.reset();
460460
let recovered_misbehavior_counts = self
461461
.store
462462
.scan_misbehavior_counts()
463-
.expect("Database error reading scoring metrics");
463+
.expect("reading misbehavior counts from storage should not fail");
464464
self.misbehavior_store.initialize_misbehavior_counts(
465465
recovered_misbehavior_counts,
466466
&self.recent_headers_refs_by_authority,

crates/starfish/core/src/header_synchronizer.rs

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ use crate::{
5050
dag_state::{DagState, DataSource},
5151
error::{ConsensusError, ConsensusResult},
5252
network::NetworkClient,
53-
scoring_metrics_store::{ErrorSource, MisbehaviorStore},
53+
scoring_metrics_store::MisbehaviorStore,
5454
transactions_synchronizer::TransactionsSynchronizerHandle,
5555
};
5656

@@ -850,12 +850,8 @@ impl<C: NetworkClient, V: BlockVerifier, D: CoreThreadDispatcher> HeaderSynchron
850850
.map_err(ConsensusError::MalformedHeader)
851851
.inspect_err(|e| {
852852
// Author is unknown when deserialization fails — blame the peer.
853-
misbehavior_store.record_faulty_block_header(
854-
peer_index,
855-
peer_index,
856-
e,
857-
ErrorSource::Synchronizer,
858-
);
853+
misbehavior_store
854+
.record_faulty_block_header(peer_index, peer_index, e, context);
859855
})?;
860856

861857
if let Err(e) = block_verifier.verify(&signed_block_header) {
@@ -873,7 +869,7 @@ impl<C: NetworkClient, V: BlockVerifier, D: CoreThreadDispatcher> HeaderSynchron
873869
peer_index,
874870
signed_block_header.author(),
875871
&e,
876-
ErrorSource::Synchronizer,
872+
context,
877873
);
878874
warn!("Invalid block received from {}: {}", peer_index, e);
879875
return Err(e);
@@ -1005,7 +1001,7 @@ impl<C: NetworkClient, V: BlockVerifier, D: CoreThreadDispatcher> HeaderSynchron
10051001
authority_index,
10061002
authority_index,
10071003
e,
1008-
ErrorSource::Synchronizer,
1004+
&context,
10091005
);
10101006
})?;
10111007
block_verifier.verify(&signed_block_header).tap_err(|err|{
@@ -1020,7 +1016,7 @@ impl<C: NetworkClient, V: BlockVerifier, D: CoreThreadDispatcher> HeaderSynchron
10201016
authority_index,
10211017
signed_block_header.author(),
10221018
err,
1023-
ErrorSource::Synchronizer,
1019+
&context,
10241020
);
10251021
warn!("Invalid block header received from {}: {}", authority_index, err);
10261022
})?;

0 commit comments

Comments
 (0)