@@ -863,6 +863,46 @@ pub fn window_content_from_accum(mb_idx: u64) -> Option<(Vec<[u8; 32]>, Vec<[u8;
863863 Some((mb_hashes, vrf_outputs))
864864}
865865
/// Returns the `(current, previous)` global subwindow indices for `scan_end`.
///
/// A global subwindow index is `height / 1440`, i.e. `epoch * 10 + subwindow`: an epoch spans
/// 14 400 blocks split into 10 subwindows of 1 440 blocks each.
///
/// The previous subwindow counts ONLY within the SAME epoch. When `scan_end` lies in subwindow 0
/// of its epoch there is no previous subwindow, so the function returns `previous == current`
/// (the pair degenerates to just the current subwindow). This mirrors the prior
/// `heartbeat_epoch == hb_epoch` + `if cur_sub > 0` bitmask recency gate, and never bridges back
/// to subwindow 9 of the preceding epoch.
///
/// Pure function of `scan_end` ⇒ unit-tested for the off-by-one and the cross-epoch boundary.
///
/// # Background (v36: deterministic recent-Heartbeat liveness for Phase-2A producer eligibility)
///
/// Phase-2A previously read the recent-Heartbeat bit from `load_account(reg).heartbeat_slots`,
/// i.e. the persisted `accounts` CF — which is written by a DETACHED best-effort persist
/// (microblocks are the authoritative store). The eligible snapshot runs async, so each committee
/// member read a different persist-lag prefix → divergent eligible_producers → the QC-bound
/// epoch_commitment split → 2f+1 never formed → finality stall. The fix derives the set from the
/// COMMITTED block bodies (synchronously saved at apply, canonical + identical on every node),
/// bounded to `scan_end`: the supers that sent a Heartbeat whose ANCHOR fell in the current or
/// previous subwindow (same epoch) and that was included in a block at-or-below `scan_end`.
/// That is a pure function of the canonical chain ≤ `scan_end` ⇒ identical on every committee
/// member, with NO live-tip dependence. The scan covers ≤2 subwindows (~2880 blocks), off the
/// production path; bodies are retained 6 epochs, far beyond this window. This helper supplies
/// the subwindow pair that bounds that scan.
fn recency_subwindow_indices(scan_end: u64) -> (u64, u64) {
    /// Number of blocks in one subwindow.
    const SUBWINDOW_BLOCKS: u64 = 1440;
    /// Number of subwindows in one epoch (10 * 1440 = 14 400 blocks).
    const SUBWINDOWS_PER_EPOCH: u64 = 10;

    let cur_idx = scan_end / SUBWINDOW_BLOCKS;
    // Position of the current subwindow inside its epoch (0..=9); equal to
    // `(scan_end % 14400) / 1440`.
    let cur_sub = cur_idx % SUBWINDOWS_PER_EPOCH;
    // `cur_sub > 0` implies `cur_idx >= 1`, so the subtraction cannot underflow.
    let prev_idx = if cur_sub == 0 { cur_idx } else { cur_idx - 1 };
    (cur_idx, prev_idx)
}
888+
889+ fn recent_heartbeat_senders(storage: &crate::storage::Storage, scan_end: u64) -> std::collections::HashSet<String> {
890+ let (cur_idx, prev_idx) = recency_subwindow_indices(scan_end);
891+ let start = prev_idx.saturating_mul(1440);
892+ let mut set: std::collections::HashSet<String> = std::collections::HashSet::new();
893+ for h in start..=scan_end {
894+ if let Ok(Some(block)) = storage.load_microblock_auto_format(h) {
895+ for tx in &block.transactions {
896+ if let qnet_state::TransactionType::Heartbeat { node_id, anchor_height, .. } = &tx.tx_type {
897+ let s = anchor_height / 1440;
898+ if s == cur_idx || s == prev_idx { set.insert(node_id.clone()); }
899+ }
900+ }
901+ }
902+ }
903+ set
904+ }
905+
866906// ═══════════════════════════════════════════════════════════════════════════════
867907// PRODUCTION v2.50: Lock-free global storage with OnceCell + Arc
868908// RocksDB does NOT support multiple connections - single instance shared immutably
@@ -4098,24 +4138,22 @@ impl BlockchainNode {
40984138 // returning node's next HBC is always in range. Phase 1 builds the
40994139 // registered-super-node set; Phase 2A is the single eligibility path.
41004140 let scan_end = macroblock_index * 90;
4101- const PHASE_2A_SCAN_BLOCKS: u64 = 14_400;
4102- let scan_start = scan_end.saturating_sub(PHASE_2A_SCAN_BLOCKS);
4103-
4104- // Phase 1: registered Super node IDs (necessary, not sufficient).
4105- let mut registered_super_nodes: std::collections::HashSet<String> = std::collections::HashSet::new();
4106- for height in scan_start..=scan_end {
4107- if let Ok(Some(block)) = storage.load_microblock_auto_format(height) {
4108- for tx in &block.transactions {
4109- if let qnet_state::TransactionType::NodeRegistration {
4110- node_id, node_type, ..
4111- } = &tx.tx_type {
4112- if *node_type == qnet_state::NodeType::Super {
4113- registered_super_nodes.insert(node_id.clone());
4114- }
4115- }
4116- }
4117- }
4118- }
4141+
4142+ // Phase 1: registered Super node IDs (necessary, not sufficient). Sourced from the
4143+ // deterministic, snapshot-carried srtr_ registry index — NOT a recent-block body scan.
4144+ // A body scan only saw registrations inside the last epoch, so a node whose NodeRegistration
4145+ // is older than that window (every genesis node: block 0; any super away beyond the L2
4146+ // carryover) could never re-enter the producer/committee set after a snapshot cold-join —
4147+ // it synced but stayed ineligible. The registry index holds EVERY chain-confirmed super
4148+ // regardless of registration age (the same source the reward roster uses), so a returning
4149+ // node re-enters through the Phase-2A heartbeat gate below WITHOUT re-registration. Phase-2A
4150+ // (recent on-chain Heartbeat) absorbs any registration-timing edge: a just-applied
4151+ // registration not yet heartbeated is filtered out, so eligible-set membership stays stable.
4152+ let registered_super_nodes: std::collections::HashSet<String> =
4153+ match storage.super_registrations_sorted() {
4154+ Ok(regs) => regs.into_iter().map(|(node_id, _w)| node_id).collect(),
4155+ Err(_) => std::collections::HashSet::new(),
4156+ };
41194157
41204158 // v35: Phase-2A admits a registered Super node on UNFORGEABLE on-chain liveness —
41214159 // a Heartbeat-TX in the current or previous subwindow (Account.heartbeat_slots),
@@ -4124,18 +4162,25 @@ impl BlockchainNode {
41244162 {
41254163 let hb_epoch = scan_end / 14400;
41264164 let cur_sub = ((scan_end % 14400) / 1440) as u16;
4165+ // Deterministic recent-Heartbeat set from committed block bodies bounded to end_height
4166+ // (NOT the async-lagging accounts CF, whose per-node persist lag gave a divergent eligible
4167+ // set → epoch_commitment split → finality stall). Computed once; identical on every member.
4168+ let recent_hb = recent_heartbeat_senders(storage, scan_end);
41274169 let mut regs: Vec<&String> = registered_super_nodes.iter().collect();
41284170 regs.sort();
41294171 let mut added_tally = 0usize;
41304172 for reg in regs {
41314173 if eligible_ids.contains(reg) { continue; }
4132- let acct = match storage.load_account(reg).ok().flatten() {
4133- Some(a) if a.heartbeat_epoch == hb_epoch => a,
4174+ // Determinism: the srtr_ candidate pool is read at the live applied tip, which under
4175+ // async production can run ahead of end_height; bound each re-entry candidate to a
4176+ // registration CONFIRMED by end_height so every committee member admits the SAME set
4177+ // (an ahead-of-end_height registration is excluded identically everywhere). Genesis
4178+ // nodes carry reg_height=0 ⇒ always pass.
4179+ match storage.node_reg_height(reg) {
4180+ Ok(Some(h)) if h <= scan_end => {}
41344181 _ => continue,
4135- };
4136- let mut recent = acct.heartbeat_slots & (1u16 << cur_sub.min(9));
4137- if cur_sub > 0 { recent |= acct.heartbeat_slots & (1u16 << (cur_sub - 1)); }
4138- if recent == 0 { continue; }
4182+ }
4183+ if !recent_hb.contains(reg) { continue; }
41394184 let rep = (reputation_map.get(reg).copied()
41404185 .unwrap_or(qnet_consensus::deterministic_reputation::INITIAL_REPUTATION)
41414186 .clamp(0.0, 100.0) * 100.0).round() as u32;
@@ -27732,6 +27777,24 @@ mod tests {
2773227777 assert!(!checkpoint_participation_allowed(false, 0, mb_end)); // syncing, no window → defer
2773327778 }
2773427779
// Phase-2A recency window (deterministic heartbeat eligibility): the window is the current
// subwindow plus the previous one ONLY when both lie in the same epoch. At an epoch boundary
// (subwindow 0) it must NOT reach back to subwindow 9 of the prior epoch — that off-by-one
// would change WHO is eligible compared with the old bitmask gate.
#[test]
fn recency_subwindow_indices_boundary() {
    // Each entry: (scan_end, expected (current, previous) global subwindow indices).
    let cases: [(u64, (u64, u64)); 7] = [
        // Mid-epoch: previous = current - 1 (same epoch).
        (5 * 1440, (5, 4)),
        (5 * 1440 + 100, (5, 4)),
        // Epoch start (subwindow 0): no previous within the epoch ⇒ prev == cur.
        (0, (0, 0)),
        (14400, (10, 10)), // epoch 1, subwindow 0 ⇒ (10, 10), NOT (10, 9)
        (14400 + 50, (10, 10)),
        // Epoch 1, subwindow 1: previous is subwindow 0 of the SAME epoch (10), never 9.
        (14400 + 1440, (11, 10)),
        // Epoch 2, subwindow 0: again no bridge.
        (2 * 14400, (20, 20)),
    ];
    for &(scan_end, expected) in cases.iter() {
        assert_eq!(recency_subwindow_indices(scan_end), expected);
    }
}
27797+
2773527798 // committee_for_height determinism: genesis era ⇒ None (caller uses the genesis committee), and
2773627799 // an ABSENT N-2 snapshot ⇒ None — REJECT, never a per-node walk-back guess (which would fork).
2773727800 #[test]
0 commit comments