@@ -22214,108 +22214,46 @@ impl BlockchainNode {
2221422214 let rotation_tracker_clone = rotation_tracker.clone();
2221522215
2221622216 // ═══════════════════════════════════════════════════════════════════════════
22217- // v14.6: STALE-ROUND GUARD (pre-save re-check of consensus view)
22217+ // v22: pre-save STALE-ROUND GUARD removed
2221822218 // ═══════════════════════════════════════════════════════════════════════════
22219- // Problem root-caused from testnet h=4711 split-brain:
22219+ // The pre-broadcast `effective_round_now > microblock.timeout_round`
22220+ // check (legacy v14.6 / v15.11) was a self-yield mechanism designed
22221+ // for the round-based microblock failover model. v22 collapsed that
22222+ // model to pure VRF + time-derived skip-slot (see
22223+ // `v22_compute_empty_slot_offset` and the producer loop), so:
2222022224 //
22221- // * Block production takes 8-15 s (entropy bft_wait → PoH mixing →
22222- // state_root → Dilithium sign → serialize). The producer is busy
22223- // for that entire window.
22224- // * During the window, validators may cast signed timeout votes.
22225- // Enough of them reach f+1 / 2f+1 → adopted / certified round
22226- // advances past 0. Other nodes switch to the failover producer.
22227- // * The slow primary returns from its pipeline with a block signed
22228- // for the OLD round (e.g. timeout_round=0). The failover producer
22229- // meanwhile has already produced its own block for the NEW round
22230- // (e.g. timeout_round=16). Both blocks are valid-looking at height
22231- // h; different peers accept different ones first → split-brain.
22225+ // * `microblock.timeout_round` is hard-coded to 0 at construction;
22226+ // * the comparison `effective_round_now > 0` becomes structurally
22227+ // true whenever the macroblock-level view-change (a SEPARATE,
22228+ // preserved subsystem) bumps `HIGHEST_CERTIFIED_ROUND` — which
22229+ // happens on every legitimate commit-reveal-phase timeout —
22230+ // making the guard fire on every honest producer attempt.
2223222231 //
22233- // v14.4 removed the local `fallback_selected_self` short-cut that
22234- // allowed a node to self-promote without 2f+1 votes, but this path is
22235- // different — it's the legitimate primary producer failing to notice
22236- // that consensus has already rotated away from it.
22232+ // Forensic case (clean v22 deploy, all 5 nodes synchronised at h=89,
22233+ // first macroblock commit-reveal triggers view-change → certified round
22234+ // bumps to 3 → next microblock production at h=90 emits
22235+ // `[WARN][PROD] yield_stale_round h=90 produced_for_round=0
22236+ // effective_round_now=3 action=skip_save` and never broadcasts,
22237+ // resulting in a permanent stall observed 1.8 hours after deploy
22238+ // despite zero forks, zero pk_mismatch on the production path, and
22239+ // a fully-synchronised microblock chain).
2223722240 //
22238- // Top-tier BFT design:
22239- // Leader commitment is valid only if "I am still leader at commit
22240- // time". Before broadcasting the final block, re-read the certified
22241- // / adopted round from shared state. If it advanced past the round
22242- // we locked in at production start, our block is stale — yield
22243- // silently. The failover producer's block (at the new round) wins.
22241+ // Safety after removal
22242+ // ────────────────────
22243+ // The original guard's job was to prevent two valid producers at
22244+ // different rotation rounds from emitting blocks at the same height.
22245+ // In v22 there is exactly ONE producer per height — VRF-deterministic
22246+ // primary at offset 0, deterministically-derived fallback at
22247+ // offset ≥ 1 — so there is no "second valid candidate" to suppress.
22248+ // The peer-side producer-authority check in
22249+ // `block_pipeline.rs` still rejects any signed block whose producer
22250+ // does not match the cached VRF expectation.
2224422251 //
22245- // Safety:
22246- // * Hash chain + Dilithium sig + state_root still validate both
22247- // candidate blocks — we never accept garbage. This guard only
22248- // PREVENTS us from emitting a second valid candidate.
22249- // * Peer-side ingest does NOT reject on round comparison
22250- // (v14.8.6): microblock-round and macroblock-consensus-round
22251- // are independent domains; cross-domain comparison caused
22252- // livelock. Safety at the peer side is carried by hash chain,
22253- // Dilithium3 signature, VRF-deterministic producer, and
22254- // retroactive 2f+1 macroblock ratification.
22255- //
22256- // Scalability:
22257- // * One atomic read (HIGHEST_CERTIFIED_ROUND DashMap lookup).
22258- // Microseconds. Unaffected by validator count.
22259- //
22260- // Liveness:
22261- // * If the primary yields, the failover leader's block stands.
22262- // * If nobody else has a block ready, the next iteration of the
22263- // production loop picks up the new round deterministically and
22264- // produces cleanly.
22265- //
22266- // v15.11: stale-round self-check uses the EFFECTIVE rotation
22267- // round (live - baseline) for the current macroblock. The
22268- // baseline is the round at which the previous block in this
22269- // mb was finalized, which auto-resets to 0 for each new
22270- // height after a successful save. This eliminates the
22271- // post-stall producer mute that v14.8.10 suffered (forensic
22272- // case h=15886 → h=15899: 14 consecutive yields after a
22273- // single rotation event because the round counter persisted
22274- // across heights within the macroblock).
22275- //
22276- // Safety:
22277- // * HIGHEST_ADOPTED_ROUND is populated only after
22278- // Dilithium3 verification of f+1 signed TimeoutVotes —
22279- // unforgeable by ≤ f Byzantine validators.
22280- // * HIGHEST_CERTIFIED_ROUND still requires 2f+1 signed
22281- // votes at the same round.
22282- // * Baseline is monotonic and synced across nodes through
22283- // block application (every honest validator records the
22284- // same baseline when applying a finalized block).
22285- //
22286- // Scalability: O(1) DashMap reads per block save, independent
22287- // of validator count. Suitable for 1000+ super-node committees.
22252+ // Macroblock-level view-change is unchanged and continues to
22253+ // advance `HIGHEST_CERTIFIED_ROUND` on commit-reveal-phase failure;
22254+ // that state stays internal to the macroblock-finality subsystem
22255+ // and never gates microblock production after v22.
2228822256 // ═══════════════════════════════════════════════════════════════════════════
22289- {
22290- // v15.11: re-read effective rotation round (live - baseline)
22291- // right before broadcast. Any advance past the value baked
22292- // into the block at production start means rotation has
22293- // moved on for THIS height and our candidate is stale.
22294- // The baseline correctly excludes prior-height rotation
22295- // that has already been finalized.
22296- let effective_round_now: u64 = if unified_p2p.is_some() {
22297- crate::unified_p2p::get_effective_rotation_round(height_for_storage / 90)
22298- } else {
22299- 0
22300- };
22301- if effective_round_now > microblock.timeout_round {
22302- println!(
22303- "[WARN][PROD] yield_stale_round h={} produced_for_round={} effective_round_now={} action=skip_save",
22304- height_for_storage, microblock.timeout_round, effective_round_now
22305- );
22306- // Clear broadcast lock so the next iteration can proceed
22307- crate::unified_p2p::BLOCK_BROADCAST_IN_PROGRESS
22308- .store(false, std::sync::atomic::Ordering::SeqCst);
22309- // No explicit re-queue here: in debug mode, just log that the included TX are left in the mempool for the failover producer
22310- if !included_tx_hashes.is_empty() && is_debug() {
22311- println!(
22312- "[DBG][MEMPOOL] yield_stale_round tx_count={} left_in_mempool",
22313- included_tx_hashes.len()
22314- );
22315- }
22316- continue;
22317- }
22318- }
2231922257
2232022258 // Save synchronously to ensure block exists before height increment
2232122259 // This is FAST (just RocksDB write, ~10-50ms) and prevents race conditions
0 commit comments