Skip to content

Commit 1ff366a

Browse files
AIQnetLabclaude
andcommitted
fix(onboarding): serve finalized data ungated + rebroadcast NodeRegistration; quiet log levels
- Remove the pre-activation serve-gate from handle_block_request and handle_macroblock_request. Finalized blocks/macroblocks/snapshots are QC-bound public data, served to any peer (sync-first, register-second) — a peer cannot forge a block without a 2f+1 QC. Unblocks block-sync and snapshot fast-sync for a fresh super-node not yet on-chain registered; DoS stays bounded by the per-(IP,id) rate-limit + leader-shed. - Rebroadcast a node's own NodeRegistration (bounded: up to 30 attempts, one per ~60s cycle) from the periodic registration loop so a dropped join-time broadcast still reaches a producer for inclusion (one-shot send was lost when the joining node was poorly connected). - Log hygiene: set_node_reputation deprecation spam -> DBG; epoch_boundary_crossed and storage startup notes WARN -> DBG; VRF key-without-PoP WARN only for non-genesis; narrow the private-IP label to RFC1918 172.16/12. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 147b4ff commit 1ff366a

3 files changed

Lines changed: 76 additions & 62 deletions

File tree

development/qnet-integration/src/node.rs

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,16 @@ pub static SYNC_IN_PROGRESS: AtomicBool = AtomicBool::new(false);
182182
pub static FAST_SYNC_IN_PROGRESS: AtomicBool = AtomicBool::new(false);
183183
pub static NODE_IS_SYNCHRONIZED: AtomicBool = AtomicBool::new(false);
184184

185+
/// Signed NodeRegistration awaiting on-chain inclusion: (node_id, tx_bytes, attempts_left). The
186+
/// periodic registration loop rebroadcasts it (one per ~60s cycle, bounded budget) so a single
187+
/// join-time broadcast that is dropped while the node is poorly connected still reaches a producer.
188+
/// One-shot send does not scale to thousands of joining super-nodes. Cleared when the budget runs out
189+
/// (the rebroadcast loop does not check inclusion; by then, with serving open, the node is expected to be well-connected and long since included).
190+
pub static PENDING_NODE_REGISTRATION: std::sync::Mutex<Option<(String, Vec<u8>, u32)>> =
191+
std::sync::Mutex::new(None);
192+
/// Rebroadcast budget for a pending NodeRegistration (~1 per 60s ⇒ ~30 min of retries).
193+
pub const PENDING_REGISTRATION_MAX_REBROADCASTS: u32 = 30;
194+
185195
// ═══════════════════════════════════════════════════════════════════════════════
186196
// L1 ARCHITECTURE: Global coordinator handle for phase-aware decisions
187197
// Set once during node startup, read from anywhere.
@@ -4869,8 +4879,15 @@ impl BlockchainNode {
48694879
if pk_hex.len() == crate::crypto::vrf::D3_PK_BYTES * 2 {
48704880
// Log if registering key from unsigned TX (no proof-of-possession)
48714881
if tx.dilithium_signature.is_none() || tx.dilithium_signature.as_ref().map(|s| s.is_empty()).unwrap_or(true) {
4872-
println!("[WARN][VRF] key_registered_without_pop node={}",
4873-
&node_id[..16.min(node_id.len())]);
4882+
// Genesis identities install their key from the trusted genesis block
4883+
// (no proof-of-possession by design) — DBG. Only a NON-genesis unsigned
4884+
// key warrants a WARN (real missing-PoP signal).
4885+
if node_id.starts_with("genesis_node_") {
4886+
if is_debug() { println!("[DBG][VRF] genesis_key_registered node={}", &node_id[..16.min(node_id.len())]); }
4887+
} else {
4888+
println!("[WARN][VRF] key_registered_without_pop node={}",
4889+
&node_id[..16.min(node_id.len())]);
4890+
}
48744891
}
48754892
if !crate::genesis_constants::has_vrf_key(node_id) {
48764893
if let Ok(pk_bytes) = hex::decode(pk_hex) {
@@ -15151,6 +15168,27 @@ impl BlockchainNode {
1515115168
if let Some(ref p2p) = unified_p2p {
1515215169
if is_info() { println!("[INFO][ACTIVE] periodic_registration h={} best={}", reg_our_h, reg_best_h); }
1515315170
p2p.register_as_active_node_async().await;
15171+
15172+
// Bounded rebroadcast of our own NodeRegistration so a dropped join-time
15173+
// broadcast still reaches a producer. Re-applying an already-included
15174+
// registration is a no-op (nonce/dedup), so this is safe; the attempt budget bounds the resends.
15175+
let resend = if let Ok(mut guard) = PENDING_NODE_REGISTRATION.lock() {
15176+
let out = if let Some((id, bytes, attempts)) = guard.as_mut() {
15177+
if *attempts > 0 {
15178+
*attempts -= 1;
15179+
Some((id.clone(), bytes.clone(), *attempts))
15180+
} else { None }
15181+
} else { None };
15182+
if matches!(guard.as_ref(), Some((_, _, 0))) { *guard = None; } // budget spent
15183+
out
15184+
} else { None };
15185+
if let Some((reg_id, reg_bytes, attempts_left)) = resend {
15186+
if let Err(e) = p2p.broadcast_transaction(reg_bytes) {
15187+
if is_warn() { println!("[WARN][REG] rebroadcast_fail id={} err={}", reg_id, e); }
15188+
} else if is_info() {
15189+
println!("[INFO][REG] registration_rebroadcast id={} attempts_left={}", reg_id, attempts_left);
15190+
}
15191+
}
1515415192
}
1515515193
} else {
1515615194
if is_info() {
@@ -15422,8 +15460,14 @@ impl BlockchainNode {
1542215460

1542315461
if mb_missing && expected_mb > 0 {
1542415462
let blocks_since = canonical_height.saturating_sub(check_boundary);
15425-
println!("[WARN][SYNC] epoch_boundary_crossed h={} mb={} MISSING blocks_without={} → direct macroblock sync",
15426-
canonical_height, expected_mb, blocks_since);
15463+
// Routine: fires at every macroblock boundary whose macroblock
15464+
// the node has not sealed locally yet (finality trails production) → triggers
15465+
// a direct macroblock sync. DBG, not WARN — a persistent gap surfaces via
15466+
// finality height / consensus_driver_behind, not this per-boundary trace.
15467+
if is_debug() {
15468+
println!("[DBG][SYNC] epoch_boundary_crossed h={} mb={} missing blocks_without={} → direct macroblock sync",
15469+
canonical_height, expected_mb, blocks_since);
15470+
}
1542715471

1542815472
let p2p_pfp = unified_p2p.clone();
1542915473
tokio::spawn(async move {
@@ -27865,11 +27909,13 @@ if is_info() { println!("[INFO][SYNC] recovered node={} lag={}", node_id_for_syn
2786527909
&wallet_address[..16.min(wallet_address.len())],
2786627910
&tx_hash[..16.min(tx_hash.len())]);
2786727911
}
27868-
// v6.5 FIX: Broadcast NodeRegistration TX to network
27869-
// PROBLEM: TX was only added to local mempool without broadcast.
27870-
// If this node is not the current producer, TX would never be included in a block.
27871-
// SOLUTION: Use broadcast_transaction() (Gulf Stream → producer + gossip backup)
27872-
// Same as NodeActivation TX in activation_validation.rs:1984
27912+
// Track for bounded rebroadcast: the periodic registration loop re-sends this so a
27913+
// dropped join-time broadcast still reaches a producer for inclusion.
27914+
if let Ok(mut pend) = PENDING_NODE_REGISTRATION.lock() {
27915+
*pend = Some((self.node_id.clone(), tx_bytes.clone(), PENDING_REGISTRATION_MAX_REBROADCASTS));
27916+
}
27917+
// Broadcast NodeRegistration TX (producer-direct + gossip backup) so a non-producer
27918+
// node's TX still reaches a producer for inclusion.
2787327919
if let Some(ref p2p) = self.unified_p2p {
2787427920
if let Err(e) = p2p.broadcast_transaction(tx_bytes) {
2787527921
if is_warn() { println!("[WARN][REG] broadcast_fail hash={}... err={}", &tx_hash[..16.min(tx_hash.len())], e); }

development/qnet-integration/src/storage.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,8 +1053,8 @@ impl PersistentStorage {
10531053
// Previously had arbitrary `< 100` cutoff — if metadata stuck at e.g. 5000
10541054
// but blocks exist up to 8000, recovery was SKIPPED and node stalled permanently.
10551055
if result.is_none() {
1056-
if is_warn() {
1057-
println!("[WARN][STORAGE] no_continuous_from_h={} scanning_for_first_block", metadata_height);
1056+
if is_debug() {
1057+
println!("[DBG][STORAGE] no_continuous_from_h={} scanning_for_first_block", metadata_height);
10581058
}
10591059

10601060
// Find first existing block using RocksDB iterator
@@ -3449,7 +3449,9 @@ impl Storage {
34493449
let network_size = Self::estimate_network_size_from_storage(&persistent);
34503450
let optimal_shards = crate::reward_sharding::calculate_optimal_shards(network_size) as u64;
34513451

3452-
println!("[WARN][STORAGE] AUTO-SCALING: Calculated optimal shards: {}", optimal_shards);
3452+
if crate::node::is_debug() {
3453+
println!("[DBG][STORAGE] auto_scaling optimal_shards={}", optimal_shards);
3454+
}
34533455

34543456
optimal_shards
34553457
};

development/qnet-integration/src/unified_p2p.rs

Lines changed: 16 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -482,9 +482,10 @@ pub fn get_pending_sync_count() -> usize {
482482
pub static SYNC_PEER_COOLDOWN: Lazy<DashMap<String, (u64, u32)>> =
483483
Lazy::new(|| DashMap::new());
484484

485-
// v32.15: pre-activation P2P sync gate. Holds super-node ids whose on-chain
486-
// NodeActivation has been applied. Populated from state at boot + on each
487-
// block apply. Drives admission in handle_block_request / handle_macroblock_sync_request.
485+
// Super-node ids whose on-chain NodeActivation has been applied (from state at boot + on each
486+
// block apply). NOTE: no longer gates sync serving — finalized blocks/macroblocks are public,
487+
// QC-bound data served to any peer (sync-first, register-second). Kept as the registered-super
488+
// set for participation-side use (eligibility/reputation), NOT for admission.
488489
pub static REGISTERED_SUPER_NODES: Lazy<DashMap<String, ()>> = Lazy::new(DashMap::new);
489490

490491
pub fn add_registered_super_node(node_id: String) {
@@ -18395,33 +18396,10 @@ impl SimplifiedP2P {
1839518396
// Update last_seen for requesting peer
1839618397
self.update_peer_last_seen(from_peer);
1839718398

18398-
// v32.15: pre-activation gate. Genesis IPs always pass. Bootstrap grace
18399-
// window (chain ≤ 90) allows all peers to solve chicken-and-egg. Past
18400-
// that, super_node_* requester_ids must be on-chain registered. Reply
18401-
// with an empty batch so the requester doesn't timeout; they need to
18402-
// complete activation before bulk-sync is served.
18403-
let from_ip = from_peer.split(':').next().unwrap_or(from_peer);
18404-
let is_genesis_peer = is_genesis_node_ip(from_ip);
18405-
if !is_genesis_peer && requester_id.starts_with("super_node_") {
18406-
let chain_h = LOCAL_BLOCKCHAIN_HEIGHT.load(std::sync::atomic::Ordering::Relaxed);
18407-
if chain_h > 90 && !is_super_node_registered(&requester_id) {
18408-
if crate::node::is_warn() {
18409-
let id_short: String = requester_id.chars().take(20).collect();
18410-
println!("[WARN][SYNC] gate_unregistered peer={} id={} chain_h={} require_activation",
18411-
from_peer, id_short, chain_h);
18412-
}
18413-
let response = NetworkMessage::BlocksBatch {
18414-
blocks: Vec::new(),
18415-
from_height,
18416-
to_height: from_height,
18417-
sender_id: self.node_id.clone(),
18418-
};
18419-
if let Some(peer_addr) = self.peer_id_to_addr.get(&requester_id) {
18420-
self.send_network_message(&peer_addr.clone(), response);
18421-
}
18422-
return;
18423-
}
18424-
}
18399+
// Finalized blocks are public, QC-bound data: served to ANY peer so a fresh node bootstraps
18400+
// BEFORE it is on-chain registered (sync-first, register-second). A peer cannot forge a block
18401+
// (needs a 2f+1 Dilithium3 QC), so identity is not a serving prerequisite. DoS is bounded by
18402+
// the per-(IP,id) rate-limit below + leader-shed — not by registration status.
1842518403

1842618404
// Shed sync-serving ONLY while actively producing (protects the producer's RocksDB I/O
1842718405
// budget). A node elected for the next slot but STALLED — no block produced in the last
@@ -18763,24 +18741,10 @@ impl SimplifiedP2P {
1876318741
// Update last_seen for requesting peer
1876418742
self.update_peer_last_seen(from_peer);
1876518743

18766-
// v32.15: pre-activation gate — same policy as handle_block_request.
18767-
// Bootstrap grace (chain ≤ 90), genesis IP, or on-chain registered id.
18768-
{
18769-
let from_ip = from_peer.split(':').next().unwrap_or(from_peer);
18770-
let is_genesis_peer = is_genesis_node_ip(from_ip);
18771-
if !is_genesis_peer && requester_id.starts_with("super_node_") {
18772-
let chain_h = LOCAL_BLOCKCHAIN_HEIGHT.load(std::sync::atomic::Ordering::Relaxed);
18773-
if chain_h > 90 && !is_super_node_registered(&requester_id) {
18774-
if crate::node::is_warn() {
18775-
let id_short: String = requester_id.chars().take(20).collect();
18776-
println!("[WARN][MB_SYNC] gate_unregistered peer={} id={} chain_h={} require_activation",
18777-
from_peer, id_short, chain_h);
18778-
}
18779-
return;
18780-
}
18781-
}
18782-
}
18783-
18744+
// Finalized macroblocks (and the snapshot-binding fetch that rides this path) are public,
18745+
// 2f+1-QC-bound data: served to ANY peer so a fresh node can fast-sync via snapshot BEFORE
18746+
// registration. No identity gate; DoS is bounded by the rate-limit below.
18747+
1878418748
// v3.0: CRITICAL FIX - Genesis nodes bypass rate limiting to prevent network isolation
1878518749
let is_genesis_requester = requester_id.starts_with("genesis_node_");
1878618750
let is_genesis_peer = from_peer.split(':').next()
@@ -20527,8 +20491,10 @@ impl SimplifiedP2P {
2052720491
} else {
2052820492
get_privacy_id_for_addr(node_id)
2052920493
};
20530-
if crate::node::is_info() {
20531-
println!("[WARN][P2P] set_node_reputation() deprecated - {} reputation managed via blockchain", display_id);
20494+
// Deprecated no-op (reputation is chain-derived). Trace at DBG only — it was firing
20495+
// hundreds of [WARN] lines per run from recurring call sites, drowning real warnings.
20496+
if crate::node::is_debug() {
20497+
println!("[DBG][P2P] set_node_reputation() deprecated - {} reputation managed via blockchain", display_id);
2053220498
}
2053320499
}
2053420500

0 commit comments

Comments
 (0)