Skip to content

Commit 2c785fd

Browse files
zeljkoX and tirumerla authored
feat(evm): handle nonce gaps before resubmission (#726)
* fix: Extend non retriable rpc messages
* chore: PR suggestion
* feat: Add nonce sync logic
* chore: Progress
* chore: Improvements
* chore: Improvements
* chore: Improvements
* chore: PR suggestions
* chore: Improvements

---------

Co-authored-by: tirumerla <tirumerla@gmail.com>
1 parent cf553ef commit 2c785fd

19 files changed

Lines changed: 3615 additions & 118 deletions

File tree

src/constants/evm_transaction.rs

Lines changed: 88 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -110,6 +110,40 @@ pub const ALREADY_SUBMITTED_PATTERNS: &[&str] = &[
110110
"same hash was already imported",
111111
];
112112

113+
/// Error message patterns indicating the transaction nonce is ahead of the expected on-chain nonce.
114+
/// This can be transient (burst ordering: tx N+1 arrives before N) or persistent (counter drift).
115+
///
116+
/// Checked **after** `ALREADY_SUBMITTED_PATTERNS` in `classify_submission_error` to avoid
117+
/// ambiguity. Each entry is a lowercased substring to match against the RPC error message.
118+
pub const NONCE_TOO_HIGH_PATTERNS: &[&str] = &[
119+
"nonce too high", // Geth, Erigon, Hardhat, Anvil
120+
"nonce is too high", // Geth, Erigon, Hardhat, Anvil
121+
"nonce too far in the future", // Besu
122+
"exceeds next nonce", // Nethermind
123+
"nonce out of range", // Arbitrum, Optimism, specialized RPCs
124+
"tx-nonce-too-high", // Certain SaaS RPC providers (e.g. Alchemy/Infura internal)
125+
];
126+
127+
/// Maximum number of "nonce too high" retries before escalating to a nonce health job.
128+
/// With ~25s between retries (driven by status checker resend timeout), this means
129+
/// escalation happens within ~75s — enough time for transient burst ordering to resolve.
130+
pub const MAX_NONCE_TOO_HIGH_RETRIES: u32 = 3;
131+
132+
/// Maximum number of nonces to scan when detecting gaps between on-chain and local counter.
133+
/// Gaps beyond this range are logged for operator investigation rather than automated recovery.
134+
pub const MAX_GAP_SCAN_RANGE: u64 = 100;
135+
136+
/// Metadata key used in `RelayerHealthCheck` to indicate a targeted health action.
137+
pub const HEALTH_CHECK_ACTION_KEY: &str = "health_check_action";
138+
139+
/// Value for `HEALTH_CHECK_ACTION_KEY` that triggers nonce gap detection and resolution.
140+
pub const HEALTH_CHECK_ACTION_NONCE_HEALTH: &str = "nonce_health";
141+
142+
/// Optional metadata key carrying a nonce hint for the health action.
143+
/// When present, `resolve_nonce_gaps` ensures the counter covers at least `hint + 1`
144+
/// so the scan range includes the hinted nonce. This handles the case where the
145+
/// counter was reset (e.g., after a restart) but a tx at a higher nonce still exists.
146+
pub const HEALTH_CHECK_NONCE_HINT_KEY: &str = "nonce_hint";
113147
/// Checks if a lowercased message matches "known transaction" without matching
114148
/// "unknown transaction" (substring false positive).
115149
pub fn matches_known_transaction(msg_lower: &str) -> bool {
@@ -122,3 +156,57 @@ pub fn matches_known_transaction(msg_lower: &str) -> bool {
122156
}
123157
false
124158
}
159+
160+
#[cfg(test)]
161+
mod tests {
162+
use super::*;
163+
164+
#[test]
165+
fn test_nonce_too_high_patterns_match_expected_strings() {
166+
let cases = [
167+
"nonce too high",
168+
"nonce is too high",
169+
"nonce too far in the future",
170+
"exceeds next nonce",
171+
"nonce out of range",
172+
];
173+
for case in &cases {
174+
let msg_lower = case.to_lowercase();
175+
assert!(
176+
NONCE_TOO_HIGH_PATTERNS
177+
.iter()
178+
.any(|p| msg_lower.contains(p)),
179+
"Expected NONCE_TOO_HIGH_PATTERNS to match: {case}"
180+
);
181+
}
182+
}
183+
184+
#[test]
185+
fn test_matches_known_transaction_does_not_match_nonce_too_high() {
186+
let nonce_too_high_msgs = [
187+
"nonce too high",
188+
"nonce is too high",
189+
"nonce too far in the future",
190+
"exceeds next nonce",
191+
"nonce out of range",
192+
];
193+
for msg in &nonce_too_high_msgs {
194+
assert!(
195+
!matches_known_transaction(&msg.to_lowercase()),
196+
"matches_known_transaction should NOT match nonce-too-high message: {msg}"
197+
);
198+
}
199+
}
200+
201+
#[test]
202+
fn test_matches_known_transaction_matches_known_transaction() {
203+
assert!(matches_known_transaction("known transaction"));
204+
assert!(matches_known_transaction("already known transaction here"));
205+
}
206+
207+
#[test]
208+
fn test_matches_known_transaction_does_not_match_unknown_transaction() {
209+
assert!(!matches_known_transaction("unknown transaction"));
210+
assert!(!matches_known_transaction("unknown transaction status"));
211+
}
212+
}

src/domain/relayer/evm/evm_relayer.rs

Lines changed: 9 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -72,15 +72,15 @@ where
7272
J: JobProducerTrait + Send + Sync + 'static,
7373
S: DataSignerTrait + Send + Sync + 'static,
7474
{
75-
relayer: RelayerRepoModel,
76-
signer: S,
77-
network: EvmNetwork,
78-
provider: P,
79-
relayer_repository: Arc<RR>,
80-
network_repository: Arc<NR>,
81-
transaction_repository: Arc<TR>,
82-
job_producer: Arc<J>,
83-
transaction_counter_service: Arc<TCS>,
75+
pub(super) relayer: RelayerRepoModel,
76+
pub(super) signer: S,
77+
pub(super) network: EvmNetwork,
78+
pub(super) provider: P,
79+
pub(super) relayer_repository: Arc<RR>,
80+
pub(super) network_repository: Arc<NR>,
81+
pub(super) transaction_repository: Arc<TR>,
82+
pub(super) job_producer: Arc<J>,
83+
pub(super) transaction_counter_service: Arc<TCS>,
8484
}
8585

8686
#[allow(clippy::too_many_arguments)]
@@ -134,50 +134,6 @@ where
134134
})
135135
}
136136

137-
/// Synchronizes the nonce with the blockchain.
138-
///
139-
/// # Returns
140-
///
141-
/// A `Result` indicating success or a `RelayerError` if the operation fails.
142-
#[instrument(
143-
level = "debug",
144-
skip(self),
145-
fields(
146-
request_id = ?crate::observability::request_id::get_request_id(),
147-
relayer_id = %self.relayer.id,
148-
)
149-
)]
150-
async fn sync_nonce(&self) -> Result<(), RelayerError> {
151-
let on_chain_nonce = self
152-
.provider
153-
.get_transaction_count(&self.relayer.address)
154-
.await
155-
.map_err(|e| RelayerError::ProviderError(e.to_string()))?;
156-
157-
let transaction_counter_nonce = self
158-
.transaction_counter_service
159-
.get()
160-
.await
161-
.ok()
162-
.flatten()
163-
.unwrap_or(0);
164-
165-
let nonce = std::cmp::max(on_chain_nonce, transaction_counter_nonce);
166-
167-
debug!(
168-
relayer_id = %self.relayer.id,
169-
on_chain_nonce = %on_chain_nonce,
170-
transaction_counter_nonce = %transaction_counter_nonce,
171-
"syncing nonce"
172-
);
173-
174-
debug!(nonce = %nonce, "setting nonce for relayer");
175-
176-
self.transaction_counter_service.set(nonce).await?;
177-
178-
Ok(())
179-
}
180-
181137
/// Validates the RPC connection to the blockchain provider.
182138
///
183139
/// # Returns

src/domain/relayer/evm/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
/// the `evm_relayer` submodule which contains the core logic for
44
/// relaying transactions and events between different EVM networks.
55
mod evm_relayer;
6+
mod nonce;
67
mod rpc_utils;
78
mod validations;
89

0 commit comments

Comments
 (0)