-
Notifications
You must be signed in to change notification settings - Fork 77
fix: nonce cache desync recovery and Loop partial-success preservation #512
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2730583
645a700
2cb9f43
27fa04c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1155,17 +1155,62 @@ func sendUserOpCore( | |
| // On-chain or cache has advanced past our nonce — use the new value | ||
| l.Debug("Nonce advanced", "old_nonce", userOp.Nonce.String(), "new_nonce", freshNonce.String()) | ||
| } else if onChainNonce.Cmp(userOp.Nonce) < 0 { | ||
| // On-chain nonce is behind our attempted nonce. Prior UserOps (at lower | ||
| // nonces) haven't mined yet, causing the bundler to reject ours with AA25. | ||
| // The nonce itself is correct — we just need to wait for prior UserOps to | ||
| // mine. Do NOT increment, as that creates an unfillable nonce gap. | ||
| freshNonce = new(big.Int).Set(userOp.Nonce) | ||
| l.Debug("On-chain nonce behind, waiting for prior UserOps to mine before retry", | ||
| "on_chain_nonce", onChainNonce.String(), | ||
| "userOp_nonce", userOp.Nonce.String()) | ||
| // Wait briefly for prior UserOps to mine; without this delay | ||
| // the retry loop burns through attempts in milliseconds. | ||
| time.Sleep(2 * time.Second) | ||
| // On-chain nonce is behind our attempted nonce. Two possibilities: | ||
| // (a) Prior UserOps at lower nonces are genuinely pending in the bundler | ||
| // mempool and just haven't mined yet → wait for them. | ||
| // (b) The cache is stale: a previous UserOp was dropped from the bundler | ||
| // mempool without the cache being invalidated, so there is nothing | ||
| // at on_chain..userOp.Nonce-1 to ever mine. Waiting is futile and | ||
| // leads to the stuck loop described in issue #510. | ||
| // Distinguish the two by inspecting the bundler mempool. If no predecessor | ||
| // UserOp exists for this sender at a nonce in [on_chain, userOp.Nonce), | ||
| // then case (b) holds — rewind the cache to the on-chain nonce. | ||
|
Comment on lines
+1158
to
+1167
|
||
| // Bound the mempool RPC so a hung bundler can't stall the retry loop. | ||
| mempoolCtx, cancelMempool := context.WithTimeout(context.Background(), 5*time.Second) | ||
| pendingOps, mempoolErr := bundlerClient.GetPendingUserOpsForSender(mempoolCtx, entrypoint, userOp.Sender) | ||
| cancelMempool() | ||
|
|
||
| predecessorPending := false | ||
| if mempoolErr == nil { | ||
| for _, op := range pendingOps { | ||
| // EIP-4337 bundlers return nonces as "0x..." hex strings. | ||
| opNonce := new(big.Int) | ||
| nonceStr := strings.TrimPrefix(op.Nonce, "0x") | ||
| if _, ok := opNonce.SetString(nonceStr, 16); !ok { | ||
| l.Debug("Failed to parse pending op nonce, skipping", | ||
| "nonce", op.Nonce, "sender", userOp.Sender.Hex()) | ||
| continue | ||
| } | ||
| if opNonce.Cmp(onChainNonce) >= 0 && opNonce.Cmp(userOp.Nonce) < 0 { | ||
| predecessorPending = true | ||
| break | ||
| } | ||
| } | ||
| } else { | ||
| l.Warn("Failed to inspect bundler mempool while diagnosing nonce conflict; assuming predecessor pending", | ||
| "error", mempoolErr) | ||
| predecessorPending = true | ||
| } | ||
|
Comment on lines
+1189
to
+1193
|
||
|
|
||
| if !predecessorPending { | ||
| // Stale cache: nothing in the mempool can ever mine to bridge the gap. | ||
| // Rewind to the on-chain nonce so the rebuild uses a value the | ||
| // EntryPoint will accept. | ||
| l.Warn("Nonce cache desync detected: no predecessor UserOp pending in bundler mempool, rewinding cache to on-chain nonce", | ||
| "sender", userOp.Sender.Hex(), | ||
| "on_chain_nonce", onChainNonce.String(), | ||
| "stale_cached_nonce", userOp.Nonce.String()) | ||
| globalNonceManager.ResetNonce(userOp.Sender) | ||
| freshNonce = new(big.Int).Set(onChainNonce) | ||
| } else { | ||
| freshNonce = new(big.Int).Set(userOp.Nonce) | ||
| l.Debug("On-chain nonce behind, waiting for prior UserOps to mine before retry", | ||
| "on_chain_nonce", onChainNonce.String(), | ||
| "userOp_nonce", userOp.Nonce.String()) | ||
| // Wait briefly for prior UserOps to mine; without this delay | ||
| // the retry loop burns through attempts in milliseconds. | ||
| time.Sleep(2 * time.Second) | ||
| } | ||
| } else { | ||
| // GetNextNonce returned the same nonce we already tried and on-chain has | ||
| // reached this nonce. A UserOp is pending at this nonce in the mempool. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test no longer asserts that the per-iteration failure is observable anywhere (e.g., in
`step.Log` or nested iteration step metadata). Since the loop now reports `Success=true` on per-iteration runner errors, it’s important to still validate that the failure details are surfaced for debugging/clients (for example: `step.Log` contains the invalid address error and/or the failed iteration step is marked unsuccessful).