@@ -2,10 +2,10 @@ package multinode
22
33import (
44 "context"
5+ "errors"
56 "fmt"
67 "math"
78 "math/big"
8- "regexp"
99 "time"
1010
1111 "github.com/smartcontractkit/chainlink-common/pkg/logger"
@@ -104,10 +104,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) aliveLoop() {
104104 var pollFailures uint32
105105
106106 // Finalized state availability check config
107- finalizedStateCheckEnabled := n .nodePoolCfg .FinalizedStateCheckEnabled ()
108107 finalizedStateCheckFailureThreshold := n .nodePoolCfg .FinalizedStateCheckFailureThreshold ()
109- finalizedStateCheckAddress := n .nodePoolCfg .FinalizedStateCheckAddress ()
110- finalizedStateUnavailableRegex := n .nodePoolCfg .FinalizedStateUnavailableRegex ()
111108 var finalizedStateFailures uint32
112109
113110 for {
@@ -159,38 +156,37 @@ func (n *node[CHAIN_ID, HEAD, RPC]) aliveLoop() {
159156 return
160157 }
161158 // Separate finalized state availability check
162- if finalizedStateCheckEnabled {
163- stateCheckCtx , stateCheckCancel := context .WithTimeout (ctx , pollInterval )
164- stateErr := n .RPC ().CheckFinalizedStateAvailability (stateCheckCtx , finalizedStateCheckAddress )
165- stateCheckCancel ()
166- if stateErr != nil {
167- if isFinalizedStateUnavailableError (stateErr , finalizedStateUnavailableRegex ) {
168- if finalizedStateFailures < math .MaxUint32 {
169- finalizedStateFailures ++
170- }
171- lggr .Warnw ("Finalized state not available" , "err" , stateErr , "failures" , finalizedStateFailures , "threshold" , finalizedStateCheckFailureThreshold )
172- if finalizedStateCheckFailureThreshold > 0 && finalizedStateFailures >= finalizedStateCheckFailureThreshold {
173- lggr .Errorw ("RPC node cannot serve finalized state after consecutive failures" , "failures" , finalizedStateFailures )
174- if n .poolInfoProvider != nil {
175- if l , _ := n .poolInfoProvider .LatestChainInfo (); l < 2 && ! n .isLoadBalancedRPC {
176- lggr .Criticalf ("RPC endpoint cannot serve finalized state; %s %s" , msgCannotDisable , msgDegradedState )
177- continue
178- }
159+ stateCheckCtx , stateCheckCancel := context .WithTimeout (ctx , pollInterval )
160+ stateErr := n .RPC ().CheckFinalizedStateAvailability (stateCheckCtx )
161+ stateCheckCancel ()
162+ if stateErr != nil {
163+ if errors .Is (stateErr , ErrFinalizedStateUnavailable ) {
164+ if finalizedStateFailures < math .MaxUint32 {
165+ n .metrics .IncrementFinalizedStateFailed (ctx , n .name )
166+ finalizedStateFailures ++
167+ }
168+ lggr .Warnw ("Finalized state not available" , "err" , stateErr , "failures" , finalizedStateFailures , "threshold" , finalizedStateCheckFailureThreshold )
169+ if finalizedStateFailures >= finalizedStateCheckFailureThreshold {
170+ lggr .Errorw ("RPC node cannot serve finalized state after consecutive failures" , "failures" , finalizedStateFailures )
171+ if n .poolInfoProvider != nil {
172+ if l , _ := n .poolInfoProvider .LatestChainInfo (); l < 2 && ! n .isLoadBalancedRPC {
173+ lggr .Criticalf ("RPC endpoint cannot serve finalized state; %s %s" , msgCannotDisable , msgDegradedState )
174+ continue
179175 }
180- n .declareFinalizedStateNotAvailable ()
181- return
182176 }
183- } else {
184- // Treat as RPC reachability error
185- if pollFailures < math .MaxUint32 {
186- n .metrics .IncrementPollsFailed (ctx , n .name )
187- pollFailures ++
188- }
189- lggr .Warnw ("Finalized state check failed with RPC error" , "err" , stateErr , "pollFailures" , pollFailures )
177+ n .declareFinalizedStateNotAvailable ()
178+ return
190179 }
191180 } else {
192- finalizedStateFailures = 0
181+ // Treat as RPC reachability error
182+ if pollFailures < math .MaxUint32 {
183+ n .metrics .IncrementPollsFailed (ctx , n .name )
184+ pollFailures ++
185+ }
186+ lggr .Warnw ("Finalized state check failed with RPC error" , "err" , stateErr , "pollFailures" , pollFailures )
193187 }
188+ } else {
189+ finalizedStateFailures = 0
194190 }
195191 case bh , open := <- headsSub .Heads :
196192 if ! open {
@@ -757,43 +753,30 @@ func (n *node[CHAIN_ID, HEAD, RPC]) finalizedStateNotAvailableLoop() {
757753 case <- time .After (dialRetryBackoff .Duration ()):
758754 lggr .Tracew ("Trying to re-dial RPC node" , "nodeState" , n .getCachedState ())
759755
760- err := n .rpc . Dial (ctx )
761- if err != nil {
762- lggr . Errorw ( fmt . Sprintf ( "Failed to redial RPC node: %v" , err ), "err" , err , "nodeState" , n . getCachedState () )
756+ state := n .createVerifiedConn (ctx , lggr )
757+ if state != nodeStateAlive {
758+ n . setState ( nodeStateFinalizedStateNotAvailable )
763759 continue
764760 }
765761
766- n .setState (nodeStateDialed )
767-
768- state := n .verifyConn (ctx , lggr )
769- switch state {
770- case nodeStateUnreachable :
762+ stateCheckCtx , stateCheckCancel := context .WithTimeout (ctx , n .nodePoolCfg .PollInterval ())
763+ stateErr := n .RPC ().CheckFinalizedStateAvailability (stateCheckCtx )
764+ stateCheckCancel ()
765+ if stateErr != nil {
766+ if errors .Is (stateErr , ErrFinalizedStateUnavailable ) {
767+ lggr .Warnw ("Finalized state still not available" , "err" , stateErr )
768+ n .setState (nodeStateFinalizedStateNotAvailable )
769+ continue
770+ }
771+ lggr .Warnw ("Finalized state check failed with RPC error" , "err" , stateErr )
771772 n .setState (nodeStateFinalizedStateNotAvailable )
772773 continue
773- case nodeStateAlive :
774- lggr .Infow (fmt .Sprintf ("Successfully redialled and verified RPC node %s. Finalized state was unavailable for %s" , n .String (), time .Since (unavailableAt )), "nodeState" , n .getCachedState ())
775- fallthrough
776- default :
777- n .declareState (state )
778- return
779774 }
775+
776+ lggr .Infow (fmt .Sprintf ("Successfully redialled and verified RPC node %s. Finalized state was unavailable for %s" , n .String (), time .Since (unavailableAt )), "nodeState" , n .getCachedState ())
777+ n .declareState (nodeStateAlive )
778+ return
780779 }
781780 }
782781}
783782
784- // isFinalizedStateUnavailableError checks if the error indicates that the RPC cannot serve
785- // historical state (as opposed to an RPC reachability issue).
786- // If regexPattern is empty, all errors are treated as state unavailable errors.
787- func isFinalizedStateUnavailableError (err error , regexPattern string ) bool {
788- if err == nil {
789- return false
790- }
791- if regexPattern == "" {
792- return true
793- }
794- re , compileErr := regexp .Compile (regexPattern )
795- if compileErr != nil {
796- return true
797- }
798- return re .MatchString (err .Error ())
799- }
0 commit comments