Skip to content

Commit 8cb2cbb

Browse files
committed
Add FinalizedStateUnavailable to ClientErrors
1 parent 77b0b05 commit 8cb2cbb

10 files changed

Lines changed: 284 additions & 139 deletions

multinode/config/config.go

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,10 @@ type MultiNode struct {
3333
NoNewFinalizedHeadsThreshold *config.Duration
3434
FinalityDepth *uint32
3535
FinalityTagEnabled *bool
36-
FinalizedBlockOffset *uint32
36+
FinalizedBlockOffset *uint32
3737

3838
// Finalized State Availability Check
39-
FinalizedStateCheckEnabled *bool
40-
FinalizedStateCheckAddress *string
4139
FinalizedStateCheckFailureThreshold *uint32
42-
FinalizedStateUnavailableRegex *string
4340
}
4441

4542
func (c *MultiNodeConfig) Enabled() bool {
@@ -100,31 +97,10 @@ func (c *MultiNodeConfig) FinalityTagEnabled() bool { return *c.MultiNode.Finali
10097

10198
func (c *MultiNodeConfig) FinalizedBlockOffset() uint32 { return *c.MultiNode.FinalizedBlockOffset }
10299

103-
func (c *MultiNodeConfig) FinalizedStateCheckEnabled() bool {
104-
return c.MultiNode.FinalizedStateCheckEnabled != nil && *c.MultiNode.FinalizedStateCheckEnabled
105-
}
106-
107-
func (c *MultiNodeConfig) FinalizedStateCheckAddress() string {
108-
if c.MultiNode.FinalizedStateCheckAddress == nil {
109-
return ""
110-
}
111-
return *c.MultiNode.FinalizedStateCheckAddress
112-
}
113-
114100
func (c *MultiNodeConfig) FinalizedStateCheckFailureThreshold() uint32 {
115-
if c.MultiNode.FinalizedStateCheckFailureThreshold == nil {
116-
return 0
117-
}
118101
return *c.MultiNode.FinalizedStateCheckFailureThreshold
119102
}
120103

121-
func (c *MultiNodeConfig) FinalizedStateUnavailableRegex() string {
122-
if c.MultiNode.FinalizedStateUnavailableRegex == nil {
123-
return ""
124-
}
125-
return *c.MultiNode.FinalizedStateUnavailableRegex
126-
}
127-
128104
func (c *MultiNodeConfig) SetFrom(f *MultiNodeConfig) {
129105
if f.MultiNode.Enabled != nil {
130106
c.MultiNode.Enabled = f.MultiNode.Enabled
@@ -183,16 +159,7 @@ func (c *MultiNodeConfig) SetFrom(f *MultiNodeConfig) {
183159
}
184160

185161
// Finalized State Availability Check
186-
if f.MultiNode.FinalizedStateCheckEnabled != nil {
187-
c.MultiNode.FinalizedStateCheckEnabled = f.MultiNode.FinalizedStateCheckEnabled
188-
}
189-
if f.MultiNode.FinalizedStateCheckAddress != nil {
190-
c.MultiNode.FinalizedStateCheckAddress = f.MultiNode.FinalizedStateCheckAddress
191-
}
192162
if f.MultiNode.FinalizedStateCheckFailureThreshold != nil {
193163
c.MultiNode.FinalizedStateCheckFailureThreshold = f.MultiNode.FinalizedStateCheckFailureThreshold
194164
}
195-
if f.MultiNode.FinalizedStateUnavailableRegex != nil {
196-
c.MultiNode.FinalizedStateUnavailableRegex = f.MultiNode.FinalizedStateUnavailableRegex
197-
}
198165
}

multinode/mock_node_metrics_test.go

Lines changed: 68 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

multinode/mock_rpc_client_test.go

Lines changed: 10 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

multinode/node.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,7 @@ type NodeConfig interface {
2727
DeathDeclarationDelay() time.Duration
2828
NewHeadsPollInterval() time.Duration
2929
VerifyChainID() bool
30-
FinalizedStateCheckEnabled() bool
3130
FinalizedStateCheckFailureThreshold() uint32
32-
FinalizedStateCheckAddress() string
33-
FinalizedStateUnavailableRegex() string
3431
}
3532

3633
type ChainConfig interface {
@@ -52,13 +49,15 @@ type nodeMetrics interface {
5249
IncrementNodeTransitionsToInvalidChainID(ctx context.Context, nodeName string)
5350
IncrementNodeTransitionsToUnusable(ctx context.Context, nodeName string)
5451
IncrementNodeTransitionsToSyncing(ctx context.Context, nodeName string)
52+
IncrementNodeTransitionsToFinalizedStateNotAvailable(ctx context.Context, nodeName string)
5553
RecordNodeClientVersion(ctx context.Context, nodeName string, version string)
5654
SetHighestSeenBlock(ctx context.Context, nodeName string, blockNumber int64)
5755
SetHighestFinalizedBlock(ctx context.Context, nodeName string, blockNumber int64)
5856
IncrementSeenBlocks(ctx context.Context, nodeName string)
5957
IncrementPolls(ctx context.Context, nodeName string)
6058
IncrementPollsFailed(ctx context.Context, nodeName string)
6159
IncrementPollsSuccess(ctx context.Context, nodeName string)
60+
IncrementFinalizedStateFailed(ctx context.Context, nodeName string)
6261
}
6362

6463
type Node[
@@ -277,7 +276,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) verifyChainID(callerCtx context.Context, lgg
277276
// The node is already closed, and any subsequent transition is invalid.
278277
// To make spotting such transitions a bit easier, return the invalid node state.
279278
return nodeStateLen
280-
case nodeStateDialed, nodeStateOutOfSync, nodeStateInvalidChainID, nodeStateSyncing:
279+
case nodeStateDialed, nodeStateOutOfSync, nodeStateInvalidChainID, nodeStateSyncing, nodeStateFinalizedStateNotAvailable:
281280
default:
282281
panic(fmt.Sprintf("cannot verify node in state %v", st))
283282
}

multinode/node_fsm.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) transitionToAlive(fn func()) {
186186
return
187187
}
188188
switch n.state {
189-
case nodeStateDialed, nodeStateInvalidChainID, nodeStateSyncing:
189+
case nodeStateDialed, nodeStateInvalidChainID, nodeStateSyncing, nodeStateFinalizedStateNotAvailable:
190190
n.state = nodeStateAlive
191191
default:
192192
panic(transitionFail(n.state, nodeStateAlive))
@@ -368,7 +368,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) declareFinalizedStateNotAvailable() {
368368
func (n *node[CHAIN_ID, HEAD, RPC]) transitionToFinalizedStateNotAvailable(fn func()) {
369369
ctx, cancel := n.stopCh.NewCtx()
370370
defer cancel()
371-
n.metrics.IncrementNodeTransitionsToUnreachable(ctx, n.name)
371+
n.metrics.IncrementNodeTransitionsToFinalizedStateNotAvailable(ctx, n.name)
372372
n.stateMu.Lock()
373373
defer n.stateMu.Unlock()
374374
if n.state == nodeStateClosed {

multinode/node_lifecycle.go

Lines changed: 44 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ package multinode
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"math"
78
"math/big"
8-
"regexp"
99
"time"
1010

1111
"github.com/smartcontractkit/chainlink-common/pkg/logger"
@@ -104,10 +104,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) aliveLoop() {
104104
var pollFailures uint32
105105

106106
// Finalized state availability check config
107-
finalizedStateCheckEnabled := n.nodePoolCfg.FinalizedStateCheckEnabled()
108107
finalizedStateCheckFailureThreshold := n.nodePoolCfg.FinalizedStateCheckFailureThreshold()
109-
finalizedStateCheckAddress := n.nodePoolCfg.FinalizedStateCheckAddress()
110-
finalizedStateUnavailableRegex := n.nodePoolCfg.FinalizedStateUnavailableRegex()
111108
var finalizedStateFailures uint32
112109

113110
for {
@@ -159,38 +156,37 @@ func (n *node[CHAIN_ID, HEAD, RPC]) aliveLoop() {
159156
return
160157
}
161158
// Separate finalized state availability check
162-
if finalizedStateCheckEnabled {
163-
stateCheckCtx, stateCheckCancel := context.WithTimeout(ctx, pollInterval)
164-
stateErr := n.RPC().CheckFinalizedStateAvailability(stateCheckCtx, finalizedStateCheckAddress)
165-
stateCheckCancel()
166-
if stateErr != nil {
167-
if isFinalizedStateUnavailableError(stateErr, finalizedStateUnavailableRegex) {
168-
if finalizedStateFailures < math.MaxUint32 {
169-
finalizedStateFailures++
170-
}
171-
lggr.Warnw("Finalized state not available", "err", stateErr, "failures", finalizedStateFailures, "threshold", finalizedStateCheckFailureThreshold)
172-
if finalizedStateCheckFailureThreshold > 0 && finalizedStateFailures >= finalizedStateCheckFailureThreshold {
173-
lggr.Errorw("RPC node cannot serve finalized state after consecutive failures", "failures", finalizedStateFailures)
174-
if n.poolInfoProvider != nil {
175-
if l, _ := n.poolInfoProvider.LatestChainInfo(); l < 2 && !n.isLoadBalancedRPC {
176-
lggr.Criticalf("RPC endpoint cannot serve finalized state; %s %s", msgCannotDisable, msgDegradedState)
177-
continue
178-
}
159+
stateCheckCtx, stateCheckCancel := context.WithTimeout(ctx, pollInterval)
160+
stateErr := n.RPC().CheckFinalizedStateAvailability(stateCheckCtx)
161+
stateCheckCancel()
162+
if stateErr != nil {
163+
if errors.Is(stateErr, ErrFinalizedStateUnavailable) {
164+
if finalizedStateFailures < math.MaxUint32 {
165+
n.metrics.IncrementFinalizedStateFailed(ctx, n.name)
166+
finalizedStateFailures++
167+
}
168+
lggr.Warnw("Finalized state not available", "err", stateErr, "failures", finalizedStateFailures, "threshold", finalizedStateCheckFailureThreshold)
169+
if finalizedStateFailures >= finalizedStateCheckFailureThreshold {
170+
lggr.Errorw("RPC node cannot serve finalized state after consecutive failures", "failures", finalizedStateFailures)
171+
if n.poolInfoProvider != nil {
172+
if l, _ := n.poolInfoProvider.LatestChainInfo(); l < 2 && !n.isLoadBalancedRPC {
173+
lggr.Criticalf("RPC endpoint cannot serve finalized state; %s %s", msgCannotDisable, msgDegradedState)
174+
continue
179175
}
180-
n.declareFinalizedStateNotAvailable()
181-
return
182176
}
183-
} else {
184-
// Treat as RPC reachability error
185-
if pollFailures < math.MaxUint32 {
186-
n.metrics.IncrementPollsFailed(ctx, n.name)
187-
pollFailures++
188-
}
189-
lggr.Warnw("Finalized state check failed with RPC error", "err", stateErr, "pollFailures", pollFailures)
177+
n.declareFinalizedStateNotAvailable()
178+
return
190179
}
191180
} else {
192-
finalizedStateFailures = 0
181+
// Treat as RPC reachability error
182+
if pollFailures < math.MaxUint32 {
183+
n.metrics.IncrementPollsFailed(ctx, n.name)
184+
pollFailures++
185+
}
186+
lggr.Warnw("Finalized state check failed with RPC error", "err", stateErr, "pollFailures", pollFailures)
193187
}
188+
} else {
189+
finalizedStateFailures = 0
194190
}
195191
case bh, open := <-headsSub.Heads:
196192
if !open {
@@ -757,43 +753,30 @@ func (n *node[CHAIN_ID, HEAD, RPC]) finalizedStateNotAvailableLoop() {
757753
case <-time.After(dialRetryBackoff.Duration()):
758754
lggr.Tracew("Trying to re-dial RPC node", "nodeState", n.getCachedState())
759755

760-
err := n.rpc.Dial(ctx)
761-
if err != nil {
762-
lggr.Errorw(fmt.Sprintf("Failed to redial RPC node: %v", err), "err", err, "nodeState", n.getCachedState())
756+
state := n.createVerifiedConn(ctx, lggr)
757+
if state != nodeStateAlive {
758+
n.setState(nodeStateFinalizedStateNotAvailable)
763759
continue
764760
}
765761

766-
n.setState(nodeStateDialed)
767-
768-
state := n.verifyConn(ctx, lggr)
769-
switch state {
770-
case nodeStateUnreachable:
762+
stateCheckCtx, stateCheckCancel := context.WithTimeout(ctx, n.nodePoolCfg.PollInterval())
763+
stateErr := n.RPC().CheckFinalizedStateAvailability(stateCheckCtx)
764+
stateCheckCancel()
765+
if stateErr != nil {
766+
if errors.Is(stateErr, ErrFinalizedStateUnavailable) {
767+
lggr.Warnw("Finalized state still not available", "err", stateErr)
768+
n.setState(nodeStateFinalizedStateNotAvailable)
769+
continue
770+
}
771+
lggr.Warnw("Finalized state check failed with RPC error", "err", stateErr)
771772
n.setState(nodeStateFinalizedStateNotAvailable)
772773
continue
773-
case nodeStateAlive:
774-
lggr.Infow(fmt.Sprintf("Successfully redialled and verified RPC node %s. Finalized state was unavailable for %s", n.String(), time.Since(unavailableAt)), "nodeState", n.getCachedState())
775-
fallthrough
776-
default:
777-
n.declareState(state)
778-
return
779774
}
775+
776+
lggr.Infow(fmt.Sprintf("Successfully redialled and verified RPC node %s. Finalized state was unavailable for %s", n.String(), time.Since(unavailableAt)), "nodeState", n.getCachedState())
777+
n.declareState(nodeStateAlive)
778+
return
780779
}
781780
}
782781
}
783782

784-
// isFinalizedStateUnavailableError checks if the error indicates that the RPC cannot serve
785-
// historical state (as opposed to an RPC reachability issue).
786-
// If regexPattern is empty, all errors are treated as state unavailable errors.
787-
func isFinalizedStateUnavailableError(err error, regexPattern string) bool {
788-
if err == nil {
789-
return false
790-
}
791-
if regexPattern == "" {
792-
return true
793-
}
794-
re, compileErr := regexp.Compile(regexPattern)
795-
if compileErr != nil {
796-
return true
797-
}
798-
return re.MatchString(err.Error())
799-
}

0 commit comments

Comments
 (0)