@@ -110,7 +110,7 @@ func TestUnit_NodeLifecycle_aliveLoop(t *testing.T) {
110110 tests .AssertLogEventually (t , observedLogs , "Polling disabled" )
111111 assert .Equal (t , nodeStateAlive , node .State ())
112112 })
113- t .Run ("stays alive while below pollFailureThreshold and resets counter on success " , func (t * testing.T ) {
113+ t .Run ("stays alive while below pollFailureThreshold, success decrements failure count " , func (t * testing.T ) {
114114 t .Parallel ()
115115 rpc := newMockRPCClient [ID , Head ](t )
116116 rpc .On ("GetInterceptedChainInfo" ).Return (ChainInfo {}, ChainInfo {})
@@ -132,9 +132,9 @@ func TestUnit_NodeLifecycle_aliveLoop(t *testing.T) {
132132 // stays healthy while below threshold
133133 assert .Equal (t , nodeStateAlive , node .State ())
134134 }).Times (pollFailureThreshold - 1 )
135- // 2. Successful call that is expected to reset counter
135+ // 2. Successful call that is expected to decrement the counter (counter: 2 → 1)
136136 rpc .On ("ClientVersion" , mock .Anything ).Return ("" , nil ).Once ()
137- // 3. Return error. If we have not reset the timer, we'll transition to nonAliveState
137+ // 3. Return error. Counter was decremented (not reset), so it reaches 2 — still below threshold.
138138 rpc .On ("ClientVersion" , mock .Anything ).Return ("" , pollError ).Once ()
139139 // 4. Once during the call, check if node is alive
140140 var ensuredAlive atomic.Bool
@@ -176,6 +176,37 @@ func TestUnit_NodeLifecycle_aliveLoop(t *testing.T) {
176176 return nodeStateUnreachable == node .State ()
177177 })
178178 })
179+ t .Run ("transitions to unreachable when net poll failures accumulate despite intermittent successes" , func (t * testing.T ) {
180+ t .Parallel ()
181+ rpc := newMockRPCClient [ID , Head ](t )
182+ rpc .On ("GetInterceptedChainInfo" ).Return (ChainInfo {}, ChainInfo {})
183+ const pollFailureThreshold = 3
184+ node := newSubscribedNode (t , testNodeOpts {
185+ config : testNodeConfig {
186+ pollFailureThreshold : pollFailureThreshold ,
187+ pollInterval : tests .TestInterval ,
188+ },
189+ rpc : rpc ,
190+ })
191+ defer func () { assert .NoError (t , node .close ()) }()
192+
193+ pollError := errors .New ("failed to get ClientVersion" )
194+ // Pattern F·F·S·F·F: with the decay counter the net failure debt reaches
195+ // threshold=3 at the 5th poll (counter: 1→2→1→2→3). With the old
196+ // reset-on-success behaviour the counter resets to 0 at S and peaks at only
197+ // 2 before the next success, never tripping.
198+ rpc .On ("ClientVersion" , mock .Anything ).Return ("" , pollError ).Times (2 )
199+ rpc .On ("ClientVersion" , mock .Anything ).Return ("" , nil ).Once ()
200+ rpc .On ("ClientVersion" , mock .Anything ).Return ("" , pollError ).Times (2 )
201+ // Unlimited successes afterwards: with the old code the node then stays alive
202+ // indefinitely, so this test fails (times out) when run against the old behaviour.
203+ rpc .On ("ClientVersion" , mock .Anything ).Return ("" , nil )
204+ rpc .On ("Dial" , mock .Anything ).Return (errors .New ("failed to dial" )).Maybe ()
205+ node .declareAlive ()
206+ tests .AssertEventually (t , func () bool {
207+ return node .State () == nodeStateUnreachable
208+ })
209+ })
179210 t .Run ("with threshold poll failures, but we are the last node alive, forcibly keeps it alive" , func (t * testing.T ) {
180211 t .Parallel ()
181212 rpc := newMockRPCClient [ID , Head ](t )
0 commit comments