|
1 | 1 | package multinode |
2 | 2 |
|
3 | 3 | import ( |
| 4 | + "context" |
4 | 5 | "errors" |
5 | 6 | "fmt" |
6 | 7 | "math/big" |
@@ -1495,6 +1496,179 @@ func TestUnit_NodeLifecycle_unreachableLoop(t *testing.T) { |
1495 | 1496 | return node.State() == nodeStateAlive |
1496 | 1497 | }) |
1497 | 1498 | }) |
// Recovery-probe happy path with isSyncing left disabled: Dial and ChainID are mocked to
// succeed once, every probe poll (ClientVersion) succeeds, and the node is expected to
// move from unreachable back to alive.
| 1499 | + t.Run("with PollSuccessThreshold set, without isSyncing, node becomes alive once all probe polls succeed", func(t *testing.T) { |
| 1500 | + t.Parallel() |
| 1501 | + rpc := newMockRPCClient[ID, Head](t) |
| 1502 | + nodeChainID := RandomID() |
| 1503 | + const pollSuccessThreshold = 2 |
| 1504 | + node := newAliveNode(t, testNodeOpts{ |
| 1505 | + rpc: rpc, |
| 1506 | + chainID: nodeChainID, |
| 1507 | + config: testNodeConfig{ |
| 1508 | + pollSuccessThreshold: pollSuccessThreshold, |
| 1509 | + pollInterval: tests.TestInterval, |
| 1510 | + }, |
| 1511 | + }) |
| 1512 | + defer func() { assert.NoError(t, node.close()) }() |
| 1513 | + |
| 1514 | + rpc.On("Dial", mock.Anything).Return(nil).Once() |
| 1515 | + rpc.On("ChainID", mock.Anything).Return(nodeChainID, nil).Once() |
// Tie the number of successful probe polls to the configured threshold instead of
// hard-coding the count, so the expectation cannot drift from the config above.
| 1516 | + rpc.On("ClientVersion", mock.Anything).Return("", nil).Times(pollSuccessThreshold) |
// NOTE(review): setupRPCForAliveLoop is defined elsewhere; presumably it registers the
// expectations the alive loop needs once the probe has passed - confirm against the helper.
| 1517 | + setupRPCForAliveLoop(t, rpc) |
| 1518 | + |
| 1519 | + node.declareUnreachable() |
| 1520 | + tests.AssertEventually(t, func() bool { |
| 1521 | + return node.State() == nodeStateAlive |
| 1522 | + }) |
| 1523 | + }) |
// Recovery-probe happy path with nodeIsSyncingEnabled set: Dial and ChainID are mocked to
// succeed once, IsSyncing always reports false, every probe poll (ClientVersion) succeeds,
// and the node is expected to move from unreachable back to alive.
| 1524 | + t.Run("with PollSuccessThreshold set, node becomes alive once all probe polls succeed", func(t *testing.T) { |
| 1525 | + t.Parallel() |
| 1526 | + rpc := newMockRPCClient[ID, Head](t) |
| 1527 | + nodeChainID := RandomID() |
| 1528 | + const pollSuccessThreshold = 2 |
| 1529 | + node := newAliveNode(t, testNodeOpts{ |
| 1530 | + rpc: rpc, |
| 1531 | + chainID: nodeChainID, |
| 1532 | + config: testNodeConfig{ |
| 1533 | + nodeIsSyncingEnabled: true, |
| 1534 | + pollSuccessThreshold: pollSuccessThreshold, |
| 1535 | + pollInterval: tests.TestInterval, |
| 1536 | + }, |
| 1537 | + }) |
| 1538 | + defer func() { assert.NoError(t, node.close()) }() |
| 1539 | + |
| 1540 | + rpc.On("Dial", mock.Anything).Return(nil).Once() |
| 1541 | + rpc.On("ChainID", mock.Anything).Return(nodeChainID, nil).Once() |
// No call-count limit here: the expectation answers "not syncing" however many times it is asked.
| 1542 | + rpc.On("IsSyncing", mock.Anything).Return(false, nil) |
// Tie the number of successful probe polls to the configured threshold instead of
// hard-coding the count, so the expectation cannot drift from the config above.
| 1543 | + rpc.On("ClientVersion", mock.Anything).Return("", nil).Times(pollSuccessThreshold) |
// NOTE(review): setupRPCForAliveLoop is defined elsewhere; presumably it registers the
// expectations the alive loop needs once the probe has passed - confirm against the helper.
| 1544 | + setupRPCForAliveLoop(t, rpc) |
| 1545 | + |
| 1546 | + node.declareUnreachable() |
| 1547 | + tests.AssertEventually(t, func() bool { |
| 1548 | + return node.State() == nodeStateAlive |
| 1549 | + }) |
| 1550 | + }) |
// Recovery-probe failure path: the first redial succeeds, but the second probe poll fails,
// so the node is expected to log the probe failure and still report unreachable.
| 1551 | + t.Run("with PollSuccessThreshold set, probe poll failure keeps node unreachable and restarts redial", func(t *testing.T) { |
| 1552 | + t.Parallel() |
| 1553 | + rpc := newMockRPCClient[ID, Head](t) |
| 1554 | + nodeChainID := RandomID() |
// Observe logs from Warn level up; the probe-failure message asserted below must be visible at that level.
| 1555 | + lggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) |
| 1556 | + const pollSuccessThreshold = 2 |
| 1557 | + node := newAliveNode(t, testNodeOpts{ |
| 1558 | + rpc: rpc, |
| 1559 | + chainID: nodeChainID, |
| 1560 | + lggr: lggr, |
| 1561 | + config: testNodeConfig{ |
| 1562 | + pollSuccessThreshold: pollSuccessThreshold, |
| 1563 | + pollInterval: tests.TestInterval, |
| 1564 | + }, |
| 1565 | + }) |
| 1566 | + defer func() { assert.NoError(t, node.close()) }() |
| 1567 | + |
| 1568 | + rpc.On("Dial", mock.Anything).Return(nil).Once() |
| 1569 | + rpc.On("ChainID", mock.Anything).Return(nodeChainID, nil).Once() |
// One-shot expectations are consumed in registration order: the first probe poll succeeds,
// the second fails, so the probe stops short of pollSuccessThreshold (2).
| 1570 | + rpc.On("ClientVersion", mock.Anything).Return("", nil).Once() |
| 1571 | + rpc.On("ClientVersion", mock.Anything).Return("", errors.New("probe poll failed")).Once() |
| 1572 | + // After the probe aborts, rpc.Close() is called and the redial backoff fires again; keep every later dial failing. |
| 1573 | + rpc.On("Dial", mock.Anything).Return(errors.New("failed to dial")) |
| 1574 | + // Guard: if the node skips the probe and enters aliveLoop anyway, fail the subscription so it returns to unreachable. |
| 1575 | + rpc.On("SubscribeToHeads", mock.Anything).Return(nil, nil, errors.New("unexpected")).Maybe() |
| 1576 | + |
| 1577 | + node.declareUnreachable() |
| 1578 | + tests.AssertLogEventually(t, observedLogs, "Recovery probe poll failed; restarting redial") |
// Every later Dial is mocked to fail, so the node should still be unreachable at this point.
| 1579 | + assert.Equal(t, nodeStateUnreachable, node.State()) |
| 1580 | + }) |
| 1581 | +} |
| 1582 | + |
// TestUnit_NodeLifecycle_probeUntilStable calls node.probeUntilStable directly and checks its
// boolean result and log output for five cases: a zero threshold, an already-cancelled
// context, a failing first poll, all polls succeeding, and a failure after earlier successes.
| 1583 | +func TestUnit_NodeLifecycle_probeUntilStable(t *testing.T) { |
| 1584 | + t.Parallel() |
| 1585 | + |
| 1586 | + t.Run("returns true immediately when threshold is zero, skipping probe", func(t *testing.T) { |
| 1587 | + t.Parallel() |
| 1588 | + rpc := newMockRPCClient[ID, Head](t) |
| 1589 | + // ClientVersion is intentionally NOT mocked: probing must be entirely skipped. |
| 1590 | + node := newTestNode(t, testNodeOpts{ |
| 1591 | + rpc: rpc, |
| 1592 | + config: testNodeConfig{ |
| 1593 | + pollSuccessThreshold: 0, |
| 1594 | + pollInterval: tests.TestInterval, |
| 1595 | + }, |
| 1596 | + }) |
| 1597 | + result := node.probeUntilStable(t.Context(), logger.Test(t)) |
| 1598 | + assert.True(t, result) |
| 1599 | + }) |
| 1600 | + t.Run("returns false when context is already cancelled", func(t *testing.T) { |
| 1601 | + t.Parallel() |
| 1602 | + rpc := newMockRPCClient[ID, Head](t) |
| 1603 | + // ClientVersion must never be called: ctx is done before the first timer fires. |
| 1604 | + node := newTestNode(t, testNodeOpts{ |
| 1605 | + rpc: rpc, |
| 1606 | + config: testNodeConfig{ |
| 1607 | + pollSuccessThreshold: 2, |
| 1608 | + pollInterval: tests.TestInterval, |
| 1609 | + }, |
| 1610 | + }) |
// Cancel before the call so the context is already done when probeUntilStable starts.
| 1611 | + ctx, cancel := context.WithCancel(t.Context()) |
| 1612 | + cancel() |
| 1613 | + result := node.probeUntilStable(ctx, logger.Test(t)) |
| 1614 | + assert.False(t, result) |
| 1615 | + }) |
| 1616 | + t.Run("returns false when first poll fails", func(t *testing.T) { |
| 1617 | + t.Parallel() |
| 1618 | + rpc := newMockRPCClient[ID, Head](t) |
// Observe logs from Warn level up; the failure message asserted below must be visible at that level.
| 1619 | + lggr, observedLogs := logger.TestObserved(t, zap.WarnLevel) |
| 1620 | + node := newTestNode(t, testNodeOpts{ |
| 1621 | + rpc: rpc, |
| 1622 | + lggr: lggr, |
| 1623 | + config: testNodeConfig{ |
| 1624 | + pollSuccessThreshold: 2, |
| 1625 | + pollInterval: tests.TestInterval, |
| 1626 | + }, |
| 1627 | + }) |
// Only a single ClientVersion call is allowed, although the threshold is 2.
| 1628 | + rpc.On("ClientVersion", mock.Anything).Return("", errors.New("rpc unavailable")).Once() |
| 1629 | + result := node.probeUntilStable(t.Context(), lggr) |
| 1630 | + assert.False(t, result) |
| 1631 | + tests.AssertLogEventually(t, observedLogs, "Recovery probe poll failed; restarting redial") |
| 1632 | + }) |
| 1633 | + t.Run("returns true when all threshold polls succeed", func(t *testing.T) { |
| 1634 | + t.Parallel() |
| 1635 | + rpc := newMockRPCClient[ID, Head](t) |
// Observe from Debug level up so the per-poll success messages can be counted.
| 1636 | + lggr, observedLogs := logger.TestObserved(t, zap.DebugLevel) |
| 1637 | + const threshold = 3 |
| 1638 | + node := newTestNode(t, testNodeOpts{ |
| 1639 | + rpc: rpc, |
| 1640 | + lggr: lggr, |
| 1641 | + config: testNodeConfig{ |
| 1642 | + pollSuccessThreshold: threshold, |
| 1643 | + pollInterval: tests.TestInterval, |
| 1644 | + }, |
| 1645 | + }) |
| 1646 | + rpc.On("ClientVersion", mock.Anything).Return("v1.0.0", nil).Times(threshold) |
| 1647 | + result := node.probeUntilStable(t.Context(), lggr) |
| 1648 | + assert.True(t, result) |
// One success message is expected per successful poll.
| 1649 | + tests.AssertLogCountEventually(t, observedLogs, "Recovery probe poll succeeded", threshold) |
| 1650 | + }) |
| 1651 | + t.Run("returns false when a later probe poll fails, logging correct successesSoFar", func(t *testing.T) { |
| 1652 | + t.Parallel() |
| 1653 | + rpc := newMockRPCClient[ID, Head](t) |
| 1654 | + lggr, observedLogs := logger.TestObserved(t, zap.DebugLevel) |
| 1655 | + const threshold = 3 |
| 1656 | + node := newTestNode(t, testNodeOpts{ |
| 1657 | + rpc: rpc, |
| 1658 | + lggr: lggr, |
| 1659 | + config: testNodeConfig{ |
| 1660 | + pollSuccessThreshold: threshold, |
| 1661 | + pollInterval: tests.TestInterval, |
| 1662 | + }, |
| 1663 | + }) |
// Expectations are consumed in registration order: threshold-1 successes, then one failure.
| 1664 | + rpc.On("ClientVersion", mock.Anything).Return("v1.0.0", nil).Times(threshold - 1) |
| 1665 | + rpc.On("ClientVersion", mock.Anything).Return("", errors.New("rpc unavailable")).Once() |
| 1666 | + result := node.probeUntilStable(t.Context(), lggr) |
| 1667 | + assert.False(t, result) |
| 1668 | + // threshold-1 successes logged before the failure |
| 1669 | + tests.AssertLogCountEventually(t, observedLogs, "Recovery probe poll succeeded", threshold-1) |
| 1670 | + tests.AssertLogEventually(t, observedLogs, "Recovery probe poll failed; restarting redial") |
| 1671 | + }) |
1498 | 1672 | }
1499 | 1673 |
|
1500 | 1674 | func TestUnit_NodeLifecycle_invalidChainIDLoop(t *testing.T) { |
|
0 commit comments