Skip to content

Commit ade086e

Browse files
committed
x
1 parent e23b74b commit ade086e

10 files changed

Lines changed: 93 additions & 47 deletions

File tree

apps/evm/go.mod

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ module github.com/evstack/ev-node/apps/evm
22

33
go 1.25.7
44

5-
// replace (
6-
// github.com/evstack/ev-node => ../../
7-
// github.com/evstack/ev-node/execution/evm => ../../execution/evm
8-
// )
5+
replace (
6+
github.com/evstack/ev-node => ../../
7+
github.com/evstack/ev-node/execution/evm => ../../execution/evm
8+
)
99

1010
require (
1111
github.com/ethereum/go-ethereum v1.17.2

apps/evm/go.sum

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -472,12 +472,8 @@ github.com/ethereum/go-bigmodexpfix v0.0.0-20250911101455-f9e208c548ab h1:rvv6MJ
472472
github.com/ethereum/go-bigmodexpfix v0.0.0-20250911101455-f9e208c548ab/go.mod h1:IuLm4IsPipXKF7CW5Lzf68PIbZ5yl7FFd74l/E0o9A8=
473473
github.com/ethereum/go-ethereum v1.17.2 h1:ag6geu0kn8Hv5FLKTpH+Hm2DHD+iuFtuqKxEuwUsDOI=
474474
github.com/ethereum/go-ethereum v1.17.2/go.mod h1:KHcRXfGOUfUmKg51IhQ0IowiqZ6PqZf08CMtk0g5K1o=
475-
github.com/evstack/ev-node v1.1.0-rc.1 h1:NtPuuDLqN2h4/edu5zxRlZAxmLkTG3ncXBO2PlCDvVs=
476-
github.com/evstack/ev-node v1.1.0-rc.1/go.mod h1:6rhWWzuyiqNn/erDmWCk1aLxUuQphyOGIRq56/smSyk=
477475
github.com/evstack/ev-node/core v1.0.0 h1:s0Tx0uWHme7SJn/ZNEtee4qNM8UO6PIxXnHhPbbKTz8=
478476
github.com/evstack/ev-node/core v1.0.0/go.mod h1:n2w/LhYQTPsi48m6lMj16YiIqsaQw6gxwjyJvR+B3sY=
479-
github.com/evstack/ev-node/execution/evm v1.0.0 h1:UTAdCrnPsLoGzSgsBx4Kv76jkXpMmHBIpNv3MxyzWPo=
480-
github.com/evstack/ev-node/execution/evm v1.0.0/go.mod h1:UrqkiepfTMiot6M8jnswgu3VU8SSucZpaMIHIl22/1A=
481477
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
482478
github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM=
483479
github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=

docs/guides/raft_production.md

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Raft is configured via CLI flags or the `config.toml` file under the `[raft]` (o
3333
| `--evnode.raft.raft_addr` | `raft.raft_addr` | TCP address for Raft transport. | `0.0.0.0:5001` (Bind to private IP) |
3434
| `--evnode.raft.raft_dir` | `raft.raft_dir` | Directory for Raft data. | `/data/raft` (Must be persistent) |
3535
| `--evnode.raft.peers` | `raft.peers` | Comma-separated list of peer addresses in format `nodeID@host:port`. | `node-1@10.0.0.1:5001,node-2@10.0.0.2:5001,node-3@10.0.0.3:5001` |
36-
| `--evnode.raft.bootstrap` | `raft.bootstrap` | Bootstrap the cluster. **Required** for initial setup. | `true` (See Limitations) |
36+
| `--evnode.raft.bootstrap` | `raft.bootstrap` | Compatibility flag. Startup mode is selected automatically from persisted raft configuration state. | optional |
3737

3838
### Timeout Tuning
3939

@@ -55,11 +55,15 @@ Ideally, a failover should complete within `2 * BlockTime` to minimize user impa
5555
5656
## Production Deployment Principles
5757

58-
### 1. Static Peering & Bootstrap
59-
Current implementation requires **Bootstrap Mode** (`--evnode.raft.bootstrap=true`) for all nodes participating in the cluster initialization.
60-
* **All nodes** should list the full set of peers in `--evnode.raft.peers`.
58+
### 1. Static Peering & Automatic Startup Mode
59+
Use static peering with automatic mode selection from local raft configuration:
60+
* If local raft configuration already exists in `--evnode.raft.raft_dir`, the node starts in rejoin mode.
61+
* If no local raft configuration exists yet, the node bootstraps from configured peers.
62+
* `--evnode.raft.bootstrap` is retained for compatibility but does not control mode selection.
63+
* **All configured cluster members** should list the full set of peers in `--evnode.raft.peers`.
6164
* The `peers` list format is strict: `NodeID@Host:Port`.
62-
* **Limitation**: Dynamic addition of peers (Run-time Membership Changes) via RPC/CLI is not currently exposed. The cluster membership is static based on the initial bootstrap configuration.
65+
* **Limitation**: Dynamic addition of peers (run-time membership changes) via RPC/CLI is not currently exposed.
66+
* **Not supported**: Joining an existing cluster as a brand-new node that was not part of the initial static membership.
6367

6468
### 2. Infrastructure Requirements
6569
* **Encrypted Network (CRITICAL)**: Raft traffic is **unencrypted** (plain TCP). You **MUST** run the cluster inside a private network, VPN, or encrypted mesh (e.g., WireGuard, Tailscale). **Never expose Raft ports to the public internet**; doing so allows attackers to hijack the cluster consensus.
@@ -86,13 +90,13 @@ Monitor the following metrics (propagated via Prometheus if enabled):
8690

8791
```bash
8892
./ev-node start \
89-
--node.aggregator \
90-
--raft.enable \
91-
--raft.node_id="node-1" \
92-
--raft.raft_addr="0.0.0.0:5001" \
93-
--raft.raft_dir="/var/lib/ev-node/raft" \
94-
--raft.bootstrap=true \
95-
--raft.peers="node-1@10.0.1.1:5001,node-2@10.0.1.2:5001,node-3@10.0.1.3:5001" \
96-
--p2p.listen_address="/ip4/0.0.0.0/tcp/26656" \
93+
--evnode.node.aggregator=true \
94+
--evnode.raft.enable=true \
95+
--evnode.raft.node_id="node-1" \
96+
--evnode.raft.raft_addr="0.0.0.0:5001" \
97+
--evnode.raft.raft_dir="/var/lib/ev-node/raft" \
98+
--evnode.raft.bootstrap=true \
99+
--evnode.raft.peers="node-1@10.0.1.1:5001,node-2@10.0.1.2:5001,node-3@10.0.1.3:5001" \
100+
--evnode.p2p.listen_address="/ip4/0.0.0.0/tcp/26656" \
97101
...other flags
98102
```

docs/learn/config.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1321,7 +1321,7 @@ _Constant:_ `FlagRaftDir`
13211321
### Raft Bootstrap
13221322

13231323
**Description:**
1324-
If true, bootstraps a new Raft cluster. Only set this on the very first node when initializing a new cluster.
1324+
Legacy compatibility flag. Startup mode is now auto-selected from persisted raft configuration state, so this flag is not used to choose bootstrap vs rejoin.
13251325

13261326
**YAML:**
13271327

@@ -1352,6 +1352,16 @@ raft:
13521352
_Default:_ `""` (empty)
13531353
_Constant:_ `FlagRaftPeers`
13541354

1355+
### Raft Startup Mode
1356+
1357+
Raft startup mode is selected automatically from local raft configuration state:
1358+
1359+
* If the node already has persisted raft configuration in `raft.raft_dir`, it starts in rejoin mode.
1360+
* If no raft configuration exists yet, it bootstraps a cluster from configured peers.
1361+
* `raft.bootstrap` is retained for compatibility but does not control mode selection.
1362+
1363+
`--evnode.raft.rejoin` has been removed.
1364+
13551365
### Raft Snap Count
13561366

13571367
**Description:**

node/failover.go

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,22 +183,29 @@ func setupFailoverState(
183183
}, nil
184184
}
185185

186+
// shouldStartSyncInPublisherMode avoids startup deadlock when a raft leader boots
187+
// with empty sync stores and no peer can serve height 1 yet.
186188
func (f *failoverState) shouldStartSyncInPublisherMode(ctx context.Context) bool {
187189
if !f.isAggregator || f.raftNode == nil || !f.raftNode.IsLeader() {
188190
return false
189191
}
190192

191-
height, err := f.store.Height(ctx)
193+
storeHeight, err := f.store.Height(ctx)
192194
if err != nil {
193-
f.logger.Warn().Err(err).Msg("cannot determine local height; keeping blocking sync startup")
195+
f.logger.Warn().Err(err).Msg("cannot determine store height; keeping blocking sync startup")
194196
return false
195197
}
196-
if height > 0 {
198+
headerHeight := f.headerSyncService.Store().Height()
199+
dataHeight := f.dataSyncService.Store().Height()
200+
if headerHeight > 0 || dataHeight > 0 {
197201
return false
198202
}
199203

200204
f.logger.Info().
201-
Msg("raft leader with empty store: starting sync services in publisher mode")
205+
Uint64("store_height", storeHeight).
206+
Uint64("header_height", headerHeight).
207+
Uint64("data_height", dataHeight).
208+
Msg("raft-enabled aggregator with empty sync stores: starting sync services in publisher mode")
202209
return true
203210
}
204211

pkg/config/config.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ type RaftConfig struct {
400400
NodeID string `mapstructure:"node_id" yaml:"node_id" comment:"Unique identifier for this node in the Raft cluster"`
401401
RaftAddr string `mapstructure:"raft_addr" yaml:"raft_addr" comment:"Address for Raft communication (host:port)"`
402402
RaftDir string `mapstructure:"raft_dir" yaml:"raft_dir" comment:"Directory for Raft logs and snapshots"`
403-
Bootstrap bool `mapstructure:"bootstrap" yaml:"bootstrap" comment:"Bootstrap a new Raft cluster (only for the first node)"`
403+
Bootstrap bool `mapstructure:"bootstrap" yaml:"bootstrap" comment:"Bootstrap a new static Raft cluster during initial bring-up"`
404404
Peers string `mapstructure:"peers" yaml:"peers" comment:"Comma-separated list of peer Raft addresses (nodeID@host:port)"`
405405
SnapCount uint64 `mapstructure:"snap_count" yaml:"snap_count" comment:"Number of log entries between snapshots"`
406406
SendTimeout time.Duration `mapstructure:"send_timeout" yaml:"send_timeout" comment:"Max duration to wait for a message to be sent to a peer"`
@@ -646,7 +646,7 @@ func AddFlags(cmd *cobra.Command) {
646646
cmd.Flags().String(FlagRaftNodeID, def.Raft.NodeID, "unique identifier for this node in the Raft cluster")
647647
cmd.Flags().String(FlagRaftAddr, def.Raft.RaftAddr, "address for Raft communication (host:port)")
648648
cmd.Flags().String(FlagRaftDir, def.Raft.RaftDir, "directory for Raft logs and snapshots")
649-
cmd.Flags().Bool(FlagRaftBootstrap, def.Raft.Bootstrap, "bootstrap a new Raft cluster (only for the first node)")
649+
cmd.Flags().Bool(FlagRaftBootstrap, def.Raft.Bootstrap, "bootstrap a new static Raft cluster during initial bring-up")
650650
cmd.Flags().String(FlagRaftPeers, def.Raft.Peers, "comma-separated list of peer Raft addresses (nodeID@host:port)")
651651
cmd.Flags().Uint64(FlagRaftSnapCount, def.Raft.SnapCount, "number of log entries between snapshots")
652652
cmd.Flags().Duration(FlagRaftSendTimeout, def.Raft.SendTimeout, "max duration to wait for a message to be sent to a peer")

pkg/config/config_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,18 @@ func TestAddFlags(t *testing.T) {
122122
assertFlagValue(t, flags, FlagRPCAddress, DefaultConfig().RPC.Address)
123123
assertFlagValue(t, flags, FlagRPCEnableDAVisualization, DefaultConfig().RPC.EnableDAVisualization)
124124

125+
// Raft flags
126+
assertFlagValue(t, flags, FlagRaftEnable, DefaultConfig().Raft.Enable)
127+
assertFlagValue(t, flags, FlagRaftNodeID, DefaultConfig().Raft.NodeID)
128+
assertFlagValue(t, flags, FlagRaftAddr, DefaultConfig().Raft.RaftAddr)
129+
assertFlagValue(t, flags, FlagRaftDir, DefaultConfig().Raft.RaftDir)
130+
assertFlagValue(t, flags, FlagRaftBootstrap, DefaultConfig().Raft.Bootstrap)
131+
assertFlagValue(t, flags, FlagRaftPeers, DefaultConfig().Raft.Peers)
132+
assertFlagValue(t, flags, FlagRaftSnapCount, DefaultConfig().Raft.SnapCount)
133+
assertFlagValue(t, flags, FlagRaftSendTimeout, DefaultConfig().Raft.SendTimeout)
134+
assertFlagValue(t, flags, FlagRaftHeartbeatTimeout, DefaultConfig().Raft.HeartbeatTimeout)
135+
assertFlagValue(t, flags, FlagRaftLeaderLeaseTimeout, DefaultConfig().Raft.LeaderLeaseTimeout)
136+
125137
// Pruning flags
126138
assertFlagValue(t, flags, FlagPruningMode, DefaultConfig().Pruning.Mode)
127139
assertFlagValue(t, flags, FlagPruningKeepRecent, DefaultConfig().Pruning.KeepRecent)

pkg/raft/node.go

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,8 @@ func (n *Node) Start(_ context.Context) error {
109109
if n == nil {
110110
return nil
111111
}
112-
if !n.config.Bootstrap {
113-
// it is intended to fail fast here. at this stage only bootstrap mode is supported.
114-
return fmt.Errorf("raft cluster requires bootstrap mode")
115-
}
116-
117112
if future := n.raft.GetConfiguration(); future.Error() == nil && len(future.Configuration().Servers) > 0 {
118-
n.logger.Info().Msg("cluster already bootstrapped, skipping")
113+
n.logger.Info().Msg("raft node started with existing local state")
119114
return nil
120115
}
121116

pkg/raft/node_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package raft
22

33
import (
4+
"context"
45
"errors"
56
"testing"
67

@@ -108,3 +109,8 @@ func TestDeduplicateServers(t *testing.T) {
108109
})
109110
}
110111
}
112+
113+
func TestNodeStartNilNoop(t *testing.T) {
114+
var node *Node
115+
require.NoError(t, node.Start(context.Background()))
116+
}

test/e2e/failover_e2e_test.go

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ func TestLeaseFailoverE2E(t *testing.T) {
101101
t.Log("Node1 is up")
102102
}()
103103

104-
// Start node2 (bootstrap node)
104+
// Start node2
105105
go func() {
106106
t.Log("Starting Node2")
107107
p2pPeers := node1P2PAddr + "," + node3P2PAddr
@@ -110,7 +110,7 @@ func TestLeaseFailoverE2E(t *testing.T) {
110110
t.Log("Node2 is up")
111111
}()
112112

113-
// Start node3 (bootstrap node)
113+
// Start node3
114114
node3EthAddr := fmt.Sprintf("http://127.0.0.1:%s", fullNode3EthPort)
115115
go func() {
116116
t.Log("Starting Node3")
@@ -181,7 +181,7 @@ func TestLeaseFailoverE2E(t *testing.T) {
181181
t.Log("Restarted old leader to sync with cluster: " + oldLeader)
182182

183183
if IsNodeUp(t, oldDetails.rpcAddr, NodeStartupTimeout) {
184-
clusterNodes.Set(oldLeader, oldDetails.rpcAddr, restartedNodeProcess, oldDetails.ethAddr, oldDetails.raftAddr, "", oldDetails.p2pPeerAddr, oldDetails.engineURL, oldDetails.ethAddr)
184+
clusterNodes.Set(oldLeader, oldDetails.rpcAddr, restartedNodeProcess, oldDetails.ethAddr, oldDetails.raftAddr, oldDetails.p2pAddr, oldDetails.p2pPeerAddr, oldDetails.engineURL, oldDetails.ethAddr)
185185
} else {
186186
t.Log("+++ old leader did not recover on restart. Skipping node verification")
187187
}
@@ -298,7 +298,7 @@ func TestHASequencerRollingRestartE2E(t *testing.T) {
298298
t.Log("Node1 is up")
299299
}()
300300

301-
// Start node2 (bootstrap node)
301+
// Start node2
302302
go func() {
303303
t.Log("Starting Node2")
304304
p2pPeers := node1P2PAddr + "," + node3P2PAddr
@@ -307,7 +307,7 @@ func TestHASequencerRollingRestartE2E(t *testing.T) {
307307
t.Log("Node2 is up")
308308
}()
309309

310-
// Start node3 (bootstrap node)
310+
// Start node3
311311
node3EthAddr := fmt.Sprintf("http://127.0.0.1:%s", fullNode3EthPort)
312312
go func() {
313313
t.Log("Starting Node3")
@@ -745,15 +745,15 @@ func setupRaftSequencerNode(
745745
workDir, nodeID, raftAddr, jwtSecret, genesisHash, daAddress, bootstrapDir string,
746746
allRaftClusterMembers []string,
747747
p2pPeers, rpcAddr, p2pAddr, engineURL, ethURL string,
748-
bootstrap bool,
748+
raftBootstrap bool,
749749
passphraseFile string,
750750
) *os.Process {
751751
t.Helper()
752752
nodeHome := filepath.Join(workDir, nodeID)
753753
raftDir := filepath.Join(nodeHome, "raft")
754754

755755
jwtSecretFile := filepath.Join(nodeHome, "jwt-secret.hex")
756-
if bootstrap {
756+
if bootstrapDir != "" {
757757
initChain(t, sut, nodeHome)
758758
jwtSecretFile = createJWTSecretFile(t, nodeHome, jwtSecret)
759759

@@ -788,7 +788,7 @@ func setupRaftSequencerNode(
788788
"--evnode.raft.node_id="+nodeID,
789789
"--evnode.raft.raft_addr="+raftAddr,
790790
"--evnode.raft.raft_dir="+raftDir,
791-
"--evnode.raft.bootstrap=true",
791+
fmt.Sprintf("--evnode.raft.bootstrap=%t", raftBootstrap),
792792
"--evnode.raft.peers="+strings.Join(raftPeers, ","),
793793
"--evnode.raft.snap_count=10",
794794
"--evnode.raft.send_timeout=100ms",
@@ -964,11 +964,27 @@ func leader(t require.TestingT, nodes map[string]*nodeDetails) (string, *nodeDet
964964
continue
965965
}
966966
resp, err := client.Get(details.rpcAddr + "/raft/node")
967-
require.NoError(t, err)
968-
var status nodeStatus
969-
require.NoError(t, json.NewDecoder(resp.Body).Decode(&status))
970-
_ = resp.Body.Close()
971-
if status.IsLeader {
967+
if err != nil {
968+
continue
969+
}
970+
971+
isLeader := false
972+
func() {
973+
defer resp.Body.Close()
974+
if resp.StatusCode != http.StatusOK {
975+
return
976+
}
977+
978+
var status nodeStatus
979+
if err := json.NewDecoder(resp.Body).Decode(&status); err != nil {
980+
return
981+
}
982+
983+
if status.IsLeader {
984+
isLeader = true
985+
}
986+
}()
987+
if isLeader {
972988
return node, details
973989
}
974990
}

0 commit comments

Comments
 (0)