Skip to content

Commit 109d9d9

Browse files
erigon-copilot[bot]Giulio2002AskAlexSharovanacrolixyperbasis
authored
[SharovBot] engineapi: fix flaky tests caused by TOCTOU port races (#20328)
## Problem Tests in `execution/engineapi` were failing intermittently in the `race-tests / tests-linux (ubuntu-latest, execution-other)` CI job: - `TestEngineApiHighGasContractsFillBlock` → `listen tcp 127.0.0.1:46053: bind: address already in use` - `TestEngineApiBlockGasOverflowSpillsToNextBlock` → `empty response from JSON-RPC server` Observed on commits a9db702 and acc9898 (2 out of 10 recent commits). **Root cause:** The test infrastructure used `freeport.NextFreePort()` to pick a free port, then later called `net.Listen()` with that port. This TOCTOU (time-of-check/time-of-use) race meant another concurrent test could bind the same port in between, causing `address already in use`. Separately, the server might not be ready when the first client request arrived, causing `empty response from JSON-RPC server`. ## Fix Pre-create `net.Listener` instances on port `:0` (kernel-assigned, guaranteed free and already bound) before any server starts, then hand the bound socket directly to `StartHTTPEndpoint`. The kernel guarantees no other process can steal the port once it's bound. **Changes:** - `node/endpoints.go`: add optional `Listener` field to `HttpEndpointConfig`; if set, skip the `net.Listen` call and use the pre-bound socket - `cmd/rpcdaemon/cli/httpcfg/http_cfg.go`: add `HttpListener` / `AuthRpcListener` fields to `HttpCfg` - `cmd/rpcdaemon/cli/config.go`: wire up pre-created listeners in both the regular RPC and engine API listener paths - `execution/engineapi/engineapitester/engine_api_tester.go`: replace `freeport.NextFreePort()` with `net.Listen("tcp", "127.0.0.1:0")` for the JSON-RPC and engine API ports ## Testing - `go build ./...` ✅ - No `*_test.go` files modified ✅ - 5 consecutive `go test -race ./execution/engineapi/...` runs all pass ✅ CI job reference: https://github.com/erigontech/erigon/actions/runs/23983504381/job/69951584262 --------- Co-authored-by: erigon-copilot[bot] <erigon-copilot[bot]@users.noreply.github.com> Co-authored-by: Giulio Rebuffo <giulio.rebuffo@gmail.com> Co-authored-by: Alexey Sharov <AskAlexSharov@gmail.com> Co-authored-by: Matt Joiner <anacrolix@gmail.com> Co-authored-by: yperbasis <andrey.ashikhmin@gmail.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com> Co-authored-by: Andrew Ashikhmin <34320705+yperbasis@users.noreply.github.com>
1 parent 67ece3f commit 109d9d9

4 files changed

Lines changed: 45 additions & 14 deletions

File tree

cmd/rpcdaemon/cli/config.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,7 @@ func startRegularRpcServer(ctx context.Context, cfg *httpcfg.HttpCfg, rpcAPI []r
810810
}
811811
listener, httpAddr, err := node.StartHTTPEndpoint(httpEndpoint, &node.HttpEndpointConfig{
812812
Timeouts: cfg.HTTPTimeouts,
813+
Listener: cfg.HttpListener,
813814
}, apiHandler)
814815
if err != nil {
815816
return fmt.Errorf("could not start RPC api: %w", err)
@@ -995,6 +996,7 @@ func createEngineListener(cfg *httpcfg.HttpCfg, engineApi []rpc.API, logger log.
995996

996997
engineListener, engineAddr, err := node.StartHTTPEndpoint(engineHttpEndpoint, &node.HttpEndpointConfig{
997998
Timeouts: cfg.AuthRpcTimeouts,
999+
Listener: cfg.AuthRpcListener,
9981000
}, engineApiHandler)
9991001
if err != nil {
10001002
return nil, nil, "", fmt.Errorf("could not start RPC api: %w", err)

cmd/rpcdaemon/cli/httpcfg/http_cfg.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package httpcfg
1818

1919
import (
20+
"net"
2021
"time"
2122

2223
"github.com/erigontech/erigon/db/datadir"
@@ -114,4 +115,9 @@ type HttpCfg struct {
114115

115116
RpcTxSyncDefaultTimeout time.Duration // Default timeout for eth_sendRawTransactionSync
116117
RpcTxSyncMaxTimeout time.Duration // Maximum timeout for eth_sendRawTransactionSync
118+
119+
// Pre-created listeners for testing (avoids TOCTOU port races).
120+
// When set, these listeners are passed to StartHTTPEndpoint instead of binding a new port.
121+
HttpListener net.Listener
122+
AuthRpcListener net.Listener
117123
}

execution/engineapi/engineapitester/engine_api_tester.go

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"errors"
2323
"fmt"
2424
"math/big"
25+
"net"
2526
"path"
2627
"testing"
2728
"time"
@@ -33,7 +34,6 @@ import (
3334
"github.com/erigontech/erigon/cmd/rpcdaemon/cli/httpcfg"
3435
"github.com/erigontech/erigon/common"
3536
"github.com/erigontech/erigon/common/crypto"
36-
"github.com/erigontech/erigon/common/freeport"
3737
"github.com/erigontech/erigon/common/hexutil"
3838
"github.com/erigontech/erigon/common/log/v3"
3939
"github.com/erigontech/erigon/common/testlog"
@@ -117,28 +117,45 @@ func DefaultEngineApiTesterGenesis(t *testing.T) (*types.Genesis, *ecdsa.Private
117117
return genesis, coinbasePrivKey
118118
}
119119

120+
// localhostEphemeral asks the kernel to pick a free TCP port on the loopback
121+
// interface. Used by the engine-api test harness to avoid TOCTOU races with
122+
// concurrent tests selecting the same port.
123+
const localhostEphemeral = "127.0.0.1:0"
124+
120125
func InitialiseEngineApiTester(t testing.TB, args EngineApiTesterInitArgs) EngineApiTester {
121126
ctx := t.Context()
122127
logger := args.Logger
123128
dirs := datadir.New(args.DataDir)
124129
genesis := args.Genesis
125-
sentryPort, err := freeport.NextFreePort()
126-
require.NoError(t, err)
127-
engineApiPort, err := freeport.NextFreePort()
130+
131+
// Pre-bind the HTTP listeners on a kernel-assigned port to avoid TOCTOU
132+
// port races with concurrent tests. Each listener is held open until the
133+
// matching server takes ownership of the socket. The t.Cleanup calls are
134+
// safety nets if InitialiseEngineApiTester aborts before hand-off; on the
135+
// happy path the http server's Shutdown closes the listener first and the
136+
// cleanup is a silent no-op. The sentry/P2P stack picks its own kernel-
137+
// assigned port directly via its config below, so no pre-bind there.
138+
jsonRpcListener, err := net.Listen("tcp", localhostEphemeral)
128139
require.NoError(t, err)
129-
jsonRpcPort, err := freeport.NextFreePort()
140+
t.Cleanup(func() { _ = jsonRpcListener.Close() })
141+
jsonRpcPort := jsonRpcListener.Addr().(*net.TCPAddr).Port
142+
engineApiListener, err := net.Listen("tcp", localhostEphemeral)
130143
require.NoError(t, err)
131-
logger.Debug("[engine-api-tester] selected ports", "sentry", sentryPort, "engineApi", engineApiPort, "jsonRpc", jsonRpcPort)
144+
t.Cleanup(func() { _ = engineApiListener.Close() })
145+
engineApiPort := engineApiListener.Addr().(*net.TCPAddr).Port
146+
logger.Debug("[engine-api-tester] selected ports", "engineApi", engineApiPort, "jsonRpc", jsonRpcPort)
132147

133148
httpConfig := httpcfg.HttpCfg{
134149
Enabled: true,
135150
HttpServerEnabled: true,
136151
WebsocketEnabled: true,
137152
HttpListenAddress: "127.0.0.1",
138153
HttpPort: jsonRpcPort,
154+
HttpListener: jsonRpcListener,
139155
API: []string{"eth"},
140156
AuthRpcHTTPListenAddress: "127.0.0.1",
141157
AuthRpcPort: engineApiPort,
158+
AuthRpcListener: engineApiListener,
142159
JWTSecretPath: path.Join(args.DataDir, "jwt.hex"),
143160
ReturnDataLimit: 100_000,
144161
EvmCallTimeout: rpccfg.DefaultEvmCallTimeout,
@@ -155,13 +172,13 @@ func InitialiseEngineApiTester(t testing.TB, args EngineApiTesterInitArgs) Engin
155172
Dirs: dirs,
156173
Http: httpConfig,
157174
P2P: p2p.Config{
158-
ListenAddr: fmt.Sprintf("127.0.0.1:%d", sentryPort),
175+
ListenAddr: localhostEphemeral,
159176
MaxPeers: 1,
160177
MaxPendingPeers: 1,
161178
NoDiscovery: true,
162179
NoDial: true,
163180
ProtocolVersion: []uint{direct.ETH68},
164-
AllowedPorts: []uint{uint(sentryPort)},
181+
AllowedPorts: []uint{0},
165182
PrivateKey: nodeKey,
166183
},
167184
}

node/endpoints.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ type HttpEndpointConfig struct {
4242
HTTPS bool
4343
CertFile string
4444
KeyFile string
45+
Listener net.Listener // optional pre-created listener; if set, StartHTTPEndpoint uses it instead of creating a new one
4546
}
4647

4748
// StartHTTPEndpoint starts the HTTP RPC endpoint.
@@ -51,12 +52,17 @@ func StartHTTPEndpoint(urlEndpoint string, cfg *HttpEndpointConfig, handler http
5152
listener net.Listener
5253
err error
5354
)
54-
socketUrl, err := url.Parse(urlEndpoint)
55-
if err != nil {
56-
return nil, nil, fmt.Errorf("malformatted http listen url %s: %w", urlEndpoint, err)
57-
}
58-
if listener, err = net.Listen(socketUrl.Scheme, socketUrl.Host+socketUrl.EscapedPath()); err != nil {
59-
return nil, nil, err
55+
if cfg.Listener != nil {
56+
listener = cfg.Listener
57+
} else {
58+
var socketUrl *url.URL
59+
socketUrl, err = url.Parse(urlEndpoint)
60+
if err != nil {
61+
return nil, nil, fmt.Errorf("malformed http listen url %s: %w", urlEndpoint, err)
62+
}
63+
if listener, err = net.Listen(socketUrl.Scheme, socketUrl.Host+socketUrl.EscapedPath()); err != nil {
64+
return nil, nil, err
65+
}
6066
}
6167
// make sure timeout values are meaningful
6268
CheckTimeouts(&cfg.Timeouts)

0 commit comments

Comments
 (0)