Skip to content

Commit cb54f32

Browse files
committed
fix(tests): eliminate port collisions and flaky timing in parallel test suites
Replace ad-hoc port allocation with a ReservePort/ReleaseAllPorts pattern and assign each Ginkgo parallel process a dedicated, non-overlapping port range in [61000, 65534] — above the Linux ephemeral range (32768–60999) — to prevent the OS from stealing ports between allocation and bind.
1 parent a0e96ab commit cb54f32

13 files changed

Lines changed: 208 additions & 58 deletions

File tree

src/code.cloudfoundry.org/gorouter/common/component_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,8 @@ var _ = Describe("Component", func() {
167167
var natsRunner *test_util.NATSRunner
168168

169169
BeforeEach(func() {
170-
natsPort := test_util.NextAvailPort()
170+
natsPort := test_util.ReservePort()
171+
test_util.ReleasePort(natsPort)
171172
natsRunner = test_util.NewNATSRunner(int(natsPort))
172173
natsRunner.Start()
173174
mbusClient = natsRunner.MessageBus

src/code.cloudfoundry.org/gorouter/integration/common_integration_test.go

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,15 +63,17 @@ func (s *testState) SetOnlyTrustClientCACertsTrue() {
6363

6464
func NewTestState() *testState {
6565
// TODO: don't hide so much behind these test_util methods
66-
cfg, clientTLSConfig := test_util.SpecSSLConfig(test_util.NextAvailPort(), test_util.NextAvailPort(), test_util.NextAvailPort(), test_util.NextAvailPort(), test_util.NextAvailPort(), test_util.NextAvailPort(), test_util.NextAvailPort())
66+
// Use ReservePort to keep listeners open until the gorouter process
67+
// starts, preventing other processes from grabbing these ports.
68+
cfg, clientTLSConfig := test_util.SpecSSLConfig(test_util.ReservePort(), test_util.ReservePort(), test_util.ReservePort(), test_util.ReservePort(), test_util.ReservePort(), test_util.ReservePort(), test_util.ReservePort())
6769
cfg.SkipSSLValidation = false
6870
cfg.RouteServicesHairpinning = false
6971
cfg.CipherString = "ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384"
7072

7173
// TODO: why these magic numbers?
7274
cfg.PruneStaleDropletsInterval = 2 * time.Second
7375
cfg.DropletStaleThreshold = 10 * time.Second
74-
cfg.StartResponseDelayInterval = 1 * time.Second
76+
cfg.StartResponseDelayInterval = 0
7577
cfg.EndpointTimeout = 15 * time.Second
7678
cfg.EndpointDialTimeout = 500 * time.Millisecond
7779
cfg.DrainTimeout = 200 * time.Millisecond
@@ -258,6 +260,10 @@ func (s *testState) registerAndWait(rm mbus.RegistryMessage) {
258260
func (s *testState) StartGorouter() *Session {
259261
Expect(s.cfg).NotTo(BeNil(), "set up test cfg before calling this function")
260262

263+
// Release NATS port first so the NATS server can bind it, while keeping
264+
// the other ports reserved until the gorouter starts.
265+
test_util.ReleasePort(s.cfg.Nats.Hosts[0].Port)
266+
261267
s.natsRunner = test_util.NewNATSRunner(int(s.cfg.Nats.Hosts[0].Port))
262268
s.natsRunner.Start()
263269

@@ -271,6 +277,10 @@ func (s *testState) StartGorouter() *Session {
271277
Expect(err).ToNot(HaveOccurred())
272278
Expect(os.WriteFile(cfgFile, cfgBytes, 0644)).To(Succeed())
273279

280+
// Release remaining reserved ports just before the gorouter process
281+
// starts, minimizing the TOCTOU window between release and bind.
282+
test_util.ReleaseAllPorts()
283+
274284
cmd := exec.Command(gorouterPath, "-c", cfgFile)
275285
s.gorouterSession, err = Start(cmd, GinkgoWriter, GinkgoWriter)
276286
Expect(err).ToNot(HaveOccurred())
@@ -297,6 +307,12 @@ func (s *testState) StartGorouterOrFail() {
297307
}
298308

299309
func (s *testState) StopAndCleanup() {
310+
// Stop router before NATS to prevent subscriber's ClosedCB from
311+
// firing log.Fatal → os.Exit(1), which kills the test proc.
312+
if s.gorouterSession != nil && s.gorouterSession.ExitCode() == -1 {
313+
Eventually(s.gorouterSession.Terminate(), 5).Should(Exit(0))
314+
}
315+
300316
if s.natsRunner != nil {
301317
s.natsRunner.Stop()
302318
}
@@ -308,10 +324,6 @@ func (s *testState) StopAndCleanup() {
308324

309325
os.RemoveAll(s.tmpdir)
310326

311-
if s.gorouterSession != nil && s.gorouterSession.ExitCode() == -1 {
312-
Eventually(s.gorouterSession.Terminate(), 5).Should(Exit(0))
313-
}
314-
315327
if s.fakeMetron != nil {
316328
s.StopMetron()
317329
}

src/code.cloudfoundry.org/gorouter/integration/gdpr_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ var _ = Describe("GDPR", func() {
6565
testState.EnableAccessLog()
6666
testState.cfg.Status.Pass = "pass"
6767
testState.cfg.Status.User = "user"
68-
testState.cfg.Status.Routes.Port = 6705
68+
testState.cfg.Status.Routes.Port = test_util.NextAvailPort()
6969
testState.cfg.Logging.DisableLogForwardedFor = true
7070
testState.StartGorouterOrFail()
7171

@@ -136,7 +136,7 @@ var _ = Describe("GDPR", func() {
136136
testState.EnableAccessLog()
137137
testState.cfg.Status.Pass = "pass"
138138
testState.cfg.Status.User = "user"
139-
testState.cfg.Status.Routes.Port = 6706
139+
testState.cfg.Status.Routes.Port = test_util.NextAvailPort()
140140
testState.cfg.Logging.DisableLogSourceIP = true
141141
testState.StartGorouterOrFail()
142142

src/code.cloudfoundry.org/gorouter/integration/main_test.go

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -65,29 +65,33 @@ var _ = Describe("Router Integration", func() {
6565
Expect(err).ToNot(HaveOccurred())
6666
cfgFile = filepath.Join(tmpdir, "config.yml")
6767

68-
statusPort = test_util.NextAvailPort()
69-
statusTLSPort = test_util.NextAvailPort()
70-
statusRoutesPort = test_util.NextAvailPort()
71-
proxyPort = test_util.NextAvailPort()
72-
natsPort = test_util.NextAvailPort()
73-
sslPort = test_util.NextAvailPort()
74-
routeServiceServerPort = test_util.NextAvailPort()
75-
68+
statusPort = test_util.ReservePort()
69+
statusTLSPort = test_util.ReservePort()
70+
statusRoutesPort = test_util.ReservePort()
71+
proxyPort = test_util.ReservePort()
72+
natsPort = test_util.ReservePort()
73+
sslPort = test_util.ReservePort()
74+
routeServiceServerPort = test_util.ReservePort()
75+
76+
test_util.ReleasePort(natsPort)
7677
natsRunner = test_util.NewNATSRunner(int(natsPort))
7778
natsRunner.Start()
7879
oauthServerURL = oauthServer.Addr()
7980
})
8081

8182
AfterEach(func() {
83+
test_util.ReleaseAllPorts()
84+
// Stop router before NATS to prevent subscriber's ClosedCB from
85+
// firing log.Fatal → os.Exit(1), which kills the test proc.
86+
if gorouterSession != nil && gorouterSession.ExitCode() == -1 {
87+
stopGorouter(gorouterSession)
88+
}
89+
8290
if natsRunner != nil {
8391
natsRunner.Stop()
8492
}
8593

8694
os.RemoveAll(tmpdir)
87-
88-
if gorouterSession != nil && gorouterSession.ExitCode() == -1 {
89-
stopGorouter(gorouterSession)
90-
}
9195
})
9296

9397
Context("when config is invalid", func() {
@@ -1047,7 +1051,7 @@ var _ = Describe("Router Integration", func() {
10471051
Describe("prometheus metrics", func() {
10481052
It("starts a prometheus https server", func() {
10491053
c := createConfig(statusPort, statusTLSPort, statusRoutesPort, proxyPort, routeServiceServerPort, cfgFile, defaultPruneInterval, defaultPruneThreshold, 0, false, 0, natsPort)
1050-
metricsPort := test_util.NextAvailPort()
1054+
metricsPort := test_util.ReservePort()
10511055
serverCAPath, serverCertPath, serverKeyPath, clientCert := tls_helpers.GenerateCaAndMutualTlsCerts()
10521056

10531057
c.Prometheus.Enabled = true
@@ -1421,6 +1425,7 @@ var _ = Describe("Router Integration", func() {
14211425
It("does not exit", func() {
14221426
writeConfig(cfg, cfgFile)
14231427

1428+
test_util.ReleaseAllPorts()
14241429
gorouterCmd := exec.Command(gorouterPath, "-c", cfgFile)
14251430
session, err := Start(gorouterCmd, GinkgoWriter, GinkgoWriter)
14261431
Expect(err).ToNot(HaveOccurred())

src/code.cloudfoundry.org/gorouter/integration/nats_test.go

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -36,27 +36,31 @@ var _ = Describe("NATS Integration", func() {
3636
Expect(err).ToNot(HaveOccurred())
3737
cfgFile = filepath.Join(tmpdir, "config.yml")
3838

39-
statusPort = test_util.NextAvailPort()
40-
statusTLSPort = test_util.NextAvailPort()
41-
statusRoutesPort = test_util.NextAvailPort()
42-
proxyPort = test_util.NextAvailPort()
43-
natsPort = test_util.NextAvailPort()
44-
routeServiceServerPort = test_util.NextAvailPort()
45-
39+
statusPort = test_util.ReservePort()
40+
statusTLSPort = test_util.ReservePort()
41+
statusRoutesPort = test_util.ReservePort()
42+
proxyPort = test_util.ReservePort()
43+
natsPort = test_util.ReservePort()
44+
routeServiceServerPort = test_util.ReservePort()
45+
46+
test_util.ReleasePort(natsPort)
4647
natsRunner = test_util.NewNATSRunner(int(natsPort))
4748
natsRunner.Start()
4849
})
4950

5051
AfterEach(func() {
52+
test_util.ReleaseAllPorts()
53+
// Stop router before NATS to prevent subscriber's ClosedCB from
54+
// firing log.Fatal → os.Exit(1), which kills the test proc.
55+
if gorouterSession != nil && gorouterSession.ExitCode() == -1 {
56+
stopGorouter(gorouterSession)
57+
}
58+
5159
if natsRunner != nil {
5260
natsRunner.Stop()
5361
}
5462

5563
os.RemoveAll(tmpdir)
56-
57-
if gorouterSession != nil && gorouterSession.ExitCode() == -1 {
58-
stopGorouter(gorouterSession)
59-
}
6064
})
6165

6266
It("has Nats connectivity", func() {
@@ -162,7 +166,7 @@ var _ = Describe("NATS Integration", func() {
162166
)
163167

164168
BeforeEach(func() {
165-
natsPort2 = test_util.NextAvailPort()
169+
natsPort2 = test_util.ReservePort()
166170
natsRunner2 = test_util.NewNATSRunner(int(natsPort2))
167171

168172
pruneInterval = 2 * time.Second
@@ -222,7 +226,7 @@ var _ = Describe("NATS Integration", func() {
222226
Context("when suspend_pruning_if_nats_unavailable enabled", func() {
223227

224228
BeforeEach(func() {
225-
natsPort2 = test_util.NextAvailPort()
229+
natsPort2 = test_util.ReservePort()
226230
natsRunner2 = test_util.NewNATSRunner(int(natsPort2))
227231

228232
pruneInterval = 200 * time.Millisecond

src/code.cloudfoundry.org/gorouter/integration/test_utils_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ func configDrainSetup(cfg *config.Config, pruneInterval, pruneThreshold time.Dur
4646
// as part of pausing
4747
cfg.PruneStaleDropletsInterval = pruneInterval
4848
cfg.DropletStaleThreshold = pruneThreshold
49-
cfg.StartResponseDelayInterval = 1 * time.Second
49+
cfg.StartResponseDelayInterval = 0
5050
cfg.EndpointTimeout = 5 * time.Second
5151
cfg.EndpointDialTimeout = 500 * time.Millisecond
5252
cfg.DrainTimeout = 200 * time.Millisecond
@@ -60,6 +60,7 @@ func writeConfig(cfg *config.Config, cfgFile string) {
6060
}
6161

6262
func startGorouterSession(cfgFile string) *Session {
63+
test_util.ReleaseAllPorts()
6364
gorouterCmd := exec.Command(gorouterPath, "-c", cfgFile)
6465
session, err := Start(gorouterCmd, GinkgoWriter, GinkgoWriter)
6566
Expect(err).ToNot(HaveOccurred())

src/code.cloudfoundry.org/gorouter/logger/logger.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package logger
22

33
import (
4+
"fmt"
45
"io"
56
"log/slog"
67
"os"
@@ -235,6 +236,8 @@ via os.Exit(1) after writing the log message.
235236
*/
236237
func Fatal(logger *slog.Logger, message string, slogAttrs ...any) {
237238
logger.Error(message, slogAttrs...)
239+
// Write to stderr so the message survives os.Exit (stderr is unbuffered).
240+
fmt.Fprintf(os.Stderr, "FATAL: %s %v\n", message, slogAttrs)
238241
os.Exit(1)
239242
}
240243

src/code.cloudfoundry.org/gorouter/mbus/subscriber_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ var _ = Describe("Subscriber", func() {
3838
)
3939

4040
BeforeEach(func() {
41-
natsPort = test_util.NextAvailPort()
42-
41+
natsPort = test_util.ReservePort()
42+
test_util.ReleasePort(natsPort)
4343
natsRunner = test_util.NewNATSRunner(int(natsPort))
4444
natsRunner.Start()
4545
natsClient = natsRunner.MessageBus

src/code.cloudfoundry.org/gorouter/router/route_service_server_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"net/http"
88
"time"
99

10+
"code.cloudfoundry.org/gorouter/test_util"
1011
. "github.com/onsi/ginkgo/v2"
1112
. "github.com/onsi/gomega"
1213

@@ -31,6 +32,7 @@ var _ = Describe("RouteServicesServer", func() {
3132
var err error
3233
cfg, err = config.DefaultConfig()
3334
Expect(err).NotTo(HaveOccurred())
35+
cfg.RouteServicesServerPort = test_util.NextAvailPort()
3436

3537
req, err = http.NewRequest("GET", "/foo", nil)
3638
Expect(err).NotTo(HaveOccurred())

src/code.cloudfoundry.org/gorouter/router/router_drain_test.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,8 @@ var _ = Describe("Router", func() {
151151

152152
BeforeEach(func() {
153153
logger = test_util.NewTestLogger("test")
154-
natsPort = test_util.NextAvailPort()
154+
natsPort = test_util.ReservePort()
155+
test_util.ReleasePort(natsPort)
155156
natsRunner = test_util.NewNATSRunner(int(natsPort))
156157
natsRunner.Start()
157158

@@ -161,11 +162,12 @@ var _ = Describe("Router", func() {
161162
statusRoutesPort := test_util.NextAvailPort()
162163

163164
sslPort := test_util.NextAvailPort()
165+
routeServiceServerPort := test_util.NextAvailPort()
164166

165167
defaultCert := test_util.CreateCert("default")
166168
cert2 := test_util.CreateCert("default")
167169

168-
config = test_util.SpecConfig(statusPort, statusTlsPort, statusRoutesPort, proxyPort, natsPort)
170+
config = test_util.SpecConfig(statusPort, statusTlsPort, statusRoutesPort, proxyPort, routeServiceServerPort, natsPort)
169171
config.EnableSSL = true
170172
config.SSLPort = sslPort
171173
config.SSLCertificates = []tls.Certificate{defaultCert, cert2}
@@ -202,13 +204,13 @@ var _ = Describe("Router", func() {
202204
})
203205

204206
AfterEach(func() {
205-
if natsRunner != nil {
206-
natsRunner.Stop()
207-
}
208207
if subscriber != nil {
209208
subscriber.Signal(os.Interrupt)
210209
<-subscriber.Wait()
211210
}
211+
if natsRunner != nil {
212+
natsRunner.Stop()
213+
}
212214
})
213215

214216
Context("Drain", func() {

0 commit comments

Comments
 (0)