Skip to content

Commit e476d9b

Browse files
authored
Merge pull request #33 from alicefr/fix-31
cluster start: wait for HAProxy to be healthy before returning
2 parents 35e51bf + a57b161 commit e476d9b

4 files changed

Lines changed: 73 additions & 9 deletions

File tree

.github/workflows/integration-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ jobs:
135135
key: podman-images-v2-integration-${{ steps.digests.outputs.hash }}
136136

137137
- name: Run integration tests
138-
run: sudo make test-integration TEST_PROCS=3
138+
run: sudo make test-integration TEST_PROCS=2
139139
timeout-minutes: 90
140140
env:
141141
CONTAINER_HOST: unix:///run/podman/podman.sock

internal/cli/cluster/start.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,9 @@ func runStart(ctx context.Context, logger *logrus.Logger, nodeName string, nodeI
183183
if err := haproxyMgr.EnsureHAProxy(ctx, 0); err != nil {
184184
return fmt.Errorf("creating HAProxy load balancer: %w", err)
185185
}
186+
if err := haproxyMgr.WaitForHealthy(ctx); err != nil {
187+
return fmt.Errorf("waiting for HAProxy: %w", err)
188+
}
186189
logger.Info("")
187190

188191
logger.Info("✅ Cluster created successfully!")

internal/cluster/join.go

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"strings"
1010
"time"
1111

12+
"github.com/bootc-dev/bink/internal/config"
1213
"github.com/bootc-dev/bink/internal/ssh"
1314
)
1415

@@ -101,17 +102,24 @@ func (c *Cluster) Join(ctx context.Context, opts JoinOptions) error {
101102
c.logger.Info("")
102103
c.logger.Infof("=== Labeling %s ===", nodeName)
103104

104-
containerName := fmt.Sprintf("k8s-%s-%s", c.name, controlPlane)
105-
kubeClient, err := c.newKubeClient(ctx, cpSSHClient, containerName)
105+
haproxyContainer := fmt.Sprintf("%s%s-%s", config.ContainerNamePrefix, c.name, config.HAProxyContainerName)
106+
kubeClient, err := c.newKubeClient(ctx, cpSSHClient, haproxyContainer)
106107
if err != nil {
107-
c.logger.Warnf("Failed to create kubernetes client (non-fatal): %v", err)
108-
} else {
109-
if err := kubeClient.LabelNode(ctx, nodeName, labels); err != nil {
110-
c.logger.Warnf("Failed to label node (non-fatal): %v", err)
111-
} else {
112-
c.logger.Infof("✅ Node %s labeled", nodeName)
108+
return fmt.Errorf("labeling node %s: creating kubernetes client: %w", nodeName, err)
109+
}
110+
var labelErr error
111+
for attempt := 1; attempt <= 5; attempt++ {
112+
labelErr = kubeClient.LabelNode(ctx, nodeName, labels)
113+
if labelErr == nil {
114+
break
113115
}
116+
c.logger.Warnf("Failed to label node (attempt %d/5): %v", attempt, labelErr)
117+
time.Sleep(5 * time.Second)
118+
}
119+
if labelErr != nil {
120+
return fmt.Errorf("labeling node %s: %w", nodeName, labelErr)
114121
}
122+
c.logger.Infof("✅ Node %s labeled", nodeName)
115123
}
116124

117125
c.logger.Info("")

internal/haproxy/haproxy.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,12 @@ package haproxy
55

66
import (
77
"context"
8+
"crypto/tls"
89
"fmt"
10+
"net/http"
911
"strings"
1012
"text/template"
13+
"time"
1114

1215
"github.com/bootc-dev/bink/internal/config"
1316
"github.com/bootc-dev/bink/internal/podman"
@@ -202,6 +205,56 @@ func (m *Manager) GetPublishedPort(ctx context.Context) (int, error) {
202205
return m.podman.GetPublishedPort(ctx, m.containerName(), fmt.Sprintf("%d/tcp", config.HAProxyPort))
203206
}
204207

208+
// WaitForHealthy polls the Kubernetes API /healthz endpoint through HAProxy
209+
// until it returns HTTP 200, indicating that HAProxy has healthy backends.
210+
func (m *Manager) WaitForHealthy(ctx context.Context) error {
211+
port, err := m.GetPublishedPort(ctx)
212+
if err != nil {
213+
return fmt.Errorf("getting HAProxy published port: %w", err)
214+
}
215+
216+
client := &http.Client{
217+
Transport: &http.Transport{
218+
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
219+
},
220+
}
221+
222+
healthURL := fmt.Sprintf("https://localhost:%d/healthz", port)
223+
timer := time.NewTimer(2 * time.Minute)
224+
defer timer.Stop()
225+
ticker := time.NewTicker(2 * time.Second)
226+
defer ticker.Stop()
227+
228+
logrus.Info("Waiting for HAProxy to become healthy...")
229+
230+
for {
231+
select {
232+
case <-ctx.Done():
233+
return ctx.Err()
234+
case <-timer.C:
235+
return fmt.Errorf("timed out waiting for HAProxy to become healthy on port %d", port)
236+
case <-ticker.C:
237+
reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
238+
req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, healthURL, nil)
239+
if err != nil {
240+
cancel()
241+
continue
242+
}
243+
resp, err := client.Do(req)
244+
if err != nil {
245+
cancel()
246+
continue
247+
}
248+
resp.Body.Close()
249+
cancel()
250+
if resp.StatusCode == http.StatusOK {
251+
logrus.Info("HAProxy is healthy")
252+
return nil
253+
}
254+
}
255+
}
256+
}
257+
205258
// discoverBackends finds all control-plane node containers in this cluster
206259
// and returns their bridge IPs as backends.
207260
func (m *Manager) discoverBackends(ctx context.Context) ([]backend, error) {

0 commit comments

Comments
 (0)