Skip to content

Commit fec9183

Browse files
committed
feat: add check to verify backend connection in readiness probe
1 parent 65d2c90 commit fec9183

5 files changed

Lines changed: 106 additions & 20 deletions

File tree

cmd/root.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,8 @@ the maximum time has passed. Defaults to 0s.`)
675675
`Enables HTTP endpoints /startup, /liveness, and /readiness
676676
that report on the proxy's health. Endpoints are available on localhost
677677
only. Uses the port specified by the http-port flag.`)
678+
localFlags.BoolVar(&c.conf.WithBackendCheck, "with-backend-check", false,
679+
"Enables a backend connection check in the readiness probe.")
678680
localFlags.BoolVar(&c.conf.RunConnectionTest, "run-connection-test", false, `Runs a connection test
679681
against all specified instances. If an instance is unreachable, the Proxy exits with a failure
680682
status code.`)
@@ -1191,7 +1193,7 @@ func runSignalWrapper(cmd *Command) (err error) {
11911193
needsHTTPServer = true
11921194
cmd.logger.Infof("Starting health check server at %s",
11931195
net.JoinHostPort(cmd.conf.HTTPAddress, cmd.conf.HTTPPort))
1194-
hc := healthcheck.NewCheck(p, cmd.logger)
1196+
hc := healthcheck.NewCheck(p, cmd.logger, cmd.conf.WithBackendCheck)
11951197
mux.HandleFunc("/startup", hc.HandleStartup)
11961198
mux.HandleFunc("/readiness", hc.HandleReadiness)
11971199
mux.HandleFunc("/liveness", hc.HandleLiveness)

docs/cmd/alloydb-auth-proxy.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,7 @@ alloydb-auth-proxy instance_uri... [flags]
355355
-u, --unix-socket string (*) Enables Unix sockets for all listeners using the provided directory.
356356
--user-agent string Space separated list of additional user agents, e.g. custom-agent/0.0.1
357357
-v, --version Print the alloydb-auth-proxy version
358+
--with-backend-check Enables a backend connection check in the readiness probe.
358359
```
359360

360361
### SEE ALSO

internal/healthcheck/healthcheck.go

Lines changed: 44 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"net/http"
2323
"sync"
24+
"time"
2425

2526
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/alloydb"
2627
"github.com/GoogleCloudPlatform/alloydb-auth-proxy/internal/proxy"
@@ -29,23 +30,25 @@ import (
2930
// Check provides HTTP handlers for use as healthchecks typically in a
3031
// Kubernetes context.
3132
type Check struct {
32-
startedOnce *sync.Once
33-
started chan struct{}
34-
stoppedOnce *sync.Once
35-
stopped chan struct{}
36-
proxy *proxy.Client
37-
logger alloydb.Logger
33+
startedOnce *sync.Once
34+
started chan struct{}
35+
stoppedOnce *sync.Once
36+
stopped chan struct{}
37+
proxy *proxy.Client
38+
logger alloydb.Logger
39+
withBackendCheck bool
3840
}
3941

4042
// NewCheck is the initializer for Check.
41-
func NewCheck(p *proxy.Client, l alloydb.Logger) *Check {
43+
func NewCheck(p *proxy.Client, l alloydb.Logger, withBackendCheck bool) *Check {
4244
return &Check{
43-
startedOnce: &sync.Once{},
44-
started: make(chan struct{}),
45-
stoppedOnce: &sync.Once{},
46-
stopped: make(chan struct{}),
47-
proxy: p,
48-
logger: l,
45+
startedOnce: &sync.Once{},
46+
started: make(chan struct{}),
47+
stoppedOnce: &sync.Once{},
48+
stopped: make(chan struct{}),
49+
proxy: p,
50+
logger: l,
51+
withBackendCheck: withBackendCheck,
4952
}
5053
}
5154

@@ -80,7 +83,7 @@ var (
8083
// HandleReadiness ensures the Check has been notified of successful startup,
8184
// that the proxy has not reached maximum connections, that the Proxy has not
8285
// started shutting down, and (when the backend check is enabled) that a
// connection to the backend can be established.
83-
func (c *Check) HandleReadiness(w http.ResponseWriter, _ *http.Request) {
86+
func (c *Check) HandleReadiness(w http.ResponseWriter, r *http.Request) {
8487
select {
8588
case <-c.started:
8689
// Proxy has started.
@@ -109,6 +112,33 @@ func (c *Check) HandleReadiness(w http.ResponseWriter, _ *http.Request) {
109112
return
110113
}
111114

115+
if c.withBackendCheck {
116+
var lastErr error
117+
for {
118+
if _, err := c.proxy.CheckConnections(r.Context()); err != nil {
119+
lastErr = err
120+
} else {
121+
// Connection was successful
122+
break
123+
}
124+
125+
select {
126+
case <-r.Context().Done():
127+
c.logger.Errorf("[Health Check] Readiness failed: %v (last error: %v)", r.Context().Err(), lastErr)
128+
w.WriteHeader(http.StatusServiceUnavailable)
129+
errMsg := fmt.Sprintf("%v (last error: %v)", r.Context().Err(), lastErr)
130+
w.Write([]byte(errMsg))
131+
return
132+
case <-c.stopped:
133+
c.logger.Errorf("[Health Check] Readiness failed: %v", errStopped)
134+
w.WriteHeader(http.StatusServiceUnavailable)
135+
w.Write([]byte(errStopped.Error()))
136+
return
137+
case <-time.After(time.Second):
138+
}
139+
}
140+
}
141+
112142
// No error cases apply, 200 status.
113143
w.WriteHeader(http.StatusOK)
114144
w.Write([]byte("ok"))

internal/healthcheck/healthcheck_test.go

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package healthcheck_test
1616

1717
import (
1818
"context"
19+
"errors"
1920
"fmt"
2021
"io"
2122
"net"
@@ -104,7 +105,7 @@ func TestHandleStartupWhenNotNotified(t *testing.T) {
104105
t.Logf("failed to close proxy client: %v", err)
105106
}
106107
}()
107-
check := healthcheck.NewCheck(p, logger)
108+
check := healthcheck.NewCheck(p, logger, false)
108109

109110
rec := httptest.NewRecorder()
110111
check.HandleStartup(rec, &http.Request{URL: &url.URL{}})
@@ -124,7 +125,7 @@ func TestHandleStartupWhenNotified(t *testing.T) {
124125
t.Logf("failed to close proxy client: %v", err)
125126
}
126127
}()
127-
check := healthcheck.NewCheck(p, logger)
128+
check := healthcheck.NewCheck(p, logger, false)
128129

129130
check.NotifyStarted()
130131

@@ -144,7 +145,7 @@ func TestHandleReadinessWhenNotNotified(t *testing.T) {
144145
t.Logf("failed to close proxy client: %v", err)
145146
}
146147
}()
147-
check := healthcheck.NewCheck(p, logger)
148+
check := healthcheck.NewCheck(p, logger, false)
148149

149150
rec := httptest.NewRecorder()
150151
check.HandleReadiness(rec, &http.Request{URL: &url.URL{}})
@@ -162,7 +163,7 @@ func TestHandleReadinessWhenStopped(t *testing.T) {
162163
t.Logf("failed to close proxy client: %v", err)
163164
}
164165
}()
165-
check := healthcheck.NewCheck(p, logger)
166+
check := healthcheck.NewCheck(p, logger, false)
166167

167168
check.NotifyStarted() // The Proxy has started.
168169
check.NotifyStopped() // And now the Proxy is shutting down.
@@ -184,7 +185,7 @@ func TestHandleReadinessForMaxConns(t *testing.T) {
184185
}
185186
}()
186187
started := make(chan struct{})
187-
check := healthcheck.NewCheck(p, logger)
188+
check := healthcheck.NewCheck(p, logger, false)
188189
go p.Serve(context.Background(), func() {
189190
check.NotifyStarted()
190191
close(started)
@@ -224,3 +225,51 @@ func TestHandleReadinessForMaxConns(t *testing.T) {
224225
t.Fatalf("want max connections error, got = %v", string(body))
225226
}
226227
}
228+
229+
type errorDialer struct {
230+
fakeDialer
231+
}
232+
233+
func (*errorDialer) Dial(_ context.Context, _ string, _ ...alloydbconn.DialOption) (net.Conn, error) {
234+
return nil, errors.New("dial error")
235+
}
236+
237+
func TestHandleReadinessWithBackendCheck(t *testing.T) {
238+
t.Run("when backend check passes", func(t *testing.T) {
239+
p := newTestProxy(t)
240+
defer p.Close()
241+
check := healthcheck.NewCheck(p, logger, true)
242+
check.NotifyStarted()
243+
244+
rec := httptest.NewRecorder()
245+
check.HandleReadiness(rec, &http.Request{URL: &url.URL{}})
246+
247+
resp := rec.Result()
248+
if got, want := resp.StatusCode, http.StatusOK; got != want {
249+
t.Fatalf("want = %v, got = %v", want, got)
250+
}
251+
})
252+
t.Run("when backend check fails", func(t *testing.T) {
253+
p := newProxyWithParams(t, 0, &errorDialer{}, []proxy.InstanceConnConfig{
254+
{Name: "projects/proj/locations/region/clusters/clust/instances/inst"},
255+
})
256+
defer p.Close()
257+
check := healthcheck.NewCheck(p, logger, true)
258+
check.NotifyStarted()
259+
260+
rec := httptest.NewRecorder()
261+
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
262+
defer cancel()
263+
req, _ := http.NewRequestWithContext(ctx, "GET", "/readiness", nil)
264+
check.HandleReadiness(rec, req)
265+
266+
resp := rec.Result()
267+
if got, want := resp.StatusCode, http.StatusServiceUnavailable; got != want {
268+
t.Fatalf("want = %v, got = %v", want, got)
269+
}
270+
body, _ := io.ReadAll(resp.Body)
271+
if !strings.Contains(string(body), "context deadline exceeded") || !strings.Contains(string(body), "dial error") {
272+
t.Fatalf("want context deadline exceeded and dial error, got = %v", string(body))
273+
}
274+
})
275+
}

internal/proxy/proxy.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,10 @@ type Config struct {
220220
// ExitZeroOnSigterm exits with 0 exit code when Sigterm received
221221
ExitZeroOnSigterm bool
222222

223+
// WithBackendCheck enables a backend connection check in the readiness
224+
// probe.
225+
WithBackendCheck bool
226+
223227
// DisableBuiltInTelemetry disables the internal metric export. By
224228
// default, the Dialer will report on its internal operations to the
225229
// alloydb.googleapis.com system metric prefix. These metrics help AlloyDB

0 commit comments

Comments
 (0)