diff --git a/cmd/root.go b/cmd/root.go index 05d3b31c..565ac040 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -675,6 +675,9 @@ the maximum time has passed. Defaults to 0s.`) `Enables HTTP endpoints /startup, /liveness, and /readiness that report on the proxy's health. Endpoints are available on localhost only. Uses the port specified by the http-port flag.`) + localFlags.BoolVar(&c.conf.WithBackendCheck, "with-backend-check", false, + `Enables a backend connection check in the startup probe. Use with +Cloud Run jobs only where Direct VPC egress setup may take some time.`) localFlags.BoolVar(&c.conf.RunConnectionTest, "run-connection-test", false, `Runs a connection test against all specified instances. If an instance is unreachable, the Proxy exits with a failure status code.`) @@ -1191,7 +1194,7 @@ func runSignalWrapper(cmd *Command) (err error) { needsHTTPServer = true cmd.logger.Infof("Starting health check server at %s", net.JoinHostPort(cmd.conf.HTTPAddress, cmd.conf.HTTPPort)) - hc := healthcheck.NewCheck(p, cmd.logger) + hc := healthcheck.NewCheck(p, cmd.logger, cmd.conf.WithBackendCheck) mux.HandleFunc("/startup", hc.HandleStartup) mux.HandleFunc("/readiness", hc.HandleReadiness) mux.HandleFunc("/liveness", hc.HandleLiveness) diff --git a/docs/cmd/alloydb-auth-proxy.md b/docs/cmd/alloydb-auth-proxy.md index 9febe386..cba65557 100644 --- a/docs/cmd/alloydb-auth-proxy.md +++ b/docs/cmd/alloydb-auth-proxy.md @@ -355,6 +355,8 @@ alloydb-auth-proxy instance_uri... [flags] -u, --unix-socket string (*) Enables Unix sockets for all listeners using the provided directory. --user-agent string Space separated list of additional user agents, e.g. custom-agent/0.0.1 -v, --version Print the alloydb-auth-proxy version + --with-backend-check Enables a backend connection check in the startup probe. Use with + Cloud Run jobs only where Direct VPC egress setup may take some time. 
``` ### SEE ALSO diff --git a/internal/healthcheck/healthcheck.go b/internal/healthcheck/healthcheck.go index 9e9f1abd..48981b7e 100644 --- a/internal/healthcheck/healthcheck.go +++ b/internal/healthcheck/healthcheck.go @@ -29,23 +29,25 @@ import ( // Check provides HTTP handlers for use as healthchecks typically in a // Kubernetes context. type Check struct { - startedOnce *sync.Once - started chan struct{} - stoppedOnce *sync.Once - stopped chan struct{} - proxy *proxy.Client - logger alloydb.Logger + startedOnce *sync.Once + started chan struct{} + stoppedOnce *sync.Once + stopped chan struct{} + proxy *proxy.Client + logger alloydb.Logger + backendCheck bool } // NewCheck is the initializer for Check. -func NewCheck(p *proxy.Client, l alloydb.Logger) *Check { +func NewCheck(p *proxy.Client, l alloydb.Logger, backendCheck bool) *Check { return &Check{ - startedOnce: &sync.Once{}, - started: make(chan struct{}), - stoppedOnce: &sync.Once{}, - stopped: make(chan struct{}), - proxy: p, - logger: l, + startedOnce: &sync.Once{}, + started: make(chan struct{}), + stoppedOnce: &sync.Once{}, + stopped: make(chan struct{}), + proxy: p, + logger: l, + backendCheck: backendCheck, } } @@ -61,9 +63,18 @@ func (c *Check) NotifyStopped() { } // HandleStartup reports whether the Check has been notified of startup. 
-func (c *Check) HandleStartup(w http.ResponseWriter, _ *http.Request) { +func (c *Check) HandleStartup(w http.ResponseWriter, r *http.Request) { select { case <-c.started: + if c.backendCheck { + if _, err := c.proxy.CheckConnections(r.Context()); err != nil { + c.logger.Errorf("[Health Check] Startup failed: %v", err) + w.WriteHeader(http.StatusServiceUnavailable) + w.Write([]byte(err.Error())) + return + } + } + w.WriteHeader(http.StatusOK) w.Write([]byte("ok")) default: diff --git a/internal/healthcheck/healthcheck_test.go b/internal/healthcheck/healthcheck_test.go index 179268da..59eb63f7 100644 --- a/internal/healthcheck/healthcheck_test.go +++ b/internal/healthcheck/healthcheck_test.go @@ -16,6 +16,7 @@ package healthcheck_test import ( "context" + "errors" "fmt" "io" "net" @@ -104,10 +105,10 @@ func TestHandleStartupWhenNotNotified(t *testing.T) { t.Logf("failed to close proxy client: %v", err) } }() - check := healthcheck.NewCheck(p, logger) + check := healthcheck.NewCheck(p, logger, false) rec := httptest.NewRecorder() - check.HandleStartup(rec, &http.Request{URL: &url.URL{}}) + check.HandleStartup(rec, httptest.NewRequest("GET", "/startup", nil)) // Startup is not complete because the Check has not been notified of the // proxy's startup. 
@@ -124,12 +125,12 @@ func TestHandleStartupWhenNotified(t *testing.T) { t.Logf("failed to close proxy client: %v", err) } }() - check := healthcheck.NewCheck(p, logger) + check := healthcheck.NewCheck(p, logger, false) check.NotifyStarted() rec := httptest.NewRecorder() - check.HandleStartup(rec, &http.Request{URL: &url.URL{}}) + check.HandleStartup(rec, httptest.NewRequest("GET", "/startup", nil)) resp := rec.Result() if got, want := resp.StatusCode, http.StatusOK; got != want { @@ -144,7 +145,7 @@ func TestHandleReadinessWhenNotNotified(t *testing.T) { t.Logf("failed to close proxy client: %v", err) } }() - check := healthcheck.NewCheck(p, logger) + check := healthcheck.NewCheck(p, logger, false) rec := httptest.NewRecorder() check.HandleReadiness(rec, &http.Request{URL: &url.URL{}}) @@ -162,7 +163,7 @@ func TestHandleReadinessWhenStopped(t *testing.T) { t.Logf("failed to close proxy client: %v", err) } }() - check := healthcheck.NewCheck(p, logger) + check := healthcheck.NewCheck(p, logger, false) check.NotifyStarted() // The Proxy has started. check.NotifyStopped() // And now the Proxy is shutting down. 
@@ -184,7 +185,7 @@ func TestHandleReadinessForMaxConns(t *testing.T) { } }() started := make(chan struct{}) - check := healthcheck.NewCheck(p, logger) + check := healthcheck.NewCheck(p, logger, false) go p.Serve(context.Background(), func() { check.NotifyStarted() close(started) @@ -224,3 +225,51 @@ func TestHandleReadinessForMaxConns(t *testing.T) { t.Fatalf("want max connections error, got = %v", string(body)) } } + +type errorDialer struct { + fakeDialer +} + +func (*errorDialer) Dial(_ context.Context, _ string, _ ...alloydbconn.DialOption) (net.Conn, error) { + return nil, errors.New("dial error") +} + +func TestHandleStartupWithBackendCheck(t *testing.T) { + t.Run("when backend check passes", func(t *testing.T) { + p := newTestProxy(t) + defer p.Close() + check := healthcheck.NewCheck(p, logger, true) + check.NotifyStarted() + + rec := httptest.NewRecorder() + check.HandleStartup(rec, httptest.NewRequest("GET", "/startup", nil)) + + resp := rec.Result() + if got, want := resp.StatusCode, http.StatusOK; got != want { + t.Fatalf("want = %v, got = %v", want, got) + } + }) + t.Run("when backend check fails", func(t *testing.T) { + p := newProxyWithParams(t, 0, &errorDialer{}, []proxy.InstanceConnConfig{ + {Name: "projects/proj/locations/region/clusters/clust/instances/inst"}, + }) + defer p.Close() + check := healthcheck.NewCheck(p, logger, true) + check.NotifyStarted() + + rec := httptest.NewRecorder() + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + req, _ := http.NewRequestWithContext(ctx, "GET", "/startup", nil) + check.HandleStartup(rec, req) + + resp := rec.Result() + if got, want := resp.StatusCode, http.StatusServiceUnavailable; got != want { + t.Fatalf("want = %v, got = %v", want, got) + } + body, _ := io.ReadAll(resp.Body) + if !strings.Contains(string(body), "dial error") { + t.Fatalf("want dial error, got = %v", string(body)) + } + }) +} diff --git a/internal/proxy/proxy.go b/internal/proxy/proxy.go index 
c36d5fd1..c4887045 100644 --- a/internal/proxy/proxy.go +++ b/internal/proxy/proxy.go @@ -220,6 +220,10 @@ type Config struct { // ExitZeroOnSigterm exits with 0 exit code when Sigterm received ExitZeroOnSigterm bool + // WithBackendCheck enables a backend connection check in the startup + // probe. + WithBackendCheck bool + // DisableBuiltInTelemetry disables the internal metric export. By // default, the Dialer will report on its internal operations to the // alloydb.googleapis.com system metric prefix. These metrics help AlloyDB