Skip to content

Commit 17142ab

Browse files
fix(client): add grpc.ConnectParams with Backoff override to avoid conflicting retry/backoff strategies (#175)
1 parent 4d394c8 commit 17142ab

1 file changed

Lines changed: 18 additions & 0 deletions

File tree

client/internal/bootstrap/grpc.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"go.uber.org/zap"
2020
"golang.org/x/oauth2"
2121
"google.golang.org/grpc"
22+
"google.golang.org/grpc/backoff"
2223
"google.golang.org/grpc/codes"
2324
"google.golang.org/grpc/credentials"
2425
"google.golang.org/grpc/credentials/oauth"
@@ -46,6 +47,14 @@ func getServiceClient(token string, cfg *Config) (v1.SecureTLSBootstrapServiceCl
4647
return nil, nil, fmt.Errorf("failed to get TLS config: %w", err)
4748
}
4849

50+
// override max delay to 3s (default is 120s) - this ensures the gRPC subchannel
51+
// re-attempts a real TCP+TLS connection at least every 3s, which aligns with
52+
// the ~2s RPC-level retry cadence. Without this cap, the subchannel exponential
53+
// backoff grows to 120s, causing the retry interceptor to receive cached errors
54+
// from the last real attempt rather than triggering new connection attempts.
55+
grpcConnectionBackoffConfig := backoff.DefaultConfig
56+
grpcConnectionBackoffConfig.MaxDelay = 3 * time.Second
57+
4958
conn, err := grpc.NewClient(
5059
fmt.Sprintf("%s:443", cfg.APIServerFQDN),
5160
grpc.WithUserAgent(internalhttp.GetUserAgent()),
@@ -55,6 +64,15 @@ func getServiceClient(token string, cfg *Config) (v1.SecureTLSBootstrapServiceCl
5564
AccessToken: token,
5665
}),
5766
}),
67+
// transport/connection-level config
68+
grpc.WithConnectParams(grpc.ConnectParams{
69+
Backoff: grpcConnectionBackoffConfig,
70+
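// MinConnectTimeout is the minimum time given to each connection attempt (default: 20s); since the backoff delay is capped at 3s, it is effectively the per-attempt connection timeout.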
71+
// 5s balances fast retry cycles (~8s/cycle) against headroom for first-connection
72+
// latency through new LB paths — healthy intra-Azure TCP+TLS 1.3 handshakes complete in <1s.
73+
MinConnectTimeout: 5 * time.Second,
74+
}),
75+
// RPC-level retry config
5876
grpc.WithUnaryInterceptor(retry.UnaryClientInterceptor(
5977
retry.WithOnRetryCallback(getGRPCOnRetryCallbackFunc()),
6078
retry.WithBackoff(retry.BackoffLinearWithJitter(2*time.Second, 0.25)),

0 commit comments

Comments
 (0)