Skip to content

Commit c82910b

Browse files
Reconfigure API timeouts (#617)
Co-authored-by: Jakub Novak <jakub@e2b.dev>
1 parent 27c4423 commit c82910b

File tree

4 files changed

+28
-15
lines changed

4 files changed

+28
-15
lines changed

packages/api/main.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,11 @@ const (
4545
maxMultipartMemory = 1 << 27 // 128 MiB
4646
maxUploadLimit = 1 << 28 // 256 MiB
4747

48-
maxReadHeaderTimeout = 60 * time.Second
49-
maxReadTimeout = 75 * time.Second
50-
maxWriteTimeout = 75 * time.Second
48+
maxReadTimeout = 75 * time.Second
49+
maxWriteTimeout = 75 * time.Second
50+
// This timeout should be > 600 s (the GCP LB upstream idle timeout) to prevent a race condition
51+
// https://cloud.google.com/load-balancing/docs/https#timeouts_and_retries:~:text=The%20load%20balancer%27s%20backend%20keepalive,is%20greater%20than%20600%20seconds
52+
idleTimeout = 620 * time.Second
5153

5254
defaultPort = 80
5355
)
@@ -171,12 +173,13 @@ func NewGinServer(ctx context.Context, logger *zap.Logger, apiStore *handlers.AP
171173
r.MaxMultipartMemory = maxMultipartMemory
172174

173175
s := &http.Server{
174-
Handler: r,
175-
Addr: fmt.Sprintf("0.0.0.0:%d", port),
176-
ReadHeaderTimeout: maxReadHeaderTimeout,
177-
ReadTimeout: maxReadTimeout,
178-
WriteTimeout: maxWriteTimeout,
179-
BaseContext: func(net.Listener) context.Context { return ctx },
176+
Handler: r,
177+
Addr: fmt.Sprintf("0.0.0.0:%d", port),
178+
// Configure timeouts to be greater than the proxy timeouts.
179+
ReadTimeout: maxReadTimeout,
180+
WriteTimeout: maxWriteTimeout,
181+
IdleTimeout: idleTimeout,
182+
BaseContext: func(net.Listener) context.Context { return ctx },
180183
}
181184

182185
return s

packages/client-proxy/internal/proxy/proxy.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ const (
2323
maxRetries = 3
2424

2525
minOrchestratorProxyConns = 3
26-
idleTimeout = 620 * time.Second
26+
// This timeout should be > 600 s (the GCP LB upstream idle timeout) to prevent a race condition
27+
// Also it's a good practice to set it to a value higher than the idle timeout of the backend service
28+
// https://cloud.google.com/load-balancing/docs/https#timeouts_and_retries:~:text=The%20load%20balancer%27s%20backend%20keepalive,is%20greater%20than%20600%20seconds
29+
idleTimeout = 610 * time.Second
2730

2831
// We use a constant connection key, because we don't have to separate connection pools
2932
// as we need to do when connecting to sandboxes (from orchestrator proxy) to prevent reuse of pool connections

packages/orchestrator/internal/proxy/proxy.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ import (
1919

2020
const (
2121
minSandboxConns = 1
22-
idleTimeout = 630 * time.Second
22+
// This timeout should be > 600 s (the GCP LB upstream idle timeout) to prevent a race condition
23+
// Also, it is good practice to increase this timeout at each successive layer of the stack
24+
// https://cloud.google.com/load-balancing/docs/https#timeouts_and_retries:~:text=The%20load%20balancer%27s%20backend%20keepalive,is%20greater%20than%20600%20seconds
25+
idleTimeout = 620 * time.Second
2326
)
2427

2528
type SandboxProxy struct {

packages/shared/pkg/proxy/proxy.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
)
1313

1414
const maxClientConns = 8192 // Reasonably big number that is lower than the number of available ports.
15+
const idleTimeoutBufferUpstreamDownstream = 10
1516

1617
type Proxy struct {
1718
http.Server
@@ -33,10 +34,13 @@ func New(
3334

3435
return &Proxy{
3536
Server: http.Server{
36-
Addr: fmt.Sprintf(":%d", port),
37-
ReadTimeout: 0,
38-
WriteTimeout: 0,
39-
IdleTimeout: idleTimeout,
37+
Addr: fmt.Sprintf(":%d", port),
38+
ReadTimeout: 0,
39+
WriteTimeout: 0,
40+
// Downstream (client side) idle > upstream (server side) idle
41+
// otherwise a request arriving between the downstream and upstream idle timeouts
42+
// will try to reuse an upstream connection that is already in `CLOSE_WAIT` state, resulting in an error
43+
IdleTimeout: idleTimeout + idleTimeoutBufferUpstreamDownstream,
4044
ReadHeaderTimeout: 0,
4145
Handler: handler(p, getDestination),
4246
},

0 commit comments

Comments
 (0)