Skip to content

Commit 5e26fb5

Browse files
fix: align gateway rate limiting and improve plugin reconnect backoff
- Worker now uses DO's Retry-After as cache TTL (instead of fixed 10s) so two-layer cooldowns stay in sync after deployments
- Plugin uses max(backoff, retryAfter) to prevent server's short Retry-After from reducing accumulated exponential backoff
- Add ±25% jitter to reconnect timing to prevent thundering herd
1 parent 4156140 commit 5e26fb5

2 files changed

Lines changed: 21 additions & 8 deletions

File tree

packages/api/src/index.ts

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,13 +424,21 @@ app.all("/api/gateway/:connId", async (c) => {
424424

425425
// Cache the rate limit after the DO responds (success or rate-limited).
426426
// 101 = WebSocket accepted; 429 = DO's own rate limit.
427-
// Either way, prevent further DO wake-ups for GATEWAY_COOLDOWN_S.
427+
// Either way, prevent further DO wake-ups.
428+
// When the DO returns 429, honour its Retry-After so the Worker cache
429+
// aligns with the DO's actual cooldown (avoids the client retrying after
430+
// the Worker cache expires but before the DO cooldown ends).
428431
if (doResp.status === 101 || doResp.status === 429) {
432+
let cacheTtl = GATEWAY_COOLDOWN_S;
433+
if (doResp.status === 429) {
434+
const doRetry = parseInt(doResp.headers.get("Retry-After") ?? "", 10);
435+
if (doRetry > 0) cacheTtl = doRetry;
436+
}
429437
c.executionCtx.waitUntil(
430438
cache.put(
431439
rateCacheReq,
432440
new Response(null, {
433-
headers: { "Cache-Control": `public, max-age=${GATEWAY_COOLDOWN_S}` },
441+
headers: { "Cache-Control": `public, max-age=${cacheTtl}` },
434442
}),
435443
),
436444
);

packages/plugin/src/ws-client.ts

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,14 @@ export class BotsChatCloudClient {
117117
const status = res?.statusCode ?? 0;
118118
const retryAfter = parseInt(res?.headers?.["retry-after"] ?? "0", 10);
119119
if (status === 429 && retryAfter > 0) {
120-
this.log("warn", `Rate-limited (429), backing off ${retryAfter}s`);
121-
this.backoffMs = retryAfter * 1000;
120+
// Never let the server's Retry-After reduce our exponential backoff
121+
const serverMs = retryAfter * 1000;
122+
this.backoffMs = Math.max(this.backoffMs, serverMs);
123+
this.log("warn", `Rate-limited (429), backing off ${Math.round(this.backoffMs / 1000)}s`);
122124
} else if (status === 503) {
123125
const secs = retryAfter || 300;
124-
this.log("warn", `Service unavailable (503), backing off ${secs}s`);
125-
this.backoffMs = secs * 1000;
126+
this.backoffMs = Math.max(this.backoffMs, secs * 1000);
127+
this.log("warn", `Service unavailable (503), backing off ${Math.round(this.backoffMs / 1000)}s`);
126128
}
127129
// ws will emit 'close' after this, triggering scheduleReconnect
128130
});
@@ -209,12 +211,15 @@ export class BotsChatCloudClient {
209211

210212
private scheduleReconnect(): void {
211213
if (this.intentionalClose) return;
212-
this.log("info", `Reconnecting in ${this.backoffMs}ms`);
214+
// Add ±25% jitter to prevent reconnection storms across multiple clients
215+
const jitter = 0.75 + Math.random() * 0.5; // 0.75 – 1.25
216+
const delayMs = Math.round(this.backoffMs * jitter);
217+
this.log("info", `Reconnecting in ${delayMs}ms (backoff=${this.backoffMs}ms)`);
213218
this.reconnectTimer = setTimeout(() => {
214219
this.reconnectTimer = null;
215220
this.backoffMs = Math.min(this.backoffMs * 2, MAX_BACKOFF_MS);
216221
this.connect();
217-
}, this.backoffMs);
222+
}, delayMs);
218223
}
219224

220225
private startPing(): void {

0 commit comments

Comments
 (0)