diff --git a/src/lib/retry.ts b/src/lib/retry.ts index 3540ff73e4..21507b2627 100644 --- a/src/lib/retry.ts +++ b/src/lib/retry.ts @@ -1,3 +1,5 @@ +import { TimeoutError } from "./errors.js"; + const MAX_REQUEST_RETRY_JITTER = 250; const MAX_REQUEST_RETRY_DELAY = 10000; const DEFAULT_NUMBER_RETRIES = 3; @@ -27,6 +29,26 @@ async function pause(delay: number) { return new Promise((resolve) => setTimeout(resolve, delay)); } +// Transient network errors that are safe to retry — failures that can self-heal +// without any config change (socket reset, broken pipe, aborted connection). +// Deliberately excludes ENOTFOUND, ECONNREFUSED, cert errors — those won't self-heal. +const RETRYABLE_ERROR_CODES = new Set(["ECONNRESET", "EPIPE", "ECONNABORTED"]); + +function isRetryableNetworkError(e: unknown): boolean { + if (typeof e !== "object" || e === null) return false; + // Check both e.code (old request-lib / nock shape) and e.cause.code + // (native fetch / undici shape: TypeError: fetch failed { cause: { code } }) + const err = e as NodeJS.ErrnoException & { cause?: NodeJS.ErrnoException }; + const code = err.code ?? err.cause?.code ?? ""; + return RETRYABLE_ERROR_CODES.has(code); +} + +function calculateWait(nrOfTries: number): number { + let wait = BASE_DELAY * Math.pow(2, nrOfTries - 1); + wait = getRandomInt(wait + 1, wait + MAX_REQUEST_RETRY_JITTER); + return Math.min(wait, MAX_REQUEST_RETRY_DELAY); +} + /** * Configure the retry logic for http calls. * By default, this retries any request that returns a 429 3 times. @@ -39,11 +61,16 @@ export interface RetryConfiguration { enabled?: boolean; /** * Configure the max amount of retries the SDK should do. - * Defaults to 5. + * Defaults to 3. + * Note: this budget is shared between HTTP status code retries (e.g. 429) and + * transient network error retries (e.g. ECONNRESET). For example, with maxRetries: 3, + * 2 network retries and 1 status code retry would exhaust the full budget. */ maxRetries?: number; /** - * Status Codes on which the SDK should trigger retries. + * HTTP Status Codes on which the SDK should trigger retries. + * Note: transient network errors (ECONNRESET, EPIPE, ECONNABORTED) are always retried + * up to maxRetries regardless of this setting. Use `enabled: false` to disable all retries. * Defaults to [429]. */ retryWhen?: number[]; @@ -57,19 +84,26 @@ export function retry(action: () => Promise, { maxRetries, retryWhen } const nrOfTriesToAttempt = Math.min(MAX_NUMBER_RETRIES, maxRetries ?? DEFAULT_NUMBER_RETRIES); let nrOfTries = 0; - const retryAndWait = async () => { + const retryAndWait = async (): Promise => { let result: Response; - result = await action(); + try { + result = await action(); + } catch (e: unknown) { + if (e instanceof TimeoutError) { + throw e; + } + if (isRetryableNetworkError(e) && nrOfTries < nrOfTriesToAttempt) { + nrOfTries++; + await pause(calculateWait(nrOfTries)); + return retryAndWait(); + } + throw e; + } if ((retryWhen || [429]).includes(result.status) && nrOfTries < nrOfTriesToAttempt) { nrOfTries++; - - let wait = BASE_DELAY * Math.pow(2, nrOfTries - 1); - wait = getRandomInt(wait + 1, wait + MAX_REQUEST_RETRY_JITTER); - wait = Math.min(wait, MAX_REQUEST_RETRY_DELAY); - - await pause(wait); + await pause(calculateWait(nrOfTries)); result = await retryAndWait(); } diff --git a/tests/lib/runtime.test.ts b/tests/lib/runtime.test.ts index 29ad35aee7..f6a9685856 100644 --- a/tests/lib/runtime.test.ts +++ b/tests/lib/runtime.test.ts @@ -268,6 +268,201 @@ describe("Runtime", () => { ).rejects.toThrowError(expect.objectContaining({ statusCode: 429 })); }); + it("should retry on ECONNRESET when retry is enabled", async () => { + const request = nock(URL, { encodedQueryParams: true }) + .get("/clients") + .times(2) + .replyWithError({ code: "ECONNRESET", message: "socket hang up" }) + .get("/clients") + .reply(200, [{ client_id: "123" }]); + + const client = new TestClient({ + baseUrl: URL, + parseError, + }); + + const response = await client.testRequest({ + path: `/clients`, + method: "GET", + }); + + const data = (await response.json()) as Array<{ client_id: string }>; + expect(data[0].client_id).toBe("123"); + expect(request.isDone()).toBe(true); + }); + + it("should retry on ECONNRESET wrapped in TypeError (native fetch shape)", async () => { + // Native fetch (undici) does not put code on the top-level error. + // It throws: TypeError: fetch failed { cause: Error: read ECONNRESET { code: "ECONNRESET" } } + // This test ensures isRetryableNetworkError handles that shape. + let callCount = 0; + const mockFetch = async (): Promise => { + callCount++; + if (callCount <= 2) { + const cause = Object.assign(new Error("read ECONNRESET"), { code: "ECONNRESET" }); + const err = new TypeError("fetch failed"); + (err as any).cause = cause; + throw err; + } + return new Response(JSON.stringify([{ client_id: "123" }]), { status: 200 }); + }; + + const client = new TestClient({ + baseUrl: URL, + parseError, + fetch: mockFetch, + }); + + const response = await client.testRequest({ + path: `/clients`, + method: "GET", + }); + + const data = (await response.json()) as Array<{ client_id: string }>; + expect(data[0].client_id).toBe("123"); + expect(callCount).toBe(3); + }); + + it("should retry on EPIPE when retry is enabled", async () => { + const request = nock(URL, { encodedQueryParams: true }) + .get("/clients") + .replyWithError({ code: "EPIPE", message: "write EPIPE" }) + .get("/clients") + .reply(200, [{ client_id: "123" }]); + + const client = new TestClient({ + baseUrl: URL, + parseError, + }); + + const response = await client.testRequest({ + path: `/clients`, + method: "GET", + }); + + const data = (await response.json()) as Array<{ client_id: string }>; + expect(data[0].client_id).toBe("123"); + expect(request.isDone()).toBe(true); + }); + + it("should retry on ECONNABORTED when retry is enabled", async () => { + const request = nock(URL, { encodedQueryParams: true }) + .get("/clients") + .replyWithError({ code: "ECONNABORTED", message: "connection aborted" }) + .get("/clients") + .reply(200, [{ client_id: "123" }]); + + const client = new TestClient({ + baseUrl: URL, + parseError, + }); + + const response = await client.testRequest({ + path: `/clients`, + method: "GET", + }); + + const data = (await response.json()) as Array<{ client_id: string }>; + expect(data[0].client_id).toBe("123"); + expect(request.isDone()).toBe(true); + }); + + it("should throw after exhausting retries on repeated ECONNRESET", async () => { + nock(URL, { encodedQueryParams: true }) + .get("/clients") + .times(4) + .replyWithError({ code: "ECONNRESET", message: "socket hang up" }); + + const client = new TestClient({ + baseUrl: URL, + parseError, + }); + + await expect( + client.testRequest({ + path: `/clients`, + method: "GET", + }), + ).rejects.toThrowError( + expect.objectContaining({ cause: expect.objectContaining({ message: "socket hang up" }) }), + ); + }); + + it("should not retry ECONNRESET when retry is disabled", async () => { + nock(URL, { encodedQueryParams: true }) + .get("/clients") + .replyWithError({ code: "ECONNRESET", message: "socket hang up" }) + .get("/clients") + .reply(200, [{ client_id: "123" }]); + + const client = new TestClient({ + baseUrl: URL, + parseError, + retry: { enabled: false }, + }); + + await expect( + client.testRequest({ + path: `/clients`, + method: "GET", + }), + ).rejects.toThrowError( + expect.objectContaining({ cause: expect.objectContaining({ message: "socket hang up" }) }), + ); + }); + + it("should not retry non-retryable errors like ECONNREFUSED", async () => { + nock(URL, { encodedQueryParams: true }) + .get("/clients") + .replyWithError({ code: "ECONNREFUSED", message: "connect ECONNREFUSED" }) + .get("/clients") + .reply(200, [{ client_id: "123" }]); + + const client = new TestClient({ + baseUrl: URL, + parseError, + }); + + await expect( + client.testRequest({ + path: `/clients`, + method: "GET", + }), + ).rejects.toThrowError( + expect.objectContaining({ cause: expect.objectContaining({ message: "connect ECONNREFUSED" }) }), + ); + + // Second nock was never consumed — confirms no retry occurred + expect(nock.pendingMocks().length).toBe(1); + nock.cleanAll(); + }); + + it("should not retry on timeout errors", async () => { + nock(URL) + .get("/clients") + .delayConnection(100) + .reply(200, [{ client_id: "123" }]) + .get("/clients") + .reply(200, [{ client_id: "123" }]); + + const client = new TestClient({ + baseUrl: URL, + parseError, + timeoutDuration: 50, + }); + + await expect( + client.testRequest({ + path: `/clients`, + method: "GET", + }), + ).rejects.toThrowError(expect.objectContaining({ cause: expect.objectContaining({ name: "TimeoutError" }) })); + + // Second nock was never consumed — confirms no retry occurred + expect(nock.pendingMocks().length).toBe(1); + nock.abortPendingRequests(); + }); + it("should timeout after default time", async () => { nock(URL).get("/clients").delayConnection(10000).reply(200, []);