Skip to content

Commit 1141ffd

Browse files
fix(billing): short-circuit retryWithBackoff on non-transient Stripe errors (#3697)
* fix(billing): short-circuit retryWithBackoff on non-transient Stripe errors retryWithBackoff retried every error type, wasting ~600ms of backoff on deterministic StripeInvalidRequestError before dead-lettering. Add an optional shouldRetry(err) predicate (default: always retry) and pass a non-transient classifier at the PI-backfill call site. The err.stack finding from the issue is already satisfied by #3690 (both logger.error calls include stack) — no change needed. Closes #3691 * test(billing): cover StripeInvalidRequestError class-name branch in retry short-circuit The shouldRetry predicate guards both Stripe error type spellings (StripeInvalidRequestError class name + invalid_request_error raw type, which vary by stripe-node version). Add a parallel short-circuit test for the class-name value and align the existing test's predicate with the production one. Closes a coverage gap flagged by the pre-push gate. * fix(billing): address reviewer nits — narrow comment + improve log wording - Narrow shouldRetry JSDoc comment to only mention invalid_request_error (not auth errors) — matches what the predicate actually checks (Copilot thread PRRT_kwDOBss37M6EM5HL) - Change failure log from "failed after retries" to "failed (retries exhausted or skipped)" to reflect short-circuit path (CodeRabbit thread PRRT_kwDOBss37M6EM5c1) - Update billing.refund-correlation tests to match new log wording
1 parent 3df0034 commit 1141ffd

4 files changed

Lines changed: 118 additions & 7 deletions

File tree

modules/billing/lib/billing.retry.js

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,27 +6,37 @@
66
* each non-final attempt i.
77
*
88
* Returns the result of the first successful call, or throws the last
9-
* error after all attempts are exhausted.
9+
* error after all attempts are exhausted. If `shouldRetry(err)` returns
10+
* false for a thrown error, the error is rethrown immediately without
11+
* further attempts or delay.
1012
*
1113
* @param {() => Promise<T>} fn - Async function to attempt.
1214
* @param {object} [opts]
1315
* @param {number} [opts.attempts=3] - Maximum number of attempts (including the first call).
1416
* @param {number} [opts.baseMs=200] - Base delay in ms for the first retry.
17+
* @param {(err: unknown) => boolean} [opts.shouldRetry] - Predicate; return false to stop retrying for a given error. Defaults to always retry.
1518
* @returns {Promise<T>}
1619
*/
17-
export async function retryWithBackoff(fn, { attempts = 3, baseMs = 200 } = {}) {
20+
export async function retryWithBackoff(fn, { attempts = 3, baseMs = 200, shouldRetry = () => true } = {}) {
1821
if (!Number.isInteger(attempts) || attempts < 1) {
1922
throw new TypeError(`retryWithBackoff: attempts must be a positive integer, received ${attempts}`);
2023
}
2124
if (!Number.isFinite(baseMs) || baseMs < 0) {
2225
throw new TypeError(`retryWithBackoff: baseMs must be a non-negative finite number, received ${baseMs}`);
2326
}
27+
if (typeof shouldRetry !== 'function') {
28+
throw new TypeError(`retryWithBackoff: shouldRetry must be a function, received ${typeof shouldRetry}`);
29+
}
2430
let lastErr;
2531
for (let i = 0; i < attempts; i++) {
2632
try {
2733
return await fn();
2834
} catch (err) {
2935
lastErr = err;
36+
// Deterministic errors never succeed on retry — rethrow now instead of burning the backoff budget.
37+
if (!shouldRetry(err)) {
38+
throw err;
39+
}
3040
if (i < attempts - 1) {
3141
await new Promise((resolve) => setTimeout(resolve, baseMs * 2 ** i));
3242
}

modules/billing/services/billing.webhook.service.js

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -320,14 +320,22 @@ const handleCheckoutPaymentCompleted = async (session) => {
320320
organizationId,
321321
packId,
322322
kind: 'extras',
323-
stripeSessionId, // real cs_* ID (replaces SENTINEL_PENDING)
323+
stripeSessionId, // real cs_* ID (replaces SENTINEL_PENDING)
324324
},
325325
}),
326-
{ attempts: 3, baseMs: 200 },
326+
{
327+
attempts: 3,
328+
baseMs: 200,
329+
// Skip retries on Stripe invalid-request errors (invalid_request_error /
330+
// StripeInvalidRequestError) — these are deterministic client errors that never
331+
// succeed on retry and only delay the dead-letter path.
332+
shouldRetry: (err) =>
333+
err?.type !== 'StripeInvalidRequestError' && err?.type !== 'invalid_request_error',
334+
},
327335
);
328336
} catch (err) {
329337
logger.error(
330-
'[billing.webhook] PI metadata backfill failed after retries — refund correlation at risk',
338+
'[billing.webhook] PI metadata backfill failed (retries exhausted or skipped) — refund correlation at risk',
331339
{ paymentIntentId, stripeSessionId, error: err?.message ?? String(err), stack: err?.stack },
332340
);
333341
try {

modules/billing/tests/billing.refund-correlation.unit.tests.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ describe('checkout.session.completed — metadata backfill retry:', () => {
158158

159159
// logger.error should be called for the final failure.
160160
expect(mockLogger.error).toHaveBeenCalledWith(
161-
expect.stringContaining('backfill failed after retries'),
161+
expect.stringContaining('backfill failed (retries exhausted or skipped)'),
162162
expect.objectContaining({ paymentIntentId }),
163163
);
164164

@@ -185,7 +185,7 @@ describe('checkout.session.completed — metadata backfill retry:', () => {
185185
// Both error paths should be logged.
186186
expect(mockLogger.error).toHaveBeenCalledTimes(2);
187187
expect(mockLogger.error).toHaveBeenCalledWith(
188-
expect.stringContaining('backfill failed after retries'),
188+
expect.stringContaining('backfill failed (retries exhausted or skipped)'),
189189
expect.objectContaining({ paymentIntentId }),
190190
);
191191
expect(mockLogger.error).toHaveBeenCalledWith(
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/**
2+
* Module dependencies.
3+
*/
4+
import { jest, describe, test, afterEach, expect } from '@jest/globals';
5+
import { retryWithBackoff } from '../lib/billing.retry.js';
6+
7+
/**
8+
* Unit tests for retryWithBackoff:
9+
* - success / transient-retry / exhaustion paths
10+
* - shouldRetry short-circuit on non-transient (deterministic) errors
11+
* - argument validation
12+
* Retry-delay paths use fake timers so the suite never incurs real backoff waits.
13+
*/
14+
describe('retryWithBackoff', () => {
15+
afterEach(() => {
16+
jest.useRealTimers();
17+
});
18+
19+
test('returns the result on first success without retrying', async () => {
20+
const fn = jest.fn().mockResolvedValue('ok');
21+
await expect(retryWithBackoff(fn)).resolves.toBe('ok');
22+
expect(fn).toHaveBeenCalledTimes(1);
23+
});
24+
25+
test('retries a transient failure then resolves', async () => {
26+
jest.useFakeTimers();
27+
const fn = jest.fn().mockRejectedValueOnce(new Error('boom')).mockResolvedValueOnce('ok');
28+
const promise = retryWithBackoff(fn, { attempts: 3, baseMs: 200 });
29+
await jest.runAllTimersAsync();
30+
await expect(promise).resolves.toBe('ok');
31+
expect(fn).toHaveBeenCalledTimes(2);
32+
});
33+
34+
test('throws the last error after exhausting all attempts', async () => {
35+
jest.useFakeTimers();
36+
const fn = jest.fn().mockRejectedValue(new Error('always fails'));
37+
const promise = retryWithBackoff(fn, { attempts: 3, baseMs: 200 });
38+
const assertion = expect(promise).rejects.toThrow('always fails');
39+
await jest.runAllTimersAsync();
40+
await assertion;
41+
expect(fn).toHaveBeenCalledTimes(3);
42+
});
43+
44+
test('short-circuits (1 call, not 3) when shouldRetry returns false', async () => {
45+
const err = Object.assign(new Error('bad params'), { type: 'invalid_request_error' });
46+
const fn = jest.fn().mockRejectedValue(err);
47+
await expect(
48+
retryWithBackoff(fn, {
49+
attempts: 3,
50+
baseMs: 200,
51+
shouldRetry: (e) => e?.type !== 'StripeInvalidRequestError' && e?.type !== 'invalid_request_error',
52+
}),
53+
).rejects.toThrow('bad params');
54+
expect(fn).toHaveBeenCalledTimes(1);
55+
});
56+
57+
test('short-circuits on the StripeInvalidRequestError class-name type too', async () => {
58+
// stripe-node exposes the error class name on .type in some SDK versions and the
59+
// raw API type string in others; the call-site predicate guards both, so cover both.
60+
const err = Object.assign(new Error('bad params'), { type: 'StripeInvalidRequestError' });
61+
const fn = jest.fn().mockRejectedValue(err);
62+
await expect(
63+
retryWithBackoff(fn, {
64+
attempts: 3,
65+
baseMs: 200,
66+
shouldRetry: (e) => e?.type !== 'StripeInvalidRequestError' && e?.type !== 'invalid_request_error',
67+
}),
68+
).rejects.toThrow('bad params');
69+
expect(fn).toHaveBeenCalledTimes(1);
70+
});
71+
72+
test('retries to exhaustion when shouldRetry returns true', async () => {
73+
jest.useFakeTimers();
74+
const fn = jest.fn().mockRejectedValue(new Error('transient'));
75+
const promise = retryWithBackoff(fn, {
76+
attempts: 3,
77+
baseMs: 200,
78+
shouldRetry: () => true,
79+
});
80+
const assertion = expect(promise).rejects.toThrow('transient');
81+
await jest.runAllTimersAsync();
82+
await assertion;
83+
expect(fn).toHaveBeenCalledTimes(3);
84+
});
85+
86+
test('rejects invalid attempts / baseMs / shouldRetry with TypeError', async () => {
87+
const fn = jest.fn().mockResolvedValue('ok');
88+
await expect(retryWithBackoff(fn, { attempts: 0 })).rejects.toThrow(TypeError);
89+
await expect(retryWithBackoff(fn, { baseMs: -1 })).rejects.toThrow(TypeError);
90+
await expect(retryWithBackoff(fn, { shouldRetry: 'nope' })).rejects.toThrow(TypeError);
91+
expect(fn).not.toHaveBeenCalled();
92+
});
93+
});

0 commit comments

Comments
 (0)