Skip to content

Commit a1f7ec7

Browse files
committed
Fix Retry-After unit conversion and add jitter to retry delays
The HTTP Retry-After header is expressed in seconds (RFC 9110) but was being passed directly to setTimeout, which takes milliseconds. A server sending Retry-After: 2 caused the CLI to wait 2 ms instead of 2 seconds, triggering near-instant retry storms against upstream throttles. This was especially painful against the theme-kit-access proxy and Core Admin API, where parallel bulk theme uploads would retry in lockstep with no effective backoff. Retry-After values are now parsed as seconds and converted to ms before being used as a delay. Parallel retries also now receive +/-20% uniform random jitter via a new applyRetryJitterMs helper, which breaks up the lockstep pattern that caused thundering-herd retries against the same upstream throttle window.
1 parent 243750a commit a1f7ec7

2 files changed

Lines changed: 201 additions & 6 deletions

File tree

packages/cli-kit/src/private/node/api.test.ts

Lines changed: 177 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import {retryAwareRequest, isNetworkError, isTransientNetworkError} from './api.js'
1+
import {applyRetryJitterMs, retryAwareRequest, isNetworkError, isTransientNetworkError} from './api.js'
22
import {recordRetry} from '../../public/node/analytics.js'
33
import {ClientError} from 'graphql-request'
44
import {describe, test, vi, expect, beforeEach, afterEach} from 'vitest'
@@ -7,13 +7,21 @@ vi.mock('../../public/node/analytics.js', () => ({
77
recordRetry: vi.fn(),
88
}))
99

10+
// Pins Math.random so jitter multiplies the base delay by exactly 1.0 (the midpoint of the ±20% band).
11+
// Individual tests that need a different multiplier spy on Math.random directly.
12+
const MIDPOINT_RANDOM_VALUE = 0.5
13+
1014
describe('retryAwareRequest', () => {
15+
let randomSpy: ReturnType<typeof vi.spyOn>
16+
1117
beforeEach(() => {
1218
vi.useFakeTimers()
19+
randomSpy = vi.spyOn(Math, 'random').mockReturnValue(MIDPOINT_RANDOM_VALUE)
1320
})
1421

1522
afterEach(() => {
1623
vi.useRealTimers()
24+
randomSpy.mockRestore()
1725
})
1826

1927
test('handles retries', async () => {
@@ -88,7 +96,9 @@ describe('retryAwareRequest', () => {
8896

8997
expect(mockRequestFn).toHaveBeenCalledTimes(4)
9098
expect(mockScheduleDelayFn).toHaveBeenCalledTimes(2)
91-
expect(mockScheduleDelayFn).toHaveBeenNthCalledWith(1, expect.anything(), 200)
99+
// Retry-After: 200 seconds -> 200_000 ms; jitter pinned to midpoint (1.0x).
100+
expect(mockScheduleDelayFn).toHaveBeenNthCalledWith(1, expect.anything(), 200_000)
101+
// defaultDelayMs: 500 ms; jitter pinned to midpoint (1.0x).
92102
expect(mockScheduleDelayFn).toHaveBeenNthCalledWith(2, expect.anything(), 500)
93103
})
94104

@@ -602,6 +612,171 @@ describe('retryAwareRequest', () => {
602612
expect(recordRetry).toHaveBeenCalledTimes(1)
603613
expect(recordRetry).toHaveBeenCalledWith('https://themes.example.com/auth', 'http-retry-1:can-retry:')
604614
})
615+
616+
test('parses Retry-After header as seconds and converts to milliseconds', async () => {
617+
const rateLimitedResponse = {
618+
status: 200,
619+
errors: [
620+
{
621+
extensions: {
622+
code: '429',
623+
},
624+
} as any,
625+
],
626+
headers: new Headers({'retry-after': '2'}),
627+
}
628+
629+
const successResponse = {
630+
status: 200,
631+
data: {ok: true},
632+
headers: new Headers(),
633+
}
634+
635+
const mockRequestFn = vi
636+
.fn()
637+
.mockImplementationOnce(() => {
638+
throw new ClientError(rateLimitedResponse, {query: ''})
639+
})
640+
.mockImplementation(() => Promise.resolve(successResponse))
641+
642+
const mockScheduleDelayFn = vi.fn((fn) => fn())
643+
644+
const result = retryAwareRequest(
645+
{
646+
request: mockRequestFn,
647+
url: 'https://example.com',
648+
useNetworkLevelRetry: true,
649+
maxRetryTimeMs: 10_000,
650+
},
651+
undefined,
652+
{scheduleDelay: mockScheduleDelayFn},
653+
)
654+
await vi.runAllTimersAsync()
655+
await expect(result).resolves.toEqual(successResponse)
656+
657+
// Retry-After: 2 -> 2000 ms (not 2 ms). Jitter pinned to midpoint (1.0x) via Math.random mock.
658+
expect(mockScheduleDelayFn).toHaveBeenCalledTimes(1)
659+
expect(mockScheduleDelayFn).toHaveBeenNthCalledWith(1, expect.anything(), 2000)
660+
})
661+
662+
test('uses DEFAULT_RETRY_DELAY_MS when Retry-After header is absent and no caller default', async () => {
663+
const rateLimitedResponse = {
664+
status: 200,
665+
errors: [{extensions: {code: '429'}} as any],
666+
headers: new Headers(),
667+
}
668+
669+
const successResponse = {
670+
status: 200,
671+
data: {ok: true},
672+
headers: new Headers(),
673+
}
674+
675+
const mockRequestFn = vi
676+
.fn()
677+
.mockImplementationOnce(() => {
678+
throw new ClientError(rateLimitedResponse, {query: ''})
679+
})
680+
.mockImplementation(() => Promise.resolve(successResponse))
681+
682+
const mockScheduleDelayFn = vi.fn((fn) => fn())
683+
684+
const result = retryAwareRequest(
685+
{
686+
request: mockRequestFn,
687+
url: 'https://example.com',
688+
useNetworkLevelRetry: true,
689+
maxRetryTimeMs: 10_000,
690+
},
691+
undefined,
692+
{scheduleDelay: mockScheduleDelayFn},
693+
)
694+
await vi.runAllTimersAsync()
695+
await expect(result).resolves.toEqual(successResponse)
696+
697+
// No Retry-After, no defaultDelayMs -> falls back to DEFAULT_RETRY_DELAY_MS (1000).
698+
// Jitter pinned to midpoint (1.0x) via Math.random mock.
699+
expect(mockScheduleDelayFn).toHaveBeenCalledTimes(1)
700+
expect(mockScheduleDelayFn).toHaveBeenNthCalledWith(1, expect.anything(), 1000)
701+
})
702+
703+
test('applies jitter multiplier from Math.random to retry delay', async () => {
704+
// Math.random() = 0.0 -> multiplier = 0.8 (lower bound).
705+
randomSpy.mockReturnValue(0)
706+
707+
const rateLimitedResponse = {
708+
status: 200,
709+
errors: [{extensions: {code: '429'}} as any],
710+
headers: new Headers(),
711+
}
712+
713+
const successResponse = {
714+
status: 200,
715+
data: {ok: true},
716+
headers: new Headers(),
717+
}
718+
719+
const mockRequestFn = vi
720+
.fn()
721+
.mockImplementationOnce(() => {
722+
throw new ClientError(rateLimitedResponse, {query: ''})
723+
})
724+
.mockImplementation(() => Promise.resolve(successResponse))
725+
726+
const mockScheduleDelayFn = vi.fn((fn) => fn())
727+
728+
const result = retryAwareRequest(
729+
{
730+
request: mockRequestFn,
731+
url: 'https://example.com',
732+
useNetworkLevelRetry: true,
733+
maxRetryTimeMs: 10_000,
734+
},
735+
undefined,
736+
{defaultDelayMs: 1000, scheduleDelay: mockScheduleDelayFn},
737+
)
738+
await vi.runAllTimersAsync()
739+
await expect(result).resolves.toEqual(successResponse)
740+
741+
// 1000 * 0.8 = 800.
742+
expect(mockScheduleDelayFn).toHaveBeenNthCalledWith(1, expect.anything(), 800)
743+
})
744+
})
745+
746+
describe('applyRetryJitterMs', () => {
747+
test('returns the lower bound (0.8x) when random() is 0', () => {
748+
expect(applyRetryJitterMs(1000, () => 0)).toBe(800)
749+
})
750+
751+
test('returns the upper bound (~1.2x) when random() approaches 1', () => {
752+
// Math.random() returns [0, 1). Using 0.9999... exercises the top of the range.
753+
const nearOne = 1 - Number.EPSILON
754+
expect(applyRetryJitterMs(1000, () => nearOne)).toBeCloseTo(1200, 5)
755+
})
756+
757+
test('returns the midpoint (1.0x) when random() is 0.5', () => {
758+
expect(applyRetryJitterMs(1000, () => 0.5)).toBe(1000)
759+
})
760+
761+
test('keeps output within [0.8x, 1.2x] across the random range', () => {
762+
const baseDelayMs = 2500
763+
const samples = [0, 0.1, 0.25, 0.5, 0.75, 0.9, 0.9999]
764+
for (const randomValue of samples) {
765+
const jittered = applyRetryJitterMs(baseDelayMs, () => randomValue)
766+
expect(jittered).toBeGreaterThanOrEqual(baseDelayMs * 0.8)
767+
expect(jittered).toBeLessThanOrEqual(baseDelayMs * 1.2)
768+
}
769+
})
770+
771+
test('defaults to Math.random when no generator is passed', () => {
772+
const spy = vi.spyOn(Math, 'random').mockReturnValue(0.5)
773+
try {
774+
expect(applyRetryJitterMs(1000)).toBe(1000)
775+
expect(spy).toHaveBeenCalled()
776+
} finally {
777+
spy.mockRestore()
778+
}
779+
})
605780
})
606781

607782
describe('isTransientNetworkError', () => {

packages/cli-kit/src/private/node/api.ts

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,20 @@ export const allAPIs: API[] = ['admin', 'storefront-renderer', 'partners', 'busi
1515

1616
const DEFAULT_RETRY_DELAY_MS = 1000
1717
const DEFAULT_RETRY_LIMIT = 10
18+
const MS_PER_SECOND = 1000
19+
const RETRY_JITTER_LOWER_BOUND = 0.8
20+
const RETRY_JITTER_UPPER_BOUND = 1.2
21+
22+
/**
23+
* Applies uniform random jitter in the range [RETRY_JITTER_LOWER_BOUND, RETRY_JITTER_UPPER_BOUND]
24+
* to a retry delay. Breaks up lockstep retries from parallel requests that would otherwise
25+
* produce a thundering herd against the same upstream throttle window.
26+
*/
27+
export function applyRetryJitterMs(delayMs: number, random: () => number = Math.random): number {
28+
const jitterRange = RETRY_JITTER_UPPER_BOUND - RETRY_JITTER_LOWER_BOUND
29+
const multiplier = RETRY_JITTER_LOWER_BOUND + random() * jitterRange
30+
return delayMs * multiplier
31+
}
1832

1933
export type NetworkRetryBehaviour =
2034
| {
@@ -197,7 +211,11 @@ async function makeVerboseRequest<T extends {headers: Headers; status: number}>(
197211
let delayMs: number | undefined
198212

199213
try {
200-
delayMs = responseHeaders['retry-after'] ? Number.parseInt(responseHeaders['retry-after'], 10) : undefined
214+
// The HTTP Retry-After header is expressed in seconds; setTimeout takes milliseconds.
215+
const retryAfterSeconds = responseHeaders['retry-after']
216+
? Number.parseInt(responseHeaders['retry-after'], 10)
217+
: undefined
218+
delayMs = retryAfterSeconds === undefined ? undefined : retryAfterSeconds * MS_PER_SECOND
201219
// eslint-disable-next-line no-catch-all/no-catch-all
202220
} catch {
203221
// ignore errors in extracting retry-after header
@@ -388,14 +406,16 @@ ${result.sanitizedHeaders}
388406
}
389407

390408
// prefer to wait based on a header if given; the caller's preference if not; and a default if neither.
391-
const retryDelayMs = result.delayMs ?? retryOptions.defaultDelayMs ?? DEFAULT_RETRY_DELAY_MS
392-
outputDebug(`Scheduling retry request #${retriesUsed} to ${result.sanitizedUrl} in ${retryDelayMs} ms`)
409+
const baseRetryDelayMs = result.delayMs ?? retryOptions.defaultDelayMs ?? DEFAULT_RETRY_DELAY_MS
410+
// Jitter prevents parallel retries from synchronising on the same upstream throttle window.
411+
const jitteredRetryDelayMs = applyRetryJitterMs(baseRetryDelayMs)
412+
outputDebug(`Scheduling retry request #${retriesUsed} to ${result.sanitizedUrl} in ${jitteredRetryDelayMs} ms`)
393413

394414
// eslint-disable-next-line no-await-in-loop
395415
result = await new Promise<VerboseResponse<T>>((resolve) => {
396416
retryOptions.scheduleDelay(() => {
397417
resolve(makeVerboseRequest(requestOptions))
398-
}, retryDelayMs)
418+
}, jitteredRetryDelayMs)
399419
})
400420
}
401421
}

0 commit comments

Comments
 (0)