Skip to content

Commit 74884be

Browse files
abueideclaude
andauthored
feat(core): honor Retry-After on any retryable code (HTTP 529 + beyond) (#1294)
Per the "Generic Retry-After Header Support" design (Option B), the Retry-After header — not the specific status code — is the authoritative signal to back off. Previously the SDK only parsed Retry-After on 429 and let every other retryable code (529, 503, 408, …) fall into exponential backoff, ignoring the server's requested wait. SegmentDestination now parses Retry-After on any error response and routes retryable responses that carry it through the server-directed wait path, while codes without the header keep exponential backoff and permanent codes still drop. RetryManager gains handleRetryAfter() as the generic entry point; handle429() delegates to it, preserving existing 429 behavior. Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent ccd53ca commit 74884be

5 files changed

Lines changed: 359 additions & 23 deletions

File tree

e2e-cli/e2e-config.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"patch": null,
66
"env": {
77
"BROWSER_BATCHING": "true",
8-
"HTTP_CONFIG_SETTINGS": "true"
8+
"HTTP_CONFIG_SETTINGS": "true",
9+
"GENERIC_RETRY_AFTER": "true"
910
}
1011
}

packages/core/src/backoff/RetryManager.ts

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,18 @@ export class RetryManager {
164164
}
165165

166166
/**
167-
* Handle a 429 rate limit response.
168-
* Uses server-specified wait time from Retry-After header.
167+
* Handle a response that carries a server-directed wait via the Retry-After
168+
* header. Originally added for 429 rate limiting, this is the authoritative
169+
* "the server told us exactly how long to wait" path and is reused for any
170+
* retryable status code (429, 529, 503, 408, …) that includes Retry-After.
171+
*
172+
* Uses the server-specified wait time (clamped to maxRetryInterval), pauses
173+
* all uploads, and enforces the same maxRetryCount / maxRateLimitDuration
174+
* safety valves as the 429 path.
169175
*/
170-
async handle429(retryAfterSeconds: number): Promise<RetryResult | undefined> {
176+
async handleRetryAfter(
177+
retryAfterSeconds: number
178+
): Promise<RetryResult | undefined> {
171179
if (this.rateLimitConfig?.enabled !== true) {
172180
return undefined;
173181
}
@@ -189,6 +197,14 @@ export class RetryManager {
189197
);
190198
}
191199

200+
/**
201+
* Handle a 429 rate limit response.
202+
* Delegates to the shared Retry-After (server-directed wait) path.
203+
*/
204+
async handle429(retryAfterSeconds: number): Promise<RetryResult | undefined> {
205+
return this.handleRetryAfter(retryAfterSeconds);
206+
}
207+
192208
/**
193209
* Handle a transient error (5xx, network failure).
194210
* Uses exponential backoff to calculate wait time.

packages/core/src/backoff/__tests__/RetryManager.test.ts

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,75 @@ describe('RetryManager', () => {
146146
});
147147
});
148148

149+
describe('handleRetryAfter (generic server-directed wait)', () => {
150+
it('waits the server-directed time for a non-429 code', async () => {
151+
const now = 1000000;
152+
jest.spyOn(Date, 'now').mockReturnValue(now);
153+
154+
const rm = new RetryManager(
155+
'test-key',
156+
mockPersistor,
157+
defaultRateLimitConfig,
158+
defaultBackoffConfig,
159+
mockLogger
160+
);
161+
162+
// e.g. a 529 with Retry-After: 30
163+
expect(await rm.handleRetryAfter(30)).toBe('rate_limited');
164+
165+
jest.spyOn(Date, 'now').mockReturnValue(now + 29000);
166+
expect(await rm.canRetry()).toBe(false);
167+
jest.spyOn(Date, 'now').mockReturnValue(now + 31000);
168+
expect(await rm.canRetry()).toBe(true);
169+
});
170+
171+
it('clamps the wait to maxRetryInterval', async () => {
172+
const rm = new RetryManager(
173+
'test-key',
174+
mockPersistor,
175+
defaultRateLimitConfig,
176+
defaultBackoffConfig,
177+
mockLogger
178+
);
179+
180+
await rm.handleRetryAfter(500); // exceeds maxRetryInterval of 300
181+
182+
expect(mockLogger.warn).toHaveBeenCalledWith(
183+
expect.stringContaining('exceeds maxRetryInterval')
184+
);
185+
});
186+
187+
it('returns undefined when rate limit config is disabled', async () => {
188+
const disabledConfig: RateLimitConfig = {
189+
...defaultRateLimitConfig,
190+
enabled: false,
191+
};
192+
const rm = new RetryManager(
193+
'test-key',
194+
mockPersistor,
195+
disabledConfig,
196+
defaultBackoffConfig,
197+
mockLogger
198+
);
199+
200+
expect(await rm.handleRetryAfter(30)).toBeUndefined();
201+
});
202+
203+
it('handle429 delegates to the same path', async () => {
204+
const rm = new RetryManager(
205+
'test-key',
206+
mockPersistor,
207+
defaultRateLimitConfig,
208+
defaultBackoffConfig,
209+
mockLogger
210+
);
211+
212+
const spy = jest.spyOn(rm, 'handleRetryAfter');
213+
await rm.handle429(15);
214+
expect(spy).toHaveBeenCalledWith(15);
215+
});
216+
});
217+
149218
describe('handle429', () => {
150219
it('increments retry count', async () => {
151220
const now = 1000000;

packages/core/src/plugins/SegmentDestination.ts

Lines changed: 48 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,22 @@ export const SEGMENT_DESTINATION_KEY = 'Segment.io';
3434
type BatchResult = {
3535
batch: SegmentEvent[];
3636
messageIds: string[];
37-
status: 'success' | '429' | 'transient' | 'permanent' | 'network_error';
37+
// 'retry_after' = a retryable response that carried a Retry-After header
38+
// (429 or any other retryable code), so we wait the server-directed time
39+
// instead of computing exponential backoff.
40+
status:
41+
| 'success'
42+
| 'retry_after'
43+
| 'transient'
44+
| 'permanent'
45+
| 'network_error';
3846
statusCode?: number;
3947
retryAfterSeconds?: number;
4048
};
4149

4250
type ErrorAggregation = {
4351
successfulMessageIds: string[];
44-
rateLimitResults: BatchResult[];
52+
serverDirectedResults: BatchResult[];
4553
hasTransientError: boolean;
4654
permanentErrorMessageIds: string[];
4755
retryableMessageIds: string[];
@@ -92,21 +100,36 @@ export class SegmentDestination extends DestinationPlugin {
92100

93101
switch (classification.errorType) {
94102
case 'rate_limit':
103+
// 429: always a server-directed wait. Default to 60s when the header
104+
// is missing/invalid, preserving prior behavior.
95105
return {
96106
batch,
97107
messageIds,
98-
status: '429',
108+
status: 'retry_after',
99109
statusCode: res.status,
100110
retryAfterSeconds: retryAfterSeconds ?? 60,
101111
};
102112
case 'transient':
113+
// Any other retryable code (529, 503, 408, …): if the server sent a
114+
// valid Retry-After, honor it as a server-directed wait. Otherwise use
115+
// exponential backoff (unchanged behavior).
116+
if (retryAfterSeconds !== undefined) {
117+
return {
118+
batch,
119+
messageIds,
120+
status: 'retry_after',
121+
statusCode: res.status,
122+
retryAfterSeconds,
123+
};
124+
}
103125
return {
104126
batch,
105127
messageIds,
106128
status: 'transient',
107129
statusCode: res.status,
108130
};
109131
default:
132+
// Permanent: drop. Retry-After is ignored on non-retryable codes.
110133
return {
111134
batch,
112135
messageIds,
@@ -136,13 +159,16 @@ export class SegmentDestination extends DestinationPlugin {
136159
retryCount,
137160
});
138161

139-
const retryAfterSeconds =
140-
res.status === 429
141-
? parseRetryAfter(
142-
res.headers.get('Retry-After'),
143-
this.getRateLimitConfig()?.maxRetryInterval
144-
)
145-
: undefined;
162+
// Parse Retry-After on any error response (not just 429). The header —
163+
// regardless of which retryable status code carries it — is the
164+
// authoritative signal for how long to wait. classifyBatchResult decides
165+
// whether to honor it (retryable codes) or ignore it (permanent codes).
166+
const retryAfterSeconds = res.ok
167+
? undefined
168+
: parseRetryAfter(
169+
res.headers.get('Retry-After'),
170+
this.getRateLimitConfig()?.maxRetryInterval
171+
);
146172

147173
return this.classifyBatchResult(
148174
res,
@@ -173,7 +199,7 @@ export class SegmentDestination extends DestinationPlugin {
173199
private aggregateErrors(results: BatchResult[]): ErrorAggregation {
174200
const aggregation: ErrorAggregation = {
175201
successfulMessageIds: [],
176-
rateLimitResults: [],
202+
serverDirectedResults: [],
177203
hasTransientError: false,
178204
permanentErrorMessageIds: [],
179205
retryableMessageIds: [],
@@ -184,8 +210,8 @@ export class SegmentDestination extends DestinationPlugin {
184210
case 'success':
185211
aggregation.successfulMessageIds.push(...result.messageIds);
186212
break;
187-
case '429':
188-
aggregation.rateLimitResults.push(result);
213+
case 'retry_after':
214+
aggregation.serverDirectedResults.push(result);
189215
aggregation.retryableMessageIds.push(...result.messageIds);
190216
break;
191217
case 'transient':
@@ -256,12 +282,14 @@ export class SegmentDestination extends DestinationPlugin {
256282
return false;
257283
}
258284

259-
const has429 = aggregation.rateLimitResults.length > 0;
285+
const hasServerDirectedWait = aggregation.serverDirectedResults.length > 0;
260286
let result: RetryResult | undefined;
261287

262-
if (has429) {
263-
for (const r of aggregation.rateLimitResults) {
264-
result = await this.retryManager.handle429(r.retryAfterSeconds ?? 60);
288+
if (hasServerDirectedWait) {
289+
for (const r of aggregation.serverDirectedResults) {
290+
result = await this.retryManager.handleRetryAfter(
291+
r.retryAfterSeconds ?? 60
292+
);
265293
}
266294
} else if (aggregation.hasTransientError) {
267295
result = await this.retryManager.handleTransientError();
@@ -316,9 +344,10 @@ export class SegmentDestination extends DestinationPlugin {
316344
aggregation.successfulMessageIds.length -
317345
aggregation.permanentErrorMessageIds.length;
318346
if (failedCount > 0) {
319-
const has429 = aggregation.rateLimitResults.length > 0;
347+
const hasServerDirectedWait =
348+
aggregation.serverDirectedResults.length > 0;
320349
this.analytics?.logger.warn(
321-
`${failedCount} events will retry (429: ${has429}, transient: ${aggregation.hasTransientError})`
350+
`${failedCount} events will retry (retry-after: ${hasServerDirectedWait}, transient: ${aggregation.hasTransientError})`
322351
);
323352
}
324353
}

0 commit comments

Comments
 (0)