Skip to content

Commit 6595152

Browse files
authored
Improve HTTP response handling and retry behavior (#1342)
Add error handling and retries to more HTTP error code cases for both browser and Node
1 parent ef93c3b commit 6595152

36 files changed

Lines changed: 3735 additions & 251 deletions
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
---
2+
'@segment/analytics-next': minor
3+
'@segment/analytics-node': minor
4+
'@segment/analytics-core': patch
5+
---
6+
7+
Unify and harden HTTP response handling and retry behavior across browser and node SDKs.
8+
9+
- Browser (`@segment/analytics-next`)
10+
- Add config-driven response handling for Segment.io delivery (`httpConfig` with rate-limit/backoff controls).
11+
- Improve batching/dispatcher retry semantics for 429 and transient failures.
12+
- Use configured `protocol` for batching requests when `apiHost` has no scheme, while preserving compatibility for `apiHost` values that already include `http://` or `https://`.
13+
14+
- Node (`@segment/analytics-node`)
15+
- Align publisher retry/status behavior with updated response handling rules.
16+
- Add `maxTotalBackoffDuration` and `maxRateLimitDuration` settings to control retry ceilings.
17+
- Update default retry configuration to increase resilience under transient failures.
18+
19+
- Core (`@segment/analytics-core`)
20+
- Standardize backoff defaults used by retry queues.

.github/workflows/e2e-browser-tests.yml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,9 @@ jobs:
4747
with:
4848
node-version: '20'
4949

50-
- name: Apply HTTP patch for testing
51-
working-directory: sdk
52-
run: |
53-
git apply ../sdk-e2e-tests/patches/analytics-browser-http.patch
54-
echo "HTTP patch applied successfully"
50+
# The batched-dispatcher double-scheme bug is fixed in the SDK source,
51+
# so the HTTP patch is no longer needed. run-tests.sh will gracefully
52+
# skip it via --check if the e2e-config.json still references it.
5553

5654
- name: Install SDK dependencies
5755
working-directory: sdk
Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
{
22
"sdk": "browser",
3-
"test_suites": "basic,settings",
3+
"test_suites": "basic,settings,retry,retry-settings",
44
"auto_settings": true,
55
"patch": "analytics-browser-http.patch",
66
"env": {
77
"BROWSER_BATCHING": "false",
8-
"SETTINGS_ERROR_FALLBACK": "false"
8+
"HTTP_CONFIG_SETTINGS": "true",
9+
"SETTINGS_ERROR_FALLBACK": "false",
10+
"E2E_TEST_SKIP": "settings-enabled-flag"
911
}
1012
}

packages/browser/e2e-cli/src/cli.ts

Lines changed: 145 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,98 @@ interface CLIInput {
5252
config?: CLIConfig
5353
}
5454

55+
// --- Fetch Monitor ---
56+
// The browser SDK's Segment.io plugin handles retries internally and swallows
57+
// all errors (never fires delivery_failure events). We monitor fetch calls to
58+
// detect when delivery activity has settled and to observe final HTTP statuses.
59+
60+
let lastApiResponseTime = 0
61+
let inflightApiRequests = 0
62+
let lastApiStatus = 0
63+
let firstApiErrorStatus = 0
64+
let apiHostPattern = ''
65+
66+
function installFetchMonitor(apiHost: string): void {
67+
apiHostPattern = apiHost.replace(/^https?:\/\//, '')
68+
const nativeFetch = globalThis.fetch
69+
70+
;(globalThis as any).fetch = async function monitoredFetch(
71+
input: RequestInfo | URL,
72+
init?: RequestInit
73+
): Promise<Response> {
74+
const url =
75+
typeof input === 'string'
76+
? input
77+
: input instanceof URL
78+
? input.href
79+
: (input as Request).url
80+
81+
// Only monitor API requests, not CDN settings/project requests
82+
const isApi =
83+
apiHostPattern &&
84+
url.includes(apiHostPattern) &&
85+
!url.includes('/settings') &&
86+
!url.includes('/projects')
87+
88+
if (!isApi) {
89+
return nativeFetch.call(globalThis, input, init)
90+
}
91+
92+
inflightApiRequests++
93+
try {
94+
const response = await nativeFetch.call(globalThis, input, init)
95+
lastApiStatus = response.status
96+
lastApiResponseTime = Date.now()
97+
if (response.status >= 400 && firstApiErrorStatus === 0) {
98+
firstApiErrorStatus = response.status
99+
}
100+
return response
101+
} catch (err) {
102+
lastApiResponseTime = Date.now()
103+
throw err
104+
} finally {
105+
inflightApiRequests--
106+
}
107+
}
108+
}
109+
110+
/**
111+
* Wait for all API delivery activity to settle.
112+
*
113+
* The browser SDK's scheduleFlush uses a small random delay (100-600ms)
114+
* between retry cycles, plus exponential backoff from pushWithBackoff.
115+
* We wait until no API activity for a settling period.
116+
*/
117+
async function waitForDelivery(maxWaitMs = 60000): Promise<void> {
118+
const start = Date.now()
119+
120+
// Wait for at least one API request
121+
while (lastApiResponseTime === 0 && Date.now() - start < maxWaitMs) {
122+
await sleep(100)
123+
}
124+
125+
// Wait until no in-flight requests and enough quiet time
126+
while (Date.now() - start < maxWaitMs) {
127+
if (inflightApiRequests > 0) {
128+
await sleep(100)
129+
continue
130+
}
131+
132+
const elapsed = Date.now() - lastApiResponseTime
133+
// After success: brief settle for any remaining event dispatches.
134+
// After error: longer settle to allow for retry scheduling + backoff.
135+
// The fetch-dispatcher's core backoff uses minTimeout=500ms with
136+
// exponential growth: attempt 5 reaches ~500*2^4*random ≈ 8-16s.
137+
// Use 20s settle to accommodate higher retry attempts.
138+
const settleMs = lastApiStatus < 400 ? 1500 : 20000
139+
140+
if (elapsed >= settleMs) {
141+
return
142+
}
143+
await sleep(200)
144+
}
145+
}
146+
55147
// --- Helpers ---
56148

57149
function parseArgs(): string | null {
@@ -63,7 +155,7 @@ function parseArgs(): string | null {
63155
return args[inputIndex + 1]
64156
}
65157

66-
function delay(ms: number): Promise<void> {
158+
function sleep(ms: number): Promise<void> {
67159
return new Promise((resolve) => setTimeout(resolve, ms))
68160
}
69161

@@ -80,6 +172,11 @@ async function main(): Promise<void> {
80172

81173
const input: CLIInput = JSON.parse(inputJson)
82174

175+
// Install fetch monitor BEFORE importing the SDK
176+
if (input.apiHost) {
177+
installFetchMonitor(input.apiHost)
178+
}
179+
83180
// Create jsdom environment with the browser SDK
84181
const html = `
85182
<!DOCTYPE html>
@@ -112,7 +209,6 @@ async function main(): Promise<void> {
112209
;(global as any).XMLHttpRequest = window.XMLHttpRequest
113210

114211
// Import the browser SDK after setting up globals
115-
// We need to dynamically import to ensure globals are set first
116212
const { AnalyticsBrowser } = await import('@segment/analytics-next')
117213

118214
// Check if batching mode is enabled via environment variable
@@ -126,27 +222,40 @@ async function main(): Promise<void> {
126222
segmentConfig.protocol = protocol
127223

128224
if (useBatching) {
129-
// Batching mode: pass full URL (with scheme) since we patched batched-dispatcher
130-
// to check for existing scheme
131225
segmentConfig.apiHost = input.apiHost
132226
} else {
133-
// Standard mode: fetch-dispatcher uses the URL directly
134227
const apiHostStripped = input.apiHost.replace(/^https?:\/\//, '')
135228
segmentConfig.apiHost = apiHostStripped + '/v1'
136229
}
137230
}
138231

232+
// Wire maxRetries and backoff timing through httpConfig — this controls
233+
// both the plugin's PriorityQueue (fetch-dispatcher path) and the
234+
// batched-dispatcher's internal retry loop.
235+
{
236+
const backoffConfig: Record<string, unknown> = {
237+
// Use a short base interval so batched-dispatcher backoff aligns with
238+
// fetch-dispatcher's core backoff (100ms base). The default 500ms base
239+
// produces gaps that exceed the CLI's settle-time detection.
240+
baseBackoffInterval: 0.1,
241+
}
242+
if (input.config?.maxRetries != null) {
243+
backoffConfig.maxRetryCount = input.config.maxRetries
244+
}
245+
segmentConfig.httpConfig = { backoffConfig }
246+
}
247+
139248
if (useBatching) {
140249
segmentConfig.deliveryStrategy = {
141250
strategy: 'batching',
142251
config: {
143-
size: input.config?.flushAt ?? 1, // flush immediately for testing
252+
size: input.config?.flushAt ?? 1,
144253
timeout: 1000,
145254
},
146255
}
147256
}
148257

149-
// Initialize analytics with the provided config
258+
// Initialize analytics
150259
const [analytics] = await AnalyticsBrowser.load(
151260
{
152261
writeKey: input.writeKey,
@@ -160,21 +269,45 @@ async function main(): Promise<void> {
160269
}
161270
)
162271

272+
// Listen for delivery errors (now emitted by the Segment.io plugin)
273+
const deliveryErrors: string[] = []
274+
analytics.on('error', (err) => {
275+
const reason = (err as any).reason
276+
const msg =
277+
reason instanceof Error
278+
? reason.message
279+
: String(reason ?? (err as any).code)
280+
deliveryErrors.push(msg)
281+
})
282+
163283
// Process event sequences
164284
for (const seq of input.sequences) {
165285
if (seq.delayMs > 0) {
166-
await delay(seq.delayMs)
286+
await sleep(seq.delayMs)
167287
}
168288

169289
for (const event of seq.events) {
170290
await sendEvent(analytics, event)
171291
}
172292
}
173293

174-
// Wait for events to be sent (browser SDK auto-flushes)
175-
await delay(3000)
176-
177-
output = { success: true, sentBatches: 1 }
294+
// Wait for all delivery activity to settle
295+
await waitForDelivery()
296+
297+
// Determine success/failure from delivery errors (emitted by the
298+
// Segment.io plugin) and observed fetch responses as fallback.
299+
if (deliveryErrors.length > 0) {
300+
output = { success: false, error: deliveryErrors[0], sentBatches: 0 }
301+
} else if (lastApiStatus >= 400) {
302+
// Fetch monitor fallback: last response was an error
303+
output = {
304+
success: false,
305+
error: `HTTP ${firstApiErrorStatus || lastApiStatus}`,
306+
sentBatches: 0,
307+
}
308+
} else {
309+
output = { success: true, sentBatches: 1 }
310+
}
178311

179312
// Cleanup
180313
dom.window.close()

packages/browser/jest.config.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ module.exports = createJestTSConfig(__dirname, {
44
modulePathIgnorePatterns: ['<rootDir>/e2e-tests', '<rootDir>/qa'],
55
setupFilesAfterEnv: ['./jest.setup.js'],
66
testEnvironment: 'jsdom',
7+
moduleNameMapper: {
8+
'^@segment/analytics-page-tools$': '<rootDir>/../page-tools/src',
9+
},
710
coverageThreshold: {
811
global: {
912
branches: 0,

packages/browser/src/browser/__tests__/integration.test.ts

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,12 +1600,17 @@ describe('setting headers', () => {
16001600
await ajs.track('sup')
16011601

16021602
await sleep(10)
1603-
const [call] = fetchCalls.filter((el) =>
1604-
el.url.toString().includes('api.segment.io')
1603+
const call = fetchCalls.find(
1604+
(el) =>
1605+
el.url.toString().includes('api.segment.io') &&
1606+
(el.headers as Record<string, string>)?.['X-Test'] === 'foo'
1607+
)
1608+
expect(call).toBeDefined()
1609+
expect(call!.headers).toEqual(
1610+
expect.objectContaining({
1611+
'Content-Type': 'text/plain',
1612+
'X-Test': 'foo',
1613+
})
16051614
)
1606-
expect(call.headers).toEqual({
1607-
'Content-Type': 'text/plain',
1608-
'X-Test': 'foo',
1609-
})
16101615
})
16111616
})

packages/browser/src/browser/settings.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import { UserOptions } from '../core/user'
1212
import { HighEntropyHint } from '../lib/client-hints/interfaces'
1313
import { IntegrationsOptions } from '@segment/analytics-core'
1414
import { SegmentioSettings } from '../plugins/segmentio'
15+
import { HttpConfig } from '../plugins/segmentio/shared-dispatcher'
1516

1617
interface VersionSettings {
1718
version?: string
@@ -74,6 +75,13 @@ export interface RemoteSegmentIOIntegrationSettings
7475
bundledConfigIds?: string[]
7576
unbundledConfigIds?: string[]
7677
maybeBundledConfigIds?: Record<string, string[]>
78+
79+
/**
80+
* HTTP retry and backoff configuration.
81+
* Controls rate-limit handling (429) and exponential backoff for transient errors.
82+
* Fetched from CDN settings; can be overridden via init options.
83+
*/
84+
httpConfig?: HttpConfig
7785
}
7886

7987
/**
@@ -188,7 +196,7 @@ export interface AnalyticsSettings {
188196
*/
189197
export type SegmentioIntegrationInitOptions = Pick<
190198
SegmentioSettings,
191-
'apiHost' | 'protocol' | 'deliveryStrategy'
199+
'apiHost' | 'protocol' | 'deliveryStrategy' | 'httpConfig'
192200
>
193201

194202
/**

packages/browser/src/lib/priority-queue/__tests__/backoff.test.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@ import { backoff } from '../backoff'
22

33
describe('backoff', () => {
44
it('increases with the number of attempts', () => {
5-
expect(backoff({ attempt: 1 })).toBeGreaterThan(1000)
6-
expect(backoff({ attempt: 2 })).toBeGreaterThan(2000)
7-
expect(backoff({ attempt: 3 })).toBeGreaterThan(3000)
8-
expect(backoff({ attempt: 4 })).toBeGreaterThan(4000)
5+
expect(backoff({ attempt: 1 })).toBeGreaterThan(200)
6+
expect(backoff({ attempt: 2 })).toBeGreaterThan(400)
7+
expect(backoff({ attempt: 3 })).toBeGreaterThan(800)
8+
expect(backoff({ attempt: 4 })).toBeGreaterThan(1600)
99
})
1010

1111
it('accepts a max timeout', () => {
12-
expect(backoff({ attempt: 1, maxTimeout: 3000 })).toBeGreaterThan(1000)
12+
expect(backoff({ attempt: 1, maxTimeout: 3000 })).toBeGreaterThan(200)
1313
expect(backoff({ attempt: 3, maxTimeout: 3000 })).toBeLessThanOrEqual(3000)
1414
expect(backoff({ attempt: 4, maxTimeout: 3000 })).toBeLessThanOrEqual(3000)
1515
})

packages/browser/src/lib/priority-queue/__tests__/index.test.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ describe('backoffs', () => {
121121
expect(spy).toHaveBeenCalled()
122122

123123
const delay = spy.mock.calls[0][1]
124-
expect(delay).toBeGreaterThan(1000)
124+
expect(delay).toBeGreaterThan(200)
125125
})
126126

127127
it('increases the delay as work gets requeued', () => {
@@ -147,12 +147,12 @@ describe('backoffs', () => {
147147
queue.pop()
148148

149149
const firstDelay = spy.mock.calls[0][1]
150-
expect(firstDelay).toBeGreaterThan(1000)
150+
expect(firstDelay).toBeGreaterThan(200)
151151

152152
const secondDelay = spy.mock.calls[1][1]
153-
expect(secondDelay).toBeGreaterThan(2000)
153+
expect(secondDelay).toBeGreaterThan(400)
154154

155155
const thirdDelay = spy.mock.calls[2][1]
156-
expect(thirdDelay).toBeGreaterThan(3000)
156+
expect(thirdDelay).toBeGreaterThan(800)
157157
})
158158
})

0 commit comments

Comments
 (0)