Skip to content

Commit 65f1ab9

Browse files
fix(ai-gateway): override status code on in-stream error events (#2782)
* fix(ai-gateway): override status code on in-stream error events

  When an OpenRouter SSE chunk contains an `error.code` during usage processing (e.g. a 200 response stream that ends up carrying a 502 provider_unavailable error), propagate that numeric code into the microdollar_usage_metadata.status_code column instead of recording the outer HTTP 200. `has_error` was already set in this path.

* refactor: use status_code field directly on NotYetCostedUsageStats

  Per review, drop the separate status_code_override concept; instead let the parsers emit the effective status code on the stats as a required `status_code`, and have logMicrodollarUsage apply it to usageContext so all paths (chat/messages/responses/fim/embedding) behave uniformly.

---------

Co-authored-by: kiloconnect[bot] <240665456+kiloconnect[bot]@users.noreply.github.com>
1 parent c294da6 commit 65f1ab9

22 files changed

Lines changed: 77 additions & 13 deletions

apps/web/src/lib/ai-gateway/llm-proxy-helpers.test.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ describe('parseEmbeddingUsageFromResponse', () => {
288288
usage: { prompt_tokens: 100, total_tokens: 100, cost: 0.00005 },
289289
});
290290

291-
const result = parseEmbeddingUsageFromResponse(response);
291+
const result = parseEmbeddingUsageFromResponse(response, 200);
292292

293293
// toMicrodollars(0.00005) = Math.round(0.00005 * 1_000_000) = 50
294294
expect(result.cost_mUsd).toBe(50);
@@ -299,15 +299,15 @@ describe('parseEmbeddingUsageFromResponse', () => {
299299
usage: { prompt_tokens: 1000, total_tokens: 1000 },
300300
});
301301

302-
const result = parseEmbeddingUsageFromResponse(response);
302+
const result = parseEmbeddingUsageFromResponse(response, 200);
303303

304304
expect(result.cost_mUsd).toBe(0);
305305
});
306306

307307
it('should extract id as messageId', () => {
308308
const response = makeResponse({ id: 'embd-abc' });
309309

310-
const result = parseEmbeddingUsageFromResponse(response);
310+
const result = parseEmbeddingUsageFromResponse(response, 200);
311311

312312
expect(result.messageId).toBe('embd-abc');
313313
});
@@ -317,31 +317,31 @@ describe('parseEmbeddingUsageFromResponse', () => {
317317
const parsed = JSON.parse(response);
318318
delete parsed.id;
319319

320-
const result = parseEmbeddingUsageFromResponse(JSON.stringify(parsed));
320+
const result = parseEmbeddingUsageFromResponse(JSON.stringify(parsed), 200);
321321

322322
expect(result.messageId).toBeNull();
323323
});
324324

325325
it('should set hasError to true when model is empty', () => {
326326
const response = makeResponse({ model: '' });
327327

328-
const result = parseEmbeddingUsageFromResponse(response);
328+
const result = parseEmbeddingUsageFromResponse(response, 200);
329329

330330
expect(result.hasError).toBe(true);
331331
});
332332

333333
it('should set hasError to false when model is present', () => {
334334
const response = makeResponse({ model: 'text-embedding-3-small' });
335335

336-
const result = parseEmbeddingUsageFromResponse(response);
336+
const result = parseEmbeddingUsageFromResponse(response, 200);
337337

338338
expect(result.hasError).toBe(false);
339339
});
340340

341341
it('should always set outputTokens to 0 and streamed/cancelled to false', () => {
342342
const response = makeResponse();
343343

344-
const result = parseEmbeddingUsageFromResponse(response);
344+
const result = parseEmbeddingUsageFromResponse(response, 200);
345345

346346
expect(result.outputTokens).toBe(0);
347347
expect(result.streamed).toBe(false);
@@ -353,7 +353,7 @@ describe('parseEmbeddingUsageFromResponse', () => {
353353
usage: { prompt_tokens: 42, total_tokens: 42 },
354354
});
355355

356-
const result = parseEmbeddingUsageFromResponse(response);
356+
const result = parseEmbeddingUsageFromResponse(response, 200);
357357

358358
expect(result.inputTokens).toBe(42);
359359
});

apps/web/src/lib/ai-gateway/llm-proxy-helpers.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,7 @@ function parseMistralFimUsageFromString(
513513
generation_time: null,
514514
streamed: null,
515515
cancelled: null,
516+
status_code: statusCode,
516517
};
517518
}
518519

@@ -612,6 +613,7 @@ async function parseMistralFimUsageFromStream(
612613
generation_time: null,
613614
streamed: null,
614615
cancelled: null,
616+
status_code: statusCode,
615617
};
616618
}
617619

@@ -679,7 +681,10 @@ type EmbeddingResponse = {
679681
usage: EmbeddingUsage;
680682
};
681683

682-
export function parseEmbeddingUsageFromResponse(responseText: string): MicrodollarUsageStats {
684+
export function parseEmbeddingUsageFromResponse(
685+
responseText: string,
686+
statusCode: number
687+
): MicrodollarUsageStats {
683688
const json: EmbeddingResponse = JSON.parse(responseText);
684689

685690
// Upstream providers (OpenRouter, Vercel) include cost in USD → convert to microdollars.
@@ -689,7 +694,7 @@ export function parseEmbeddingUsageFromResponse(responseText: string): Microdoll
689694
messageId: json.id ?? null,
690695
model: json.model,
691696
responseContent: '',
692-
hasError: !json.model,
697+
hasError: !json.model || statusCode >= 400,
693698
inference_provider: null,
694699
inputTokens: json.usage.prompt_tokens,
695700
outputTokens: 0,
@@ -704,6 +709,7 @@ export function parseEmbeddingUsageFromResponse(responseText: string): Microdoll
704709
generation_time: null,
705710
streamed: false,
706711
cancelled: false,
712+
status_code: statusCode,
707713
};
708714
}
709715

@@ -730,11 +736,12 @@ export function countAndStoreEmbeddingUsage(
730736
) {
731737
debugSaveProxyResponseStream(clonedResponse, '.log.resp.json');
732738

739+
const statusCode = usageContext.status_code ?? 0;
733740
const usageStatsPromise = !clonedResponse.body
734741
? Promise.resolve(null)
735742
: clonedResponse
736743
.text()
737-
.then(text => parseEmbeddingUsageFromResponse(text))
744+
.then(text => parseEmbeddingUsageFromResponse(text, statusCode))
738745
.catch(() => null);
739746

740747
after(

apps/web/src/lib/ai-gateway/processUsage.messages.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ describe('processMessagesApiUsage', () => {
2626
generation_time: null,
2727
streamed: null,
2828
cancelled: null,
29+
status_code: 200,
2930
};
3031

3132
test('correctly processes OpenRouter usage for a non-byok case', () => {

apps/web/src/lib/ai-gateway/processUsage.messages.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ export async function parseMessagesMicrodollarUsageFromStream(
180180
generation_time: null,
181181
streamed: true,
182182
cancelled: null,
183+
status_code: statusCode,
183184
} satisfies NotYetCostedUsageStats;
184185

185186
const costs = processMessagesApiUsage(usage, providerMetadata, coreProps);
@@ -216,6 +217,7 @@ export function parseMessagesMicrodollarUsageFromString(
216217
generation_time: null,
217218
streamed: false,
218219
cancelled: null,
220+
status_code: statusCode,
219221
} satisfies NotYetCostedUsageStats;
220222

221223
const costs = processMessagesApiUsage(usage, providerMetadata, coreProps);

apps/web/src/lib/ai-gateway/processUsage.responses.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ describe('processResponsesApiUsage', () => {
2626
generation_time: null,
2727
streamed: null,
2828
cancelled: null,
29+
status_code: 200,
2930
};
3031

3132
test('correctly processes OpenRouter usage for a non-byok case', () => {

apps/web/src/lib/ai-gateway/processUsage.responses.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ export async function parseResponsesMicrodollarUsageFromStream(
214214
generation_time: null,
215215
streamed: true,
216216
cancelled: null,
217+
status_code: statusCode,
217218
} satisfies NotYetCostedUsageStats;
218219

219220
const costs = processResponsesApiUsage(usage, providerMetadata, coreProps);
@@ -244,6 +245,7 @@ export function parseResponsesMicrodollarUsageFromString(
244245
generation_time: null,
245246
streamed: false,
246247
cancelled: null,
248+
status_code: statusCode,
247249
} satisfies NotYetCostedUsageStats;
248250

249251
const costs = processResponsesApiUsage(usage, providerMetadata, coreProps);

apps/web/src/lib/ai-gateway/processUsage.test.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ describe('processOpenRouterUsage', () => {
4343
generation_time: null,
4444
streamed: null,
4545
cancelled: null,
46+
status_code: 200,
4647
};
4748

4849
test('should correctly process usage for a non-byok case', () => {
@@ -181,6 +182,28 @@ describe('parseMicrodollarUsageFromStream approval tests', () => {
181182
expect(result.responseContent).toBe('Hello world');
182183
expect(result.hasError).toBe(true); // Should be marked as error due to abort
183184
});
185+
186+
test('captures numeric error.code from in-stream error event as status_code_override', async () => {
187+
const errorChunk = `data: {"id":"gen-1","object":"chat.completion.chunk","created":1,"model":"","provider":"Amazon Bedrock","choices":[],"error":{"code":502,"message":"Internal server error","metadata":{"error_type":"provider_unavailable"}}}\n\n`;
188+
189+
const stream = new ReadableStream<Uint8Array>({
190+
start(controller) {
191+
controller.enqueue(new TextEncoder().encode(errorChunk));
192+
controller.close();
193+
},
194+
});
195+
196+
const result = await parseMicrodollarUsageFromStream(
197+
stream,
198+
'fake-user-id',
199+
undefined,
200+
'openrouter',
201+
200
202+
);
203+
204+
expect(result.hasError).toBe(true);
205+
expect(result.status_code).toBe(502);
206+
});
184207
});
185208

186209
const sampleReqDir = join(process.cwd(), 'src/tests/req_sample');
@@ -328,6 +351,7 @@ describe('logMicrodollarUsage', () => {
328351
generation_time: null,
329352
streamed: null,
330353
cancelled: null,
354+
status_code: 200,
331355
};
332356
const createBaseUsageContext = (user: {
333357
id: string;
@@ -770,6 +794,7 @@ describe('toInsertableDbUsageRecord NUL-byte sanitization', () => {
770794
generation_time: null,
771795
streamed: null,
772796
cancelled: null,
797+
status_code: 200,
773798
};
774799

775800
// Node's Headers constructor rejects values containing NUL bytes (invalid

apps/web/src/lib/ai-gateway/processUsage.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ export async function logMicrodollarUsage(
240240
usageStats: MicrodollarUsageStats,
241241
usageContext: MicrodollarUsageContext
242242
) {
243+
usageContext.status_code = usageStats.status_code;
243244
const contextInfo = extractUsageContextInfo(usageContext);
244245
const { core, metadata } = toInsertableDbUsageRecord(usageStats, contextInfo);
245246

@@ -696,6 +697,7 @@ export async function parseMicrodollarUsageFromStream(
696697
let model: string | null = null;
697698
let responseContent = ''; // for abuse investigation
698699
let reportedError = statusCode >= 400;
700+
let effectiveStatusCode = statusCode;
699701
const startedAt = performance.now();
700702
let firstTokenReceived = false;
701703
let usage: OpenRouterUsage | null = null;
@@ -729,6 +731,9 @@ export async function parseMicrodollarUsageFromStream(
729731
if ('error' in json) {
730732
const error = json.error as OpenRouterError;
731733
reportedError = true;
734+
if (typeof error.code === 'number') {
735+
effectiveStatusCode = error.code;
736+
}
732737
captureException(new Error(`OpenRouter error: ${error.message}`), {
733738
tags: { source: 'sse_processing' },
734739
extra: { json, event },
@@ -780,6 +785,7 @@ export async function parseMicrodollarUsageFromStream(
780785
generation_time: null,
781786
streamed: true,
782787
cancelled: null,
788+
status_code: effectiveStatusCode,
783789
};
784790

785791
const costs = processOpenRouterUsage(usage, coreProps);
@@ -822,6 +828,7 @@ export function parseMicrodollarUsageFromString(
822828
generation_time: null,
823829
streamed: false,
824830
cancelled: null,
831+
status_code: statusCode,
825832
};
826833

827834
const costs = processOpenRouterUsage(responseJson?.usage, coreProps);
@@ -892,6 +899,7 @@ async function processTokenData(
892899

893900
genStats.model = usageStats.model; // openrouter bug?
894901
genStats.hasError = usageStats.hasError; // retain by choice
902+
genStats.status_code = usageStats.status_code; // retain by choice
895903
genStats.streamed ??= usageContext.isStreaming;
896904
if (genStats.cost_mUsd !== usageStats.cost_mUsd) {
897905
console.warn(
@@ -1004,5 +1012,6 @@ export const mapToUsageStats = (
10041012
generation_time: data.generation_time ?? null,
10051013
streamed: data.streamed ?? null,
10061014
cancelled: data.cancelled ?? null,
1015+
status_code: 200,
10071016
};
10081017
};

apps/web/src/lib/ai-gateway/processUsage.types.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ export type NotYetCostedUsageStats = {
6565
generation_time: number | null;
6666
streamed: boolean | null;
6767
cancelled: boolean | null;
68+
/** Effective HTTP status code for this usage record. Starts from the upstream
69+
* response status and is overwritten by a numeric `error.code` encountered
70+
* in-stream (e.g. a 200 response that ends up carrying a 502 error event). */
71+
status_code: number;
6872
};
6973

7074
export type JustTheCostsUsageStats = {
@@ -171,7 +175,7 @@ export type UsageMetaData = {
171175

172176
export type OpenRouterError = {
173177
message: string;
174-
code: string;
178+
code: number | string;
175179
metadata?: Record<string, unknown>;
176180
provider_name?: string;
177181
};

apps/web/src/lib/usageDeduction.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ function createMockUsageStats(cost_mUsd: number): MicrodollarUsageStats {
3131
generation_time: null,
3232
streamed: null,
3333
cancelled: null,
34+
status_code: 200,
3435
};
3536
}
3637

0 commit comments

Comments (0)