From f28df179e563ba9deb967e505e3c044e15bb5a9e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 23 Jun 2026 18:20:49 +0000 Subject: [PATCH 1/2] Initial plan From 4da9af1306fa6ddbf8b6b866b757c53a72dc0f86 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 23 Jun 2026 18:35:16 +0000 Subject: [PATCH 2/2] refactor(api-proxy): split upstream handlers --- containers/api-proxy/Dockerfile | 1 + containers/api-proxy/upstream-log.js | 89 ++++++++ containers/api-proxy/upstream-log.test.js | 62 ++++++ containers/api-proxy/upstream-response.js | 227 +++----------------- containers/api-proxy/upstream-retry.js | 76 +++++++ containers/api-proxy/upstream-retry.test.js | 72 +++++++ containers/api-proxy/upstream-token.js | 35 +++ containers/api-proxy/upstream-token.test.js | 46 ++++ 8 files changed, 411 insertions(+), 197 deletions(-) create mode 100644 containers/api-proxy/upstream-log.js create mode 100644 containers/api-proxy/upstream-log.test.js create mode 100644 containers/api-proxy/upstream-retry.js create mode 100644 containers/api-proxy/upstream-retry.test.js create mode 100644 containers/api-proxy/upstream-token.js create mode 100644 containers/api-proxy/upstream-token.test.js diff --git a/containers/api-proxy/Dockerfile b/containers/api-proxy/Dockerfile index 8be0a2e4..c9a7fbfc 100644 --- a/containers/api-proxy/Dockerfile +++ b/containers/api-proxy/Dockerfile @@ -27,6 +27,7 @@ COPY server.js logging.js metrics.js rate-limiter.js \ ai-credits-pricing.js models-dev-catalog.js models.dev.catalog.json \ oidc-refresh-utils.js body-transform.js body-utils.js rate-limit.js websocket-proxy.js \ deprecated-header-tracker.js billing-headers.js upstream-response.js \ + upstream-log.js upstream-retry.js upstream-token.js \ anthropic-cache.js otel.js otel-exporters.js otel-serialization.js \ token-budget-log.js blocked-request-diagnostics.js \ provider-env-constants.js provider-names.js ./ diff --git a/containers/api-proxy/upstream-log.js b/containers/api-proxy/upstream-log.js new file mode 100644 index 00000000..904998b0 --- /dev/null +++ b/containers/api-proxy/upstream-log.js @@ -0,0 +1,89 @@ +'use strict'; + +const { COPILOT_PLACEHOLDER_TOKEN } = require('./providers/copilot-byok'); +const { stripBearerPrefix } = require('./providers/copilot-auth'); + +function buildCopilotAuthErrorMessage(statusCode, env = process.env) { + const baseMessage = `Upstream returned ${statusCode}`; + const byokBaseUrl = (env.COPILOT_PROVIDER_BASE_URL || '').trim(); + const byokKey = stripBearerPrefix(env.COPILOT_PROVIDER_API_KEY); + const hasByokBaseUrl = Boolean(byokBaseUrl); + + if (hasByokBaseUrl && byokKey === COPILOT_PLACEHOLDER_TOKEN) { + return `${baseMessage} — COPILOT_PROVIDER_API_KEY is the AWF placeholder sentinel. ` + + 'This indicates an internal credential-isolation misconfiguration (real BYOK key not forwarded to api-proxy).'; + } + + if (hasByokBaseUrl && !byokKey) { + return `${baseMessage} — BYOK provider request to COPILOT_PROVIDER_BASE_URL failed because COPILOT_PROVIDER_API_KEY is not set.`; + } + + if (hasByokBaseUrl) { + return `${baseMessage} — BYOK provider request to COPILOT_PROVIDER_BASE_URL failed. ` + + 'Verify COPILOT_PROVIDER_BASE_URL and COPILOT_PROVIDER_API_KEY.'; + } + + return `${baseMessage} — check that the API key is valid and correctly formatted`; +} + +function createLogRequestCompletion({ metrics, logRequest, sanitizeForLog, applyMaxRunsInvocation }) { + return function logRequestCompletion(statusCode, responseBytes, initiatorSent, billingInfo, { + startTime, provider, req, requestBytes, targetHost, requestId, + }) { + const duration = Date.now() - startTime; + const sc = metrics.statusClass(statusCode); + metrics.gaugeDec('active_requests', { provider }); + metrics.increment('requests_total', { provider, method: req.method, status_class: sc }); + metrics.increment('response_bytes_total', { provider }, responseBytes); + metrics.observe('request_duration_ms', duration, { provider }); + if (statusCode >= 200 && statusCode < 300) { + applyMaxRunsInvocation(); + } + const logFields = { + request_id: requestId, provider, method: req.method, + path: sanitizeForLog(req.url), status: statusCode, + duration_ms: duration, request_bytes: requestBytes, + response_bytes: responseBytes, upstream_host: targetHost, + }; + if (initiatorSent) logFields.x_initiator = initiatorSent; + if (billingInfo) logFields.billing = billingInfo; + logRequest('info', 'request_complete', logFields); + }; +} + +function createLogUpstreamAuthError({ + logRequest, + sanitizeForLog, + applyPermissionDenied, + parseModelNotSupportedFromBody, +}) { + return function logUpstreamAuthError(statusCode, { requestId, provider, targetHost, req, responseBody }) { + const authErrorMessage = provider === 'copilot' + ? buildCopilotAuthErrorMessage(statusCode) + : `Upstream returned ${statusCode} — check that the API key is valid and correctly formatted`; + + if (statusCode === 401 || statusCode === 403) { + applyPermissionDenied(); + logRequest('warn', 'upstream_auth_error', { + request_id: requestId, provider, status: statusCode, + upstream_host: targetHost, path: sanitizeForLog(req.url), + message: authErrorMessage, + }); + } else if (statusCode === 400) { + // Suppress generic auth-error message when the 400 is a model-not-supported + // error — that case is handled by the model_unavailable diagnostic. + if (responseBody && parseModelNotSupportedFromBody(responseBody)) return; + logRequest('warn', 'upstream_auth_error', { + request_id: requestId, provider, status: statusCode, + upstream_host: targetHost, path: sanitizeForLog(req.url), + message: authErrorMessage, + }); + } + }; +} + +module.exports = { + createLogRequestCompletion, + createLogUpstreamAuthError, + buildCopilotAuthErrorMessage, +}; diff --git a/containers/api-proxy/upstream-log.test.js b/containers/api-proxy/upstream-log.test.js new file mode 100644 index 00000000..7db115af --- /dev/null +++ b/containers/api-proxy/upstream-log.test.js @@ -0,0 +1,62 @@ +const { + createLogRequestCompletion, + createLogUpstreamAuthError, +} = require('./upstream-log'); + +describe('upstream-log', () => { + test('logRequestCompletion records metrics and invokes max-runs on success', () => { + const metrics = { + statusClass: jest.fn(() => '2xx'), + gaugeDec: jest.fn(), + increment: jest.fn(), + observe: jest.fn(), + }; + const logRequest = jest.fn(); + const applyMaxRunsInvocation = jest.fn(); + const logRequestCompletion = createLogRequestCompletion({ + metrics, + logRequest, + sanitizeForLog: (value) => value, + applyMaxRunsInvocation, + }); + + logRequestCompletion(200, 42, 'agent', { prompt_tokens: 10 }, { + startTime: Date.now() - 5, + provider: 'copilot', + req: { method: 'POST', url: '/v1/chat/completions' }, + requestBytes: 12, + targetHost: 'api.githubcopilot.com', + requestId: 'req-1', + }); + + expect(metrics.gaugeDec).toHaveBeenCalledWith('active_requests', { provider: 'copilot' }); + expect(applyMaxRunsInvocation).toHaveBeenCalledTimes(1); + expect(logRequest).toHaveBeenCalledWith('info', 'request_complete', expect.objectContaining({ + request_id: 'req-1', + status: 200, + x_initiator: 'agent', + })); + }); + + test('logUpstreamAuthError suppresses 400 model-not-supported auth log noise', () => { + const logRequest = jest.fn(); + const applyPermissionDenied = jest.fn(); + const logUpstreamAuthError = createLogUpstreamAuthError({ + logRequest, + sanitizeForLog: (value) => value, + applyPermissionDenied, + parseModelNotSupportedFromBody: () => true, + }); + + logUpstreamAuthError(400, { + requestId: 'req-1', + provider: 'copilot', + targetHost: 'api.githubcopilot.com', + req: { url: '/v1/chat/completions' }, + responseBody: Buffer.from('The requested model is not supported'), + }); + + expect(logRequest).not.toHaveBeenCalled(); + expect(applyPermissionDenied).not.toHaveBeenCalled(); + }); +}); diff --git a/containers/api-proxy/upstream-response.js b/containers/api-proxy/upstream-response.js index bd43b0b7..439e1a5b 100644 --- a/containers/api-proxy/upstream-response.js +++ b/containers/api-proxy/upstream-response.js @@ -1,8 +1,8 @@ 'use strict'; -const { computeTokenBudgetUsage } = require('./token-budget-log'); -const { COPILOT_PLACEHOLDER_TOKEN } = require('./providers/copilot-byok'); -const { stripBearerPrefix } = require('./providers/copilot-auth'); +const { createLogRequestCompletion, createLogUpstreamAuthError, buildCopilotAuthErrorMessage } = require('./upstream-log'); +const { handle400WithRetry } = require('./upstream-retry'); +const { setupTokenTracking } = require('./upstream-token'); /** Maximum number of times to retry a Copilot 400 "model not supported" response. */ const MAX_MODEL_NOT_SUPPORTED_RETRIES = 2; @@ -25,29 +25,6 @@ function parseModelNotSupportedFromBody(body) { return MODEL_NOT_SUPPORTED_PATTERN.test(body.toString('utf8')); } -function buildCopilotAuthErrorMessage(statusCode, env = process.env) { - const baseMessage = `Upstream returned ${statusCode}`; - const byokBaseUrl = (env.COPILOT_PROVIDER_BASE_URL || '').trim(); - const byokKey = stripBearerPrefix(env.COPILOT_PROVIDER_API_KEY); - const hasByokBaseUrl = Boolean(byokBaseUrl); - - if (hasByokBaseUrl && byokKey === COPILOT_PLACEHOLDER_TOKEN) { - return `${baseMessage} — COPILOT_PROVIDER_API_KEY is the AWF placeholder sentinel. ` + - 'This indicates an internal credential-isolation misconfiguration (real BYOK key not forwarded to api-proxy).'; - } - - if (hasByokBaseUrl && !byokKey) { - return `${baseMessage} — BYOK provider request to COPILOT_PROVIDER_BASE_URL failed because COPILOT_PROVIDER_API_KEY is not set.`; - } - - if (hasByokBaseUrl) { - return `${baseMessage} — BYOK provider request to COPILOT_PROVIDER_BASE_URL failed. ` + - 'Verify COPILOT_PROVIDER_BASE_URL and COPILOT_PROVIDER_API_KEY.'; - } - - return `${baseMessage} — check that the API key is valid and correctly formatted`; -} - function createUpstreamResponseHandlers({ metrics, logRequest, @@ -61,176 +38,19 @@ function createUpstreamResponseHandlers({ parseDeprecatedHeaderFromBody, learnAndStripDeprecatedHeaderValue, }) { - function logRequestCompletion(statusCode, responseBytes, initiatorSent, billingInfo, { - startTime, provider, req, requestBytes, targetHost, requestId, - }) { - const duration = Date.now() - startTime; - const sc = metrics.statusClass(statusCode); - metrics.gaugeDec('active_requests', { provider }); - metrics.increment('requests_total', { provider, method: req.method, status_class: sc }); - metrics.increment('response_bytes_total', { provider }, responseBytes); - metrics.observe('request_duration_ms', duration, { provider }); - if (statusCode >= 200 && statusCode < 300) { - applyMaxRunsInvocation(); - } - const logFields = { - request_id: requestId, provider, method: req.method, - path: sanitizeForLog(req.url), status: statusCode, - duration_ms: duration, request_bytes: requestBytes, - response_bytes: responseBytes, upstream_host: targetHost, - }; - if (initiatorSent) logFields.x_initiator = initiatorSent; - if (billingInfo) logFields.billing = billingInfo; - logRequest('info', 'request_complete', logFields); - } - - function logUpstreamAuthError(statusCode, { requestId, provider, targetHost, req, responseBody }) { - const authErrorMessage = provider === 'copilot' - ? buildCopilotAuthErrorMessage(statusCode) - : `Upstream returned ${statusCode} — check that the API key is valid and correctly formatted`; - - if (statusCode === 401 || statusCode === 403) { - applyPermissionDenied(); - logRequest('warn', 'upstream_auth_error', { - request_id: requestId, provider, status: statusCode, - upstream_host: targetHost, path: sanitizeForLog(req.url), - message: authErrorMessage, - }); - } else if (statusCode === 400) { - // Suppress generic auth-error message when the 400 is a model-not-supported - // error — that case is handled by the model_unavailable diagnostic. - if (responseBody && parseModelNotSupportedFromBody(responseBody)) return; - logRequest('warn', 'upstream_auth_error', { - request_id: requestId, provider, status: statusCode, - upstream_host: targetHost, path: sanitizeForLog(req.url), - message: authErrorMessage, - }); - } - } - - /** - * Handle a buffered 400 response body: attempt retry via the deprecated-header - * or model-not-supported paths, or log the model_unavailable diagnostic and - * forward the response to the client. - * - * Returns true when a retry was triggered (caller should return early), - * false when the response was forwarded to the client. - * - * @param {import('http').IncomingMessage} proxyRes - * @param {object} requestHeaders - * @param {Buffer} responseBody - * @param {{ provider: string, requestId: string, hasRetried: boolean, - * onRetry: Function, modelNotSupportedRetryCount: number, - * onModelNotSupportedRetry: Function|undefined, completionCtx: object, - * authErrCtx: object, initiatorSent: string|null, - * billingInfo: object|null, res: object, span: object }} opts - * @returns {boolean} - */ - function handle400WithRetry(proxyRes, requestHeaders, responseBody, { - provider, requestId, hasRetried, onRetry, - modelNotSupportedRetryCount, onModelNotSupportedRetry, - completionCtx, authErrCtx, initiatorSent, billingInfo, res, span, - }) { - // ── (a) Deprecated beta-header retry (first attempt for anthropic/copilot) ── - if (!hasRetried && (provider === 'anthropic' || provider === 'copilot')) { - const deprecated = parseDeprecatedHeaderFromBody(responseBody); - if (deprecated) { - const retryHeaders = { ...requestHeaders }; - const stripped = learnAndStripDeprecatedHeaderValue( - retryHeaders, deprecated.header, deprecated.value, requestId, provider, - ); - if (stripped) { - onRetry(retryHeaders); - return true; - } - } - } - - // ── (b) Transient model-not-supported retry (copilot only, up to MAX) ────── - if ( - provider === 'copilot' && - modelNotSupportedRetryCount < MAX_MODEL_NOT_SUPPORTED_RETRIES && - onModelNotSupportedRetry && - parseModelNotSupportedFromBody(responseBody) - ) { - logRequest('warn', 'model_not_supported_retry', { - request_id: requestId, - provider, - retry_attempt: modelNotSupportedRetryCount + 1, - max_retries: MAX_MODEL_NOT_SUPPORTED_RETRIES, - message: `Copilot returned 400 model not supported (transient); retrying (attempt ${modelNotSupportedRetryCount + 1}/${MAX_MODEL_NOT_SUPPORTED_RETRIES})`, - }); - onModelNotSupportedRetry(); - return true; - } - - // ── (c) Model-unavailable diagnostic (non-retryable model-not-supported 400) ─── - if (proxyRes.statusCode === 400 && parseModelNotSupportedFromBody(responseBody)) { - const { req } = authErrCtx; - logRequest('error', 'model_unavailable', { - request_id: requestId, - provider, - status: proxyRes.statusCode, - path: sanitizeForLog(req.url), - retries_attempted: modelNotSupportedRetryCount, - message: `Model is unavailable or retired — the requested model is not supported by ${provider}. ` + - 'Check that the model name is correct and not deprecated. ' + - 'If using model aliases, verify the alias resolves to an available model.', - }); - } - - logRequestCompletion(proxyRes.statusCode, responseBody.length, initiatorSent, billingInfo, completionCtx); - logUpstreamAuthError(proxyRes.statusCode, { ...authErrCtx, responseBody }); - - const resHeaders = { - ...proxyRes.headers, - 'x-request-id': requestId, - 'content-length': String(responseBody.length), - }; - delete resHeaders['transfer-encoding']; - res.writeHead(proxyRes.statusCode, resHeaders); - res.end(responseBody); - otel.endSpan(span, proxyRes.statusCode); - return false; - } - - /** - * Wire up token-usage tracking for a streaming or non-streaming normal - * (non-400) response. Parses the request body for a model fallback and - * attaches OTEL span callbacks. - * - * @param {import('http').IncomingMessage} proxyRes - * @param {Buffer} body - Original request body (used to extract the model name) - * @param {{ requestId: string, provider: string, req: object, startTime: number, - * billingInfo: object|null, initiatorSent: string|null, - * span: object, isStreaming: boolean }} opts - */ - function setupTokenTracking(proxyRes, body, { - requestId, provider, req, startTime, billingInfo, - initiatorSent, span, isStreaming, - }) { - // Extract model from request body as fallback for token tracking when the - // upstream response omits the model field (e.g., Copilot SDK streaming). - let requestModel = null; - if (body && body.length > 0) { - try { - const parsed = JSON.parse(body.toString('utf8')); - if (parsed && typeof parsed.model === 'string') requestModel = parsed.model; - } catch { /* non-JSON body */ } - } - trackTokenUsage(proxyRes, { - requestId, provider, path: sanitizeForLog(req.url), startTime, metrics, billingInfo, initiatorSent, requestModel, - onUsage: (normalizedUsage, model) => { - otel.setTokenAttributes(span, { provider, model, normalizedUsage, streaming: isStreaming }); - const budgetResult = computeTokenBudgetUsage({ logRequest, requestId, provider }, normalizedUsage, model); - otel.setBudgetAttributes(span, budgetResult); - return budgetResult; - }, - onSpanEnd: (statusCode) => { - otel.endSpan(span, statusCode); - }, - }); - } + const logRequestCompletion = createLogRequestCompletion({ + metrics, + logRequest, + sanitizeForLog, + applyMaxRunsInvocation, + }); + + const logUpstreamAuthError = createLogUpstreamAuthError({ + logRequest, + sanitizeForLog, + applyPermissionDenied, + parseModelNotSupportedFromBody, + }); function handleUpstreamResponse(proxyRes, requestHeaders, { body, res, provider, requestId, req, targetHost, startTime, span, requestBytes, @@ -275,8 +95,16 @@ function createUpstreamResponseHandlers({ const responseBody = Buffer.concat(bufferedChunks); const didRetry = handle400WithRetry(proxyRes, requestHeaders, responseBody, { provider, requestId, hasRetried, onRetry, - modelNotSupportedRetryCount, onModelNotSupportedRetry, + modelNotSupportedRetryCount, maxModelNotSupportedRetries: MAX_MODEL_NOT_SUPPORTED_RETRIES, onModelNotSupportedRetry, completionCtx, authErrCtx, initiatorSent, billingInfo, res, span, + parseDeprecatedHeaderFromBody, + learnAndStripDeprecatedHeaderValue, + parseModelNotSupportedFromBody, + logRequest, + sanitizeForLog, + logRequestCompletion, + logUpstreamAuthError, + otel, }); if (didRetry) return; }); @@ -297,6 +125,11 @@ function createUpstreamResponseHandlers({ setupTokenTracking(proxyRes, body, { requestId, provider, req, startTime, billingInfo, initiatorSent, span, isStreaming, + trackTokenUsage, + sanitizeForLog, + metrics, + otel, + logRequest, }); } diff --git a/containers/api-proxy/upstream-retry.js b/containers/api-proxy/upstream-retry.js new file mode 100644 index 00000000..cf579bda --- /dev/null +++ b/containers/api-proxy/upstream-retry.js @@ -0,0 +1,76 @@ +'use strict'; + +function handle400WithRetry(proxyRes, requestHeaders, responseBody, { + provider, requestId, hasRetried, onRetry, + modelNotSupportedRetryCount, maxModelNotSupportedRetries, onModelNotSupportedRetry, + completionCtx, authErrCtx, initiatorSent, billingInfo, res, span, + parseDeprecatedHeaderFromBody, learnAndStripDeprecatedHeaderValue, + parseModelNotSupportedFromBody, logRequest, sanitizeForLog, + logRequestCompletion, logUpstreamAuthError, otel, +}) { + // ── (a) Deprecated beta-header retry (first attempt for anthropic/copilot) ── + if (!hasRetried && (provider === 'anthropic' || provider === 'copilot')) { + const deprecated = parseDeprecatedHeaderFromBody(responseBody); + if (deprecated) { + const retryHeaders = { ...requestHeaders }; + const stripped = learnAndStripDeprecatedHeaderValue( + retryHeaders, deprecated.header, deprecated.value, requestId, provider, + ); + if (stripped) { + onRetry(retryHeaders); + return true; + } + } + } + + // ── (b) Transient model-not-supported retry (copilot only, up to MAX) ────── + if ( + provider === 'copilot' && + modelNotSupportedRetryCount < maxModelNotSupportedRetries && + onModelNotSupportedRetry && + parseModelNotSupportedFromBody(responseBody) + ) { + logRequest('warn', 'model_not_supported_retry', { + request_id: requestId, + provider, + retry_attempt: modelNotSupportedRetryCount + 1, + max_retries: maxModelNotSupportedRetries, + message: `Copilot returned 400 model not supported (transient); retrying (attempt ${modelNotSupportedRetryCount + 1}/${maxModelNotSupportedRetries})`, + }); + onModelNotSupportedRetry(); + return true; + } + + // ── (c) Model-unavailable diagnostic (non-retryable model-not-supported 400) ─── + if (proxyRes.statusCode === 400 && parseModelNotSupportedFromBody(responseBody)) { + const { req } = authErrCtx; + logRequest('error', 'model_unavailable', { + request_id: requestId, + provider, + status: proxyRes.statusCode, + path: sanitizeForLog(req.url), + retries_attempted: modelNotSupportedRetryCount, + message: `Model is unavailable or retired — the requested model is not supported by ${provider}. ` + + 'Check that the model name is correct and not deprecated. ' + + 'If using model aliases, verify the alias resolves to an available model.', + }); + } + + logRequestCompletion(proxyRes.statusCode, responseBody.length, initiatorSent, billingInfo, completionCtx); + logUpstreamAuthError(proxyRes.statusCode, { ...authErrCtx, responseBody }); + + const resHeaders = { + ...proxyRes.headers, + 'x-request-id': requestId, + 'content-length': String(responseBody.length), + }; + delete resHeaders['transfer-encoding']; + res.writeHead(proxyRes.statusCode, resHeaders); + res.end(responseBody); + otel.endSpan(span, proxyRes.statusCode); + return false; +} + +module.exports = { + handle400WithRetry, +}; diff --git a/containers/api-proxy/upstream-retry.test.js b/containers/api-proxy/upstream-retry.test.js new file mode 100644 index 00000000..51939343 --- /dev/null +++ b/containers/api-proxy/upstream-retry.test.js @@ -0,0 +1,72 @@ +const { handle400WithRetry } = require('./upstream-retry'); + +describe('upstream-retry', () => { + function createBaseOptions() { + return { + provider: 'copilot', + requestId: 'req-1', + hasRetried: false, + onRetry: jest.fn(), + modelNotSupportedRetryCount: 0, + maxModelNotSupportedRetries: 2, + onModelNotSupportedRetry: jest.fn(), + completionCtx: {}, + authErrCtx: { req: { url: '/v1/chat/completions' } }, + initiatorSent: null, + billingInfo: null, + res: { writeHead: jest.fn(), end: jest.fn() }, + span: {}, + parseDeprecatedHeaderFromBody: jest.fn(() => null), + learnAndStripDeprecatedHeaderValue: jest.fn(() => false), + parseModelNotSupportedFromBody: jest.fn(() => false), + logRequest: jest.fn(), + sanitizeForLog: (value) => value, + logRequestCompletion: jest.fn(), + logUpstreamAuthError: jest.fn(), + otel: { endSpan: jest.fn() }, + }; + } + + test('triggers deprecated-header retry on first attempt', () => { + const opts = createBaseOptions(); + opts.parseDeprecatedHeaderFromBody.mockReturnValue({ + header: 'anthropic-beta', + value: 'deprecated-value', + }); + opts.learnAndStripDeprecatedHeaderValue.mockReturnValue(true); + const proxyRes = { statusCode: 400, headers: {} }; + + const didRetry = handle400WithRetry(proxyRes, { 'anthropic-beta': 'deprecated-value' }, Buffer.from('{}'), opts); + + expect(didRetry).toBe(true); + expect(opts.onRetry).toHaveBeenCalledWith({ 'anthropic-beta': 'deprecated-value' }); + expect(opts.res.writeHead).not.toHaveBeenCalled(); + }); + + test('logs model_unavailable and forwards response when retry is exhausted', () => { + const opts = createBaseOptions(); + opts.hasRetried = true; + opts.modelNotSupportedRetryCount = 2; + opts.parseModelNotSupportedFromBody.mockReturnValue(true); + const proxyRes = { + statusCode: 400, + headers: { 'content-type': 'application/json', 'transfer-encoding': 'chunked' }, + }; + const responseBody = Buffer.from('{"error":"The requested model is not supported"}'); + + const didRetry = handle400WithRetry(proxyRes, {}, responseBody, opts); + + expect(didRetry).toBe(false); + expect(opts.logRequest).toHaveBeenCalledWith('error', 'model_unavailable', expect.objectContaining({ + request_id: 'req-1', + retries_attempted: 2, + })); + expect(opts.logRequestCompletion).toHaveBeenCalledWith(400, responseBody.length, null, null, {}); + expect(opts.logUpstreamAuthError).toHaveBeenCalledWith(400, expect.objectContaining({ responseBody })); + expect(opts.res.writeHead).toHaveBeenCalledWith(400, expect.objectContaining({ + 'x-request-id': 'req-1', + 'content-length': String(responseBody.length), + })); + expect(opts.otel.endSpan).toHaveBeenCalledWith(opts.span, 400); + }); +}); diff --git a/containers/api-proxy/upstream-token.js b/containers/api-proxy/upstream-token.js new file mode 100644 index 00000000..ee2a16ea --- /dev/null +++ b/containers/api-proxy/upstream-token.js @@ -0,0 +1,35 @@ +'use strict'; + +const { computeTokenBudgetUsage } = require('./token-budget-log'); + +function setupTokenTracking(proxyRes, body, { + requestId, provider, req, startTime, billingInfo, + initiatorSent, span, isStreaming, + trackTokenUsage, sanitizeForLog, metrics, otel, logRequest, +}) { + // Extract model from request body as fallback for token tracking when the + // upstream response omits the model field (e.g., Copilot SDK streaming). + let requestModel = null; + if (body && body.length > 0) { + try { + const parsed = JSON.parse(body.toString('utf8')); + if (parsed && typeof parsed.model === 'string') requestModel = parsed.model; + } catch { /* non-JSON body */ } + } + trackTokenUsage(proxyRes, { + requestId, provider, path: sanitizeForLog(req.url), startTime, metrics, billingInfo, initiatorSent, requestModel, + onUsage: (normalizedUsage, model) => { + otel.setTokenAttributes(span, { provider, model, normalizedUsage, streaming: isStreaming }); + const budgetResult = computeTokenBudgetUsage({ logRequest, requestId, provider }, normalizedUsage, model); + otel.setBudgetAttributes(span, budgetResult); + return budgetResult; + }, + onSpanEnd: (statusCode) => { + otel.endSpan(span, statusCode); + }, + }); +} + +module.exports = { + setupTokenTracking, +}; diff --git a/containers/api-proxy/upstream-token.test.js b/containers/api-proxy/upstream-token.test.js new file mode 100644 index 00000000..d8a8cbdc --- /dev/null +++ b/containers/api-proxy/upstream-token.test.js @@ -0,0 +1,46 @@ +const { setupTokenTracking } = require('./upstream-token'); + +describe('upstream-token', () => { + test('extracts request model and wires OTEL usage/span callbacks', () => { + const trackTokenUsage = jest.fn(); + const otel = { + setTokenAttributes: jest.fn(), + setBudgetAttributes: jest.fn(), + endSpan: jest.fn(), + }; + const logRequest = jest.fn(); + + setupTokenTracking({}, Buffer.from(JSON.stringify({ model: 'gpt-5.4' })), { + requestId: 'req-1', + provider: 'copilot', + req: { url: '/v1/chat/completions' }, + startTime: 123, + billingInfo: null, + initiatorSent: null, + span: { id: 'span-1' }, + isStreaming: true, + trackTokenUsage, + sanitizeForLog: (value) => value, + metrics: { increment: jest.fn(), observe: jest.fn() }, + otel, + logRequest, + }); + + expect(trackTokenUsage).toHaveBeenCalledWith({}, expect.objectContaining({ + requestId: 'req-1', + requestModel: 'gpt-5.4', + path: '/v1/chat/completions', + })); + + const trackingOptions = trackTokenUsage.mock.calls[0][1]; + trackingOptions.onUsage({ input_tokens: 1, output_tokens: 2, total_tokens: 3 }, 'gpt-5.4'); + trackingOptions.onSpanEnd(200); + + expect(otel.setTokenAttributes).toHaveBeenCalledWith( + { id: 'span-1' }, + expect.objectContaining({ provider: 'copilot', model: 'gpt-5.4', streaming: true }), + ); + expect(otel.setBudgetAttributes).toHaveBeenCalled(); + expect(otel.endSpan).toHaveBeenCalledWith({ id: 'span-1' }, 200); + }); +});