From f28df179e563ba9deb967e505e3c044e15bb5a9e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 23 Jun 2026 18:20:49 +0000
Subject: [PATCH 1/2] Initial plan


From 4da9af1306fa6ddbf8b6b866b757c53a72dc0f86 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 23 Jun 2026 18:35:16 +0000
Subject: [PATCH 2/2] refactor(api-proxy): split upstream handlers

---
 containers/api-proxy/Dockerfile             |   1 +
 containers/api-proxy/upstream-log.js        |  89 ++++++++
 containers/api-proxy/upstream-log.test.js   |  62 ++++++
 containers/api-proxy/upstream-response.js   | 227 +++-----------------
 containers/api-proxy/upstream-retry.js      |  76 +++++++
 containers/api-proxy/upstream-retry.test.js |  72 +++++++
 containers/api-proxy/upstream-token.js      |  35 +++
 containers/api-proxy/upstream-token.test.js |  46 ++++
 8 files changed, 411 insertions(+), 197 deletions(-)
 create mode 100644 containers/api-proxy/upstream-log.js
 create mode 100644 containers/api-proxy/upstream-log.test.js
 create mode 100644 containers/api-proxy/upstream-retry.js
 create mode 100644 containers/api-proxy/upstream-retry.test.js
 create mode 100644 containers/api-proxy/upstream-token.js
 create mode 100644 containers/api-proxy/upstream-token.test.js

diff --git a/containers/api-proxy/Dockerfile b/containers/api-proxy/Dockerfile
index 8be0a2e4..c9a7fbfc 100644
--- a/containers/api-proxy/Dockerfile
+++ b/containers/api-proxy/Dockerfile
@@ -27,6 +27,7 @@ COPY server.js logging.js metrics.js rate-limiter.js \
      ai-credits-pricing.js models-dev-catalog.js models.dev.catalog.json \
      oidc-refresh-utils.js body-transform.js body-utils.js rate-limit.js websocket-proxy.js \
      deprecated-header-tracker.js billing-headers.js upstream-response.js \
+     upstream-log.js upstream-retry.js upstream-token.js \
      anthropic-cache.js otel.js otel-exporters.js otel-serialization.js \
      token-budget-log.js blocked-request-diagnostics.js \
      provider-env-constants.js provider-names.js ./
diff --git a/containers/api-proxy/upstream-log.js b/containers/api-proxy/upstream-log.js
new file mode 100644
index 00000000..904998b0
--- /dev/null
+++ b/containers/api-proxy/upstream-log.js
@@ -0,0 +1,124 @@
+'use strict';
+
+const { COPILOT_PLACEHOLDER_TOKEN } = require('./providers/copilot-byok');
+const { stripBearerPrefix } = require('./providers/copilot-auth');
+
+/**
+ * Build the message logged when a Copilot upstream rejects a request.
+ *
+ * When COPILOT_PROVIDER_BASE_URL is non-blank (BYOK), the message depends on
+ * COPILOT_PROVIDER_API_KEY after stripBearerPrefix(): placeholder sentinel, unset,
+ * or any other value. Without a BYOK base URL the generic API-key hint is used.
+ *
+ * @param {number} statusCode - Upstream HTTP status code.
+ * @param {object} [env=process.env] - Environment holding the BYOK settings.
+ * @returns {string} Message prefixed with `Upstream returned <statusCode>`.
+ */
+function buildCopilotAuthErrorMessage(statusCode, env = process.env) {
+  const baseMessage = `Upstream returned ${statusCode}`;
+  const byokBaseUrl = (env.COPILOT_PROVIDER_BASE_URL || '').trim();
+  const byokKey = stripBearerPrefix(env.COPILOT_PROVIDER_API_KEY);
+  const hasByokBaseUrl = Boolean(byokBaseUrl);
+
+  if (hasByokBaseUrl && byokKey === COPILOT_PLACEHOLDER_TOKEN) {
+    return `${baseMessage} — COPILOT_PROVIDER_API_KEY is the AWF placeholder sentinel. ` +
+      'This indicates an internal credential-isolation misconfiguration (real BYOK key not forwarded to api-proxy).';
+  }
+
+  if (hasByokBaseUrl && !byokKey) {
+    return `${baseMessage} — BYOK provider request to COPILOT_PROVIDER_BASE_URL failed because COPILOT_PROVIDER_API_KEY is not set.`;
+  }
+
+  if (hasByokBaseUrl) {
+    return `${baseMessage} — BYOK provider request to COPILOT_PROVIDER_BASE_URL failed. ` +
+      'Verify COPILOT_PROVIDER_BASE_URL and COPILOT_PROVIDER_API_KEY.';
+  }
+
+  return `${baseMessage} — check that the API key is valid and correctly formatted`;
+}
+
+/**
+ * Create the completion logger for a proxied request.
+ *
+ * The returned function decrements the `active_requests` gauge, records the
+ * request/byte/duration metrics, calls applyMaxRunsInvocation() for 2xx
+ * statuses, and emits a `request_complete` log entry.
+ *
+ * @param {{ metrics: object, logRequest: Function, sanitizeForLog: Function,
+ *           applyMaxRunsInvocation: Function }} deps
+ * @returns {Function} logRequestCompletion(statusCode, responseBytes,
+ *   initiatorSent, billingInfo, ctx)
+ */
+function createLogRequestCompletion({ metrics, logRequest, sanitizeForLog, applyMaxRunsInvocation }) {
+  return function logRequestCompletion(statusCode, responseBytes, initiatorSent, billingInfo, {
+    startTime, provider, req, requestBytes, targetHost, requestId,
+  }) {
+    const duration = Date.now() - startTime;
+    const sc = metrics.statusClass(statusCode);
+    metrics.gaugeDec('active_requests', { provider });
+    metrics.increment('requests_total', { provider, method: req.method, status_class: sc });
+    metrics.increment('response_bytes_total', { provider }, responseBytes);
+    metrics.observe('request_duration_ms', duration, { provider });
+    if (statusCode >= 200 && statusCode < 300) {
+      applyMaxRunsInvocation();
+    }
+    const logFields = {
+      request_id: requestId, provider, method: req.method,
+      path: sanitizeForLog(req.url), status: statusCode,
+      duration_ms: duration, request_bytes: requestBytes,
+      response_bytes: responseBytes, upstream_host: targetHost,
+    };
+    // Optional fields are attached only when truthy.
+    if (initiatorSent) logFields.x_initiator = initiatorSent;
+    if (billingInfo) logFields.billing = billingInfo;
+    logRequest('info', 'request_complete', logFields);
+  };
+}
+
+/**
+ * Create the logger for upstream 400/401/403 responses.
+ *
+ * The returned function emits an `upstream_auth_error` warning for 401/403
+ * (after calling applyPermissionDenied()) and for 400, unless the 400 body is
+ * a model-not-supported error. Other status codes are ignored.
+ *
+ * @param {{ logRequest: Function, sanitizeForLog: Function,
+ *           applyPermissionDenied: Function,
+ *           parseModelNotSupportedFromBody: Function }} deps
+ * @returns {Function} logUpstreamAuthError(statusCode, ctx)
+ */
+function createLogUpstreamAuthError({
+  logRequest,
+  sanitizeForLog,
+  applyPermissionDenied,
+  parseModelNotSupportedFromBody,
+}) {
+  return function logUpstreamAuthError(statusCode, { requestId, provider, targetHost, req, responseBody }) {
+    const isAuthStatus = statusCode === 401 || statusCode === 403;
+    if (!isAuthStatus && statusCode !== 400) return;
+
+    if (isAuthStatus) {
+      applyPermissionDenied();
+    } else if (responseBody && parseModelNotSupportedFromBody(responseBody)) {
+      // Suppress generic auth-error message when the 400 is a model-not-supported
+      // error — that case is handled by the model_unavailable diagnostic.
+      return;
+    }
+
+    // Built only once a log entry is certain, so other statuses skip the env lookup.
+    const authErrorMessage = provider === 'copilot'
+      ? buildCopilotAuthErrorMessage(statusCode)
+      : `Upstream returned ${statusCode} — check that the API key is valid and correctly formatted`;
+    logRequest('warn', 'upstream_auth_error', {
+      request_id: requestId, provider, status: statusCode,
+      upstream_host: targetHost, path: sanitizeForLog(req.url),
+      message: authErrorMessage,
+    });
+  };
+}
+
+module.exports = {
+  createLogRequestCompletion,
+  createLogUpstreamAuthError,
+  buildCopilotAuthErrorMessage,
+};
diff --git a/containers/api-proxy/upstream-log.test.js b/containers/api-proxy/upstream-log.test.js
new file mode 100644
index 00000000..7db115af
--- /dev/null
+++ b/containers/api-proxy/upstream-log.test.js
@@ -0,0 +1,87 @@
+const {
+  createLogRequestCompletion,
+  createLogUpstreamAuthError,
+} = require('./upstream-log');
+
+describe('upstream-log', () => {
+  test('logRequestCompletion records metrics and invokes max-runs on success', () => {
+    const metrics = {
+      statusClass: jest.fn(() => '2xx'),
+      gaugeDec: jest.fn(),
+      increment: jest.fn(),
+      observe: jest.fn(),
+    };
+    const logRequest = jest.fn();
+    const applyMaxRunsInvocation = jest.fn();
+    const logRequestCompletion = createLogRequestCompletion({
+      metrics,
+      logRequest,
+      sanitizeForLog: (value) => value,
+      applyMaxRunsInvocation,
+    });
+
+    logRequestCompletion(200, 42, 'agent', { prompt_tokens: 10 }, {
+      startTime: Date.now() - 5,
+      provider: 'copilot',
+      req: { method: 'POST', url: '/v1/chat/completions' },
+      requestBytes: 12,
+      targetHost: 'api.githubcopilot.com',
+      requestId: 'req-1',
+    });
+
+    expect(metrics.gaugeDec).toHaveBeenCalledWith('active_requests', { provider: 'copilot' });
+    expect(applyMaxRunsInvocation).toHaveBeenCalledTimes(1);
+    expect(logRequest).toHaveBeenCalledWith('info', 'request_complete', expect.objectContaining({
+      request_id: 'req-1',
+      status: 200,
+      x_initiator: 'agent',
+    }));
+  });
+
+  test('logUpstreamAuthError suppresses 400 model-not-supported auth log noise', () => {
+    const logRequest = jest.fn();
+    const applyPermissionDenied = jest.fn();
+    const logUpstreamAuthError = createLogUpstreamAuthError({
+      logRequest,
+      sanitizeForLog: (value) => value,
+      applyPermissionDenied,
+      parseModelNotSupportedFromBody: () => true,
+    });
+
+    logUpstreamAuthError(400, {
+      requestId: 'req-1',
+      provider: 'copilot',
+      targetHost: 'api.githubcopilot.com',
+      req: { url: '/v1/chat/completions' },
+      responseBody: Buffer.from('The requested model is not supported'),
+    });
+
+    expect(logRequest).not.toHaveBeenCalled();
+    expect(applyPermissionDenied).not.toHaveBeenCalled();
+  });
+
+  test('logUpstreamAuthError flags permission denied and logs on 401', () => {
+    const logRequest = jest.fn();
+    const applyPermissionDenied = jest.fn();
+    const logUpstreamAuthError = createLogUpstreamAuthError({
+      logRequest,
+      sanitizeForLog: (value) => value,
+      applyPermissionDenied,
+      parseModelNotSupportedFromBody: () => false,
+    });
+
+    logUpstreamAuthError(401, {
+      requestId: 'req-2',
+      provider: 'openai',
+      targetHost: 'api.openai.com',
+      req: { url: '/v1/chat/completions' },
+    });
+
+    expect(applyPermissionDenied).toHaveBeenCalledTimes(1);
+    expect(logRequest).toHaveBeenCalledWith('warn', 'upstream_auth_error', expect.objectContaining({
+      request_id: 'req-2',
+      status: 401,
+      message: 'Upstream returned 401 — check that the API key is valid and correctly formatted',
+    }));
+  });
+});
diff --git a/containers/api-proxy/upstream-response.js b/containers/api-proxy/upstream-response.js
index bd43b0b7..439e1a5b 100644
--- a/containers/api-proxy/upstream-response.js
+++ b/containers/api-proxy/upstream-response.js
@@ -1,8 +1,8 @@
 'use strict';
 
-const { computeTokenBudgetUsage } = require('./token-budget-log');
-const { COPILOT_PLACEHOLDER_TOKEN } = require('./providers/copilot-byok');
-const { stripBearerPrefix } = require('./providers/copilot-auth');
+const { createLogRequestCompletion, createLogUpstreamAuthError, buildCopilotAuthErrorMessage } = require('./upstream-log');
+const { handle400WithRetry } = require('./upstream-retry');
+const { setupTokenTracking } = require('./upstream-token');
 
 /** Maximum number of times to retry a Copilot 400 "model not supported" response. */
 const MAX_MODEL_NOT_SUPPORTED_RETRIES = 2;
@@ -25,29 +25,6 @@ function parseModelNotSupportedFromBody(body) {
   return MODEL_NOT_SUPPORTED_PATTERN.test(body.toString('utf8'));
 }
 
-function buildCopilotAuthErrorMessage(statusCode, env = process.env) {
-  const baseMessage = `Upstream returned ${statusCode}`;
-  const byokBaseUrl = (env.COPILOT_PROVIDER_BASE_URL || '').trim();
-  const byokKey = stripBearerPrefix(env.COPILOT_PROVIDER_API_KEY);
-  const hasByokBaseUrl = Boolean(byokBaseUrl);
-
-  if (hasByokBaseUrl && byokKey === COPILOT_PLACEHOLDER_TOKEN) {
-    return `${baseMessage} — COPILOT_PROVIDER_API_KEY is the AWF placeholder sentinel. ` +
-      'This indicates an internal credential-isolation misconfiguration (real BYOK key not forwarded to api-proxy).';
-  }
-
-  if (hasByokBaseUrl && !byokKey) {
-    return `${baseMessage} — BYOK provider request to COPILOT_PROVIDER_BASE_URL failed because COPILOT_PROVIDER_API_KEY is not set.`;
-  }
-
-  if (hasByokBaseUrl) {
-    return `${baseMessage} — BYOK provider request to COPILOT_PROVIDER_BASE_URL failed. ` +
-      'Verify COPILOT_PROVIDER_BASE_URL and COPILOT_PROVIDER_API_KEY.';
-  }
-
-  return `${baseMessage} — check that the API key is valid and correctly formatted`;
-}
-
 function createUpstreamResponseHandlers({
   metrics,
   logRequest,
@@ -61,176 +38,19 @@ function createUpstreamResponseHandlers({
   parseDeprecatedHeaderFromBody,
   learnAndStripDeprecatedHeaderValue,
 }) {
-  function logRequestCompletion(statusCode, responseBytes, initiatorSent, billingInfo, {
-    startTime, provider, req, requestBytes, targetHost, requestId,
-  }) {
-    const duration = Date.now() - startTime;
-    const sc = metrics.statusClass(statusCode);
-    metrics.gaugeDec('active_requests', { provider });
-    metrics.increment('requests_total', { provider, method: req.method, status_class: sc });
-    metrics.increment('response_bytes_total', { provider }, responseBytes);
-    metrics.observe('request_duration_ms', duration, { provider });
-    if (statusCode >= 200 && statusCode < 300) {
-      applyMaxRunsInvocation();
-    }
-    const logFields = {
-      request_id: requestId, provider, method: req.method,
-      path: sanitizeForLog(req.url), status: statusCode,
-      duration_ms: duration, request_bytes: requestBytes,
-      response_bytes: responseBytes, upstream_host: targetHost,
-    };
-    if (initiatorSent) logFields.x_initiator = initiatorSent;
-    if (billingInfo) logFields.billing = billingInfo;
-    logRequest('info', 'request_complete', logFields);
-  }
-
-  function logUpstreamAuthError(statusCode, { requestId, provider, targetHost, req, responseBody }) {
-    const authErrorMessage = provider === 'copilot'
-      ? buildCopilotAuthErrorMessage(statusCode)
-      : `Upstream returned ${statusCode} — check that the API key is valid and correctly formatted`;
-
-    if (statusCode === 401 || statusCode === 403) {
-      applyPermissionDenied();
-      logRequest('warn', 'upstream_auth_error', {
-        request_id: requestId, provider, status: statusCode,
-        upstream_host: targetHost, path: sanitizeForLog(req.url),
-        message: authErrorMessage,
-      });
-    } else if (statusCode === 400) {
-      // Suppress generic auth-error message when the 400 is a model-not-supported
-      // error — that case is handled by the model_unavailable diagnostic.
-      if (responseBody && parseModelNotSupportedFromBody(responseBody)) return;
-      logRequest('warn', 'upstream_auth_error', {
-        request_id: requestId, provider, status: statusCode,
-        upstream_host: targetHost, path: sanitizeForLog(req.url),
-        message: authErrorMessage,
-      });
-    }
-  }
-
-  /**
-   * Handle a buffered 400 response body: attempt retry via the deprecated-header
-   * or model-not-supported paths, or log the model_unavailable diagnostic and
-   * forward the response to the client.
-   *
-   * Returns true when a retry was triggered (caller should return early),
-   * false when the response was forwarded to the client.
-   *
-   * @param {import('http').IncomingMessage} proxyRes
-   * @param {object} requestHeaders
-   * @param {Buffer} responseBody
-   * @param {{ provider: string, requestId: string, hasRetried: boolean,
-   *           onRetry: Function, modelNotSupportedRetryCount: number,
-   *           onModelNotSupportedRetry: Function|undefined, completionCtx: object,
-   *           authErrCtx: object, initiatorSent: string|null,
-   *           billingInfo: object|null, res: object, span: object }} opts
-   * @returns {boolean}
-   */
-  function handle400WithRetry(proxyRes, requestHeaders, responseBody, {
-    provider, requestId, hasRetried, onRetry,
-    modelNotSupportedRetryCount, onModelNotSupportedRetry,
-    completionCtx, authErrCtx, initiatorSent, billingInfo, res, span,
-  }) {
-    // ── (a) Deprecated beta-header retry (first attempt for anthropic/copilot) ──
-    if (!hasRetried && (provider === 'anthropic' || provider === 'copilot')) {
-      const deprecated = parseDeprecatedHeaderFromBody(responseBody);
-      if (deprecated) {
-        const retryHeaders = { ...requestHeaders };
-        const stripped = learnAndStripDeprecatedHeaderValue(
-          retryHeaders, deprecated.header, deprecated.value, requestId, provider,
-        );
-        if (stripped) {
-          onRetry(retryHeaders);
-          return true;
-        }
-      }
-    }
-
-    // ── (b) Transient model-not-supported retry (copilot only, up to MAX) ──────
-    if (
-      provider === 'copilot' &&
-      modelNotSupportedRetryCount < MAX_MODEL_NOT_SUPPORTED_RETRIES &&
-      onModelNotSupportedRetry &&
-      parseModelNotSupportedFromBody(responseBody)
-    ) {
-      logRequest('warn', 'model_not_supported_retry', {
-        request_id: requestId,
-        provider,
-        retry_attempt: modelNotSupportedRetryCount + 1,
-        max_retries: MAX_MODEL_NOT_SUPPORTED_RETRIES,
-        message: `Copilot returned 400 model not supported (transient); retrying (attempt ${modelNotSupportedRetryCount + 1}/${MAX_MODEL_NOT_SUPPORTED_RETRIES})`,
-      });
-      onModelNotSupportedRetry();
-      return true;
-    }
-
-    // ── (c) Model-unavailable diagnostic (non-retryable model-not-supported 400) ───
-    if (proxyRes.statusCode === 400 && parseModelNotSupportedFromBody(responseBody)) {
-      const { req } = authErrCtx;
-      logRequest('error', 'model_unavailable', {
-        request_id: requestId,
-        provider,
-        status: proxyRes.statusCode,
-        path: sanitizeForLog(req.url),
-        retries_attempted: modelNotSupportedRetryCount,
-        message: `Model is unavailable or retired — the requested model is not supported by ${provider}. ` +
-          'Check that the model name is correct and not deprecated. ' +
-          'If using model aliases, verify the alias resolves to an available model.',
-      });
-    }
-
-    logRequestCompletion(proxyRes.statusCode, responseBody.length, initiatorSent, billingInfo, completionCtx);
-    logUpstreamAuthError(proxyRes.statusCode, { ...authErrCtx, responseBody });
-
-    const resHeaders = {
-      ...proxyRes.headers,
-      'x-request-id': requestId,
-      'content-length': String(responseBody.length),
-    };
-    delete resHeaders['transfer-encoding'];
-    res.writeHead(proxyRes.statusCode, resHeaders);
-    res.end(responseBody);
-    otel.endSpan(span, proxyRes.statusCode);
-    return false;
-  }
-
-  /**
-   * Wire up token-usage tracking for a streaming or non-streaming normal
-   * (non-400) response.  Parses the request body for a model fallback and
-   * attaches OTEL span callbacks.
-   *
-   * @param {import('http').IncomingMessage} proxyRes
-   * @param {Buffer} body - Original request body (used to extract the model name)
-   * @param {{ requestId: string, provider: string, req: object, startTime: number,
-   *           billingInfo: object|null, initiatorSent: string|null,
-   *           span: object, isStreaming: boolean }} opts
-   */
-  function setupTokenTracking(proxyRes, body, {
-    requestId, provider, req, startTime, billingInfo,
-    initiatorSent, span, isStreaming,
-  }) {
-    // Extract model from request body as fallback for token tracking when the
-    // upstream response omits the model field (e.g., Copilot SDK streaming).
-    let requestModel = null;
-    if (body && body.length > 0) {
-      try {
-        const parsed = JSON.parse(body.toString('utf8'));
-        if (parsed && typeof parsed.model === 'string') requestModel = parsed.model;
-      } catch { /* non-JSON body */ }
-    }
-    trackTokenUsage(proxyRes, {
-      requestId, provider, path: sanitizeForLog(req.url), startTime, metrics, billingInfo, initiatorSent, requestModel,
-      onUsage: (normalizedUsage, model) => {
-        otel.setTokenAttributes(span, { provider, model, normalizedUsage, streaming: isStreaming });
-        const budgetResult = computeTokenBudgetUsage({ logRequest, requestId, provider }, normalizedUsage, model);
-        otel.setBudgetAttributes(span, budgetResult);
-        return budgetResult;
-      },
-      onSpanEnd: (statusCode) => {
-        otel.endSpan(span, statusCode);
-      },
-    });
-  }
+  const logRequestCompletion = createLogRequestCompletion({
+    metrics,
+    logRequest,
+    sanitizeForLog,
+    applyMaxRunsInvocation,
+  });
+
+  const logUpstreamAuthError = createLogUpstreamAuthError({
+    logRequest,
+    sanitizeForLog,
+    applyPermissionDenied,
+    parseModelNotSupportedFromBody,
+  });
 
   function handleUpstreamResponse(proxyRes, requestHeaders, {
     body, res, provider, requestId, req, targetHost, startTime, span, requestBytes,
@@ -275,8 +95,16 @@ function createUpstreamResponseHandlers({
         const responseBody = Buffer.concat(bufferedChunks);
         const didRetry = handle400WithRetry(proxyRes, requestHeaders, responseBody, {
           provider, requestId, hasRetried, onRetry,
-          modelNotSupportedRetryCount, onModelNotSupportedRetry,
+          modelNotSupportedRetryCount, maxModelNotSupportedRetries: MAX_MODEL_NOT_SUPPORTED_RETRIES, onModelNotSupportedRetry,
           completionCtx, authErrCtx, initiatorSent, billingInfo, res, span,
+          parseDeprecatedHeaderFromBody,
+          learnAndStripDeprecatedHeaderValue,
+          parseModelNotSupportedFromBody,
+          logRequest,
+          sanitizeForLog,
+          logRequestCompletion,
+          logUpstreamAuthError,
+          otel,
         });
         if (didRetry) return;
       });
@@ -297,6 +125,11 @@ function createUpstreamResponseHandlers({
     setupTokenTracking(proxyRes, body, {
       requestId, provider, req, startTime, billingInfo,
       initiatorSent, span, isStreaming,
+      trackTokenUsage,
+      sanitizeForLog,
+      metrics,
+      otel,
+      logRequest,
     });
   }
 
diff --git a/containers/api-proxy/upstream-retry.js b/containers/api-proxy/upstream-retry.js
new file mode 100644
index 00000000..cf579bda
--- /dev/null
+++ b/containers/api-proxy/upstream-retry.js
@@ -0,0 +1,103 @@
+'use strict';
+
+/**
+ * Handle a buffered 400 response body: attempt retry via the deprecated-header
+ * or model-not-supported paths, or log the model_unavailable diagnostic and
+ * forward the response to the client.
+ *
+ * Returns true when a retry was triggered (caller should return early),
+ * false when the response was forwarded to the client.
+ *
+ * Collaborators (parsers, loggers, otel) are passed in through the options
+ * object rather than imported by this module.
+ *
+ * @param {import('http').IncomingMessage} proxyRes
+ * @param {object} requestHeaders
+ * @param {Buffer} responseBody
+ * @param {{ provider: string, requestId: string, hasRetried: boolean,
+ *           onRetry: Function, modelNotSupportedRetryCount: number,
+ *           maxModelNotSupportedRetries: number,
+ *           onModelNotSupportedRetry: Function|undefined, completionCtx: object,
+ *           authErrCtx: object, initiatorSent: string|null,
+ *           billingInfo: object|null, res: object, span: object,
+ *           parseDeprecatedHeaderFromBody: Function,
+ *           learnAndStripDeprecatedHeaderValue: Function,
+ *           parseModelNotSupportedFromBody: Function, logRequest: Function,
+ *           sanitizeForLog: Function, logRequestCompletion: Function,
+ *           logUpstreamAuthError: Function, otel: object }} opts
+ * @returns {boolean}
+ */
+function handle400WithRetry(proxyRes, requestHeaders, responseBody, {
+  provider, requestId, hasRetried, onRetry,
+  modelNotSupportedRetryCount, maxModelNotSupportedRetries, onModelNotSupportedRetry,
+  completionCtx, authErrCtx, initiatorSent, billingInfo, res, span,
+  parseDeprecatedHeaderFromBody, learnAndStripDeprecatedHeaderValue,
+  parseModelNotSupportedFromBody, logRequest, sanitizeForLog,
+  logRequestCompletion, logUpstreamAuthError, otel,
+}) {
+  // ── (a) Deprecated beta-header retry (first attempt for anthropic/copilot) ──
+  if (!hasRetried && (provider === 'anthropic' || provider === 'copilot')) {
+    const deprecated = parseDeprecatedHeaderFromBody(responseBody);
+    if (deprecated) {
+      const retryHeaders = { ...requestHeaders };
+      const stripped = learnAndStripDeprecatedHeaderValue(
+        retryHeaders, deprecated.header, deprecated.value, requestId, provider,
+      );
+      if (stripped) {
+        onRetry(retryHeaders);
+        return true;
+      }
+    }
+  }
+
+  // ── (b) Transient model-not-supported retry (copilot only, up to MAX) ──────
+  if (
+    provider === 'copilot' &&
+    modelNotSupportedRetryCount < maxModelNotSupportedRetries &&
+    onModelNotSupportedRetry &&
+    parseModelNotSupportedFromBody(responseBody)
+  ) {
+    logRequest('warn', 'model_not_supported_retry', {
+      request_id: requestId,
+      provider,
+      retry_attempt: modelNotSupportedRetryCount + 1,
+      max_retries: maxModelNotSupportedRetries,
+      message: `Copilot returned 400 model not supported (transient); retrying (attempt ${modelNotSupportedRetryCount + 1}/${maxModelNotSupportedRetries})`,
+    });
+    onModelNotSupportedRetry();
+    return true;
+  }
+
+  // ── (c) Model-unavailable diagnostic (non-retryable model-not-supported 400) ───
+  if (proxyRes.statusCode === 400 && parseModelNotSupportedFromBody(responseBody)) {
+    const { req } = authErrCtx;
+    logRequest('error', 'model_unavailable', {
+      request_id: requestId,
+      provider,
+      status: proxyRes.statusCode,
+      path: sanitizeForLog(req.url),
+      retries_attempted: modelNotSupportedRetryCount,
+      message: `Model is unavailable or retired — the requested model is not supported by ${provider}. ` +
+        'Check that the model name is correct and not deprecated. ' +
+        'If using model aliases, verify the alias resolves to an available model.',
+    });
+  }
+
+  logRequestCompletion(proxyRes.statusCode, responseBody.length, initiatorSent, billingInfo, completionCtx);
+  logUpstreamAuthError(proxyRes.statusCode, { ...authErrCtx, responseBody });
+
+  const resHeaders = {
+    ...proxyRes.headers,
+    'x-request-id': requestId,
+    'content-length': String(responseBody.length),
+  };
+  delete resHeaders['transfer-encoding'];
+  res.writeHead(proxyRes.statusCode, resHeaders);
+  res.end(responseBody);
+  otel.endSpan(span, proxyRes.statusCode);
+  return false;
+}
+
+module.exports = {
+  handle400WithRetry,
+};
diff --git a/containers/api-proxy/upstream-retry.test.js b/containers/api-proxy/upstream-retry.test.js
new file mode 100644
index 00000000..51939343
--- /dev/null
+++ b/containers/api-proxy/upstream-retry.test.js
@@ -0,0 +1,93 @@
+const { handle400WithRetry } = require('./upstream-retry');
+
+describe('upstream-retry', () => {
+  function createBaseOptions() {
+    return {
+      provider: 'copilot',
+      requestId: 'req-1',
+      hasRetried: false,
+      onRetry: jest.fn(),
+      modelNotSupportedRetryCount: 0,
+      maxModelNotSupportedRetries: 2,
+      onModelNotSupportedRetry: jest.fn(),
+      completionCtx: {},
+      authErrCtx: { req: { url: '/v1/chat/completions' } },
+      initiatorSent: null,
+      billingInfo: null,
+      res: { writeHead: jest.fn(), end: jest.fn() },
+      span: {},
+      parseDeprecatedHeaderFromBody: jest.fn(() => null),
+      learnAndStripDeprecatedHeaderValue: jest.fn(() => false),
+      parseModelNotSupportedFromBody: jest.fn(() => false),
+      logRequest: jest.fn(),
+      sanitizeForLog: (value) => value,
+      logRequestCompletion: jest.fn(),
+      logUpstreamAuthError: jest.fn(),
+      otel: { endSpan: jest.fn() },
+    };
+  }
+
+  test('triggers deprecated-header retry on first attempt', () => {
+    const opts = createBaseOptions();
+    opts.parseDeprecatedHeaderFromBody.mockReturnValue({
+      header: 'anthropic-beta',
+      value: 'deprecated-value',
+    });
+    opts.learnAndStripDeprecatedHeaderValue.mockReturnValue(true);
+    const proxyRes = { statusCode: 400, headers: {} };
+
+    const didRetry = handle400WithRetry(proxyRes, { 'anthropic-beta': 'deprecated-value' }, Buffer.from('{}'), opts);
+
+    expect(didRetry).toBe(true);
+    expect(opts.onRetry).toHaveBeenCalledWith({ 'anthropic-beta': 'deprecated-value' });
+    expect(opts.res.writeHead).not.toHaveBeenCalled();
+  });
+
+  test('retries a transient model-not-supported 400 while attempts remain', () => {
+    const opts = createBaseOptions();
+    opts.hasRetried = true;
+    opts.modelNotSupportedRetryCount = 1;
+    opts.parseModelNotSupportedFromBody.mockReturnValue(true);
+    const proxyRes = { statusCode: 400, headers: {} };
+
+    const didRetry = handle400WithRetry(proxyRes, {}, Buffer.from('{}'), opts);
+
+    expect(didRetry).toBe(true);
+    expect(opts.onModelNotSupportedRetry).toHaveBeenCalledTimes(1);
+    expect(opts.logRequest).toHaveBeenCalledWith('warn', 'model_not_supported_retry', expect.objectContaining({
+      retry_attempt: 2,
+      max_retries: 2,
+    }));
+    expect(opts.res.writeHead).not.toHaveBeenCalled();
+  });
+
+  test('logs model_unavailable and forwards response when retry is exhausted', () => {
+    const opts = createBaseOptions();
+    opts.hasRetried = true;
+    opts.modelNotSupportedRetryCount = 2;
+    opts.parseModelNotSupportedFromBody.mockReturnValue(true);
+    const proxyRes = {
+      statusCode: 400,
+      headers: { 'content-type': 'application/json', 'transfer-encoding': 'chunked' },
+    };
+    const responseBody = Buffer.from('{"error":"The requested model is not supported"}');
+
+    const didRetry = handle400WithRetry(proxyRes, {}, responseBody, opts);
+
+    expect(didRetry).toBe(false);
+    expect(opts.logRequest).toHaveBeenCalledWith('error', 'model_unavailable', expect.objectContaining({
+      request_id: 'req-1',
+      retries_attempted: 2,
+    }));
+    expect(opts.logRequestCompletion).toHaveBeenCalledWith(400, responseBody.length, null, null, {});
+    expect(opts.logUpstreamAuthError).toHaveBeenCalledWith(400, expect.objectContaining({ responseBody }));
+    expect(opts.res.writeHead).toHaveBeenCalledWith(400, expect.objectContaining({
+      'x-request-id': 'req-1',
+      'content-length': String(responseBody.length),
+    }));
+    // The buffered body is re-sent with an explicit content-length, so chunked framing must be dropped.
+    expect(opts.res.writeHead.mock.calls[0][1]).not.toHaveProperty('transfer-encoding');
+    expect(opts.res.end).toHaveBeenCalledWith(responseBody);
+    expect(opts.otel.endSpan).toHaveBeenCalledWith(opts.span, 400);
+  });
+});
diff --git a/containers/api-proxy/upstream-token.js b/containers/api-proxy/upstream-token.js
new file mode 100644
index 00000000..ee2a16ea
--- /dev/null
+++ b/containers/api-proxy/upstream-token.js
@@ -0,0 +1,48 @@
+'use strict';
+
+const { computeTokenBudgetUsage } = require('./token-budget-log');
+
+/**
+ * Wire up token-usage tracking for a streaming or non-streaming normal
+ * (non-400) response.  Parses the request body for a model fallback and
+ * attaches OTEL span callbacks.
+ *
+ * @param {import('http').IncomingMessage} proxyRes
+ * @param {Buffer} body - Original request body (used to extract the model name)
+ * @param {{ requestId: string, provider: string, req: object, startTime: number,
+ *           billingInfo: object|null, initiatorSent: string|null,
+ *           span: object, isStreaming: boolean, trackTokenUsage: Function,
+ *           sanitizeForLog: Function, metrics: object, otel: object,
+ *           logRequest: Function }} opts
+ */
+function setupTokenTracking(proxyRes, body, {
+  requestId, provider, req, startTime, billingInfo,
+  initiatorSent, span, isStreaming,
+  trackTokenUsage, sanitizeForLog, metrics, otel, logRequest,
+}) {
+  // Extract model from request body as fallback for token tracking when the
+  // upstream response omits the model field (e.g., Copilot SDK streaming).
+  let requestModel = null;
+  if (body && body.length > 0) {
+    try {
+      const parsed = JSON.parse(body.toString('utf8'));
+      if (parsed && typeof parsed.model === 'string') requestModel = parsed.model;
+    } catch { /* non-JSON body */ }
+  }
+  trackTokenUsage(proxyRes, {
+    requestId, provider, path: sanitizeForLog(req.url), startTime, metrics, billingInfo, initiatorSent, requestModel,
+    onUsage: (normalizedUsage, model) => {
+      otel.setTokenAttributes(span, { provider, model, normalizedUsage, streaming: isStreaming });
+      const budgetResult = computeTokenBudgetUsage({ logRequest, requestId, provider }, normalizedUsage, model);
+      otel.setBudgetAttributes(span, budgetResult);
+      return budgetResult;
+    },
+    onSpanEnd: (statusCode) => {
+      otel.endSpan(span, statusCode);
+    },
+  });
+}
+
+module.exports = {
+  setupTokenTracking,
+};
diff --git a/containers/api-proxy/upstream-token.test.js b/containers/api-proxy/upstream-token.test.js
new file mode 100644
index 00000000..d8a8cbdc
--- /dev/null
+++ b/containers/api-proxy/upstream-token.test.js
@@ -0,0 +1,68 @@
+const { setupTokenTracking } = require('./upstream-token');
+
+describe('upstream-token', () => {
+  test('extracts request model and wires OTEL usage/span callbacks', () => {
+    const trackTokenUsage = jest.fn();
+    const otel = {
+      setTokenAttributes: jest.fn(),
+      setBudgetAttributes: jest.fn(),
+      endSpan: jest.fn(),
+    };
+    const logRequest = jest.fn();
+
+    setupTokenTracking({}, Buffer.from(JSON.stringify({ model: 'gpt-5.4' })), {
+      requestId: 'req-1',
+      provider: 'copilot',
+      req: { url: '/v1/chat/completions' },
+      startTime: 123,
+      billingInfo: null,
+      initiatorSent: null,
+      span: { id: 'span-1' },
+      isStreaming: true,
+      trackTokenUsage,
+      sanitizeForLog: (value) => value,
+      metrics: { increment: jest.fn(), observe: jest.fn() },
+      otel,
+      logRequest,
+    });
+
+    expect(trackTokenUsage).toHaveBeenCalledWith({}, expect.objectContaining({
+      requestId: 'req-1',
+      requestModel: 'gpt-5.4',
+      path: '/v1/chat/completions',
+    }));
+
+    const trackingOptions = trackTokenUsage.mock.calls[0][1];
+    trackingOptions.onUsage({ input_tokens: 1, output_tokens: 2, total_tokens: 3 }, 'gpt-5.4');
+    trackingOptions.onSpanEnd(200);
+
+    expect(otel.setTokenAttributes).toHaveBeenCalledWith(
+      { id: 'span-1' },
+      expect.objectContaining({ provider: 'copilot', model: 'gpt-5.4', streaming: true }),
+    );
+    expect(otel.setBudgetAttributes).toHaveBeenCalled();
+    expect(otel.endSpan).toHaveBeenCalledWith({ id: 'span-1' }, 200);
+  });
+
+  test('falls back to a null request model when the body is not JSON', () => {
+    const trackTokenUsage = jest.fn();
+
+    setupTokenTracking({}, Buffer.from('not json'), {
+      requestId: 'req-2',
+      provider: 'copilot',
+      req: { url: '/v1/chat/completions' },
+      startTime: 123,
+      billingInfo: null,
+      initiatorSent: null,
+      span: {},
+      isStreaming: false,
+      trackTokenUsage,
+      sanitizeForLog: (value) => value,
+      metrics: {},
+      otel: {},
+      logRequest: jest.fn(),
+    });
+
+    expect(trackTokenUsage).toHaveBeenCalledWith({}, expect.objectContaining({ requestModel: null }));
+  });
+});