Merge branch 'main' of https://github.com/githubnext/gh-aw-firewall

lpcox · lpcox · commit 8c46503b5b4b · 2026-06-18T11:27:18.000-07:00
diff --git a/containers/api-proxy/Dockerfile b/containers/api-proxy/Dockerfile
@@ -17,7 +17,7 @@ RUN npm ci --omit=dev
 # Copy application files
 COPY server.js logging.js metrics.js rate-limiter.js \
      token-tracker.js token-persistence.js token-parsers.js \
-     token-tracker-http.js token-tracker-ws.js \
+     token-tracker-http.js token-tracker-ws.js token-tracker-shared.js \
      model-resolver.js model-utils.js model-body-rewriter.js proxy-utils.js adapter-factory.js anthropic-transforms.js \
      model-config.js key-validation.js server-factory.js startup.js \
      proxy-request.js http-client.js body-handler.js model-discovery.js management.js oidc-token-provider.js \
@@ -27,7 +27,8 @@ COPY server.js logging.js metrics.js rate-limiter.js \
      ai-credits-pricing.js models-dev-catalog.js models.dev.catalog.json \
      oidc-refresh-utils.js body-transform.js body-utils.js rate-limit.js websocket-proxy.js \
      deprecated-header-tracker.js billing-headers.js upstream-response.js \
-     anthropic-cache.js otel.js token-budget-log.js blocked-request-diagnostics.js \
+     anthropic-cache.js otel.js otel-exporters.js otel-serialization.js \
+     token-budget-log.js blocked-request-diagnostics.js \
      provider-env-constants.js ./
 COPY guards/ ./guards/
 COPY providers/ ./providers/
diff --git a/containers/api-proxy/dockerfile-copy-coverage.test.js b/containers/api-proxy/dockerfile-copy-coverage.test.js
@@ -0,0 +1,119 @@
+/**
+ * Guard test: every local module reachable from the runtime entrypoint
+ * (server.js) MUST be present in the Dockerfile COPY list.
+ *
+ * Background: the api-proxy image copies source files individually by name
+ * (no bundler). When a refactor adds a new module but forgets to update the
+ * Dockerfile, `require()` throws MODULE_NOT_FOUND inside the container. The
+ * proxy's graceful-degradation guards then silently stub the affected
+ * subsystem (e.g. token tracking, OTEL), so the container still boots but
+ * produces no token-usage.jsonl — causing AI-credit accounting to report 0.
+ *
+ * This regression has happened at least twice (OIDC modules, then
+ * token-tracker-shared.js). This test fails fast in CI instead.
+ */
+
+const fs = require('fs');
+const path = require('path');
+
+const ROOT = __dirname;
+const ENTRYPOINT = path.join(ROOT, 'server.js');
+
+/** Resolve a relative require() spec to the regular file it names, or null if none exists. */
+function resolveLocal(fromFile, spec) {
+  const target = path.resolve(path.dirname(fromFile), spec);
+  const isRegularFile = (candidate) => fs.existsSync(candidate) && fs.statSync(candidate).isFile();
+  // Probe order: the exact path, then .js, then .json, then a directory's index.js.
+  const probes = [target, `${target}.js`, `${target}.json`, path.join(target, 'index.js')];
+  const hit = probes.find(isRegularFile);
+  return hit === undefined ? null : hit;
+}
+
+/**
+ * Depth-first walk over local (./ and ../) require() calls starting at `entry`.
+ * Returns the Set of every file visited, including `entry` itself.
+ */
+function computeRequireClosure(entry) {
+  const requirePattern = /require\(\s*(["'])(\.{1,2}\/[^"']+)\1\s*\)/g;
+  const nodeModulesSegment = `${path.sep}node_modules${path.sep}`;
+  const visited = new Set();
+  const pending = [entry];
+
+  while (pending.length > 0) {
+    const current = pending.pop();
+    if (visited.has(current)) continue;
+    visited.add(current);
+
+    let text;
+    try {
+      text = fs.readFileSync(current, 'utf8');
+    } catch {
+      continue; // Unreadable: stays in the result but contributes no requires.
+    }
+    for (const [, , spec] of text.matchAll(requirePattern)) {
+      const target = resolveLocal(current, spec);
+      if (target && !target.includes(nodeModulesSegment)) pending.push(target);
+    }
+  }
+  return visited;
+}
+
+/**
+ * Parse a Dockerfile into the set of .js/.json files and the set of "/"-terminated
+ * directories named on COPY lines. COPY is matched case-insensitively and may be
+ * followed by any whitespace; comment lines inside a continued COPY are skipped.
+ */
+function parseDockerfileCopies(dockerfilePath) {
+  const lines = fs.readFileSync(dockerfilePath, 'utf8').split('\n');
+  const files = new Set();
+  const dirs = new Set();
+  const copyInstruction = /^COPY\s+/i;
+
+  let inCopy = false;
+  for (const rawLine of lines) {
+    const line = rawLine.trim();
+    if (line.startsWith('#')) continue;
+
+    const head = copyInstruction.exec(line);
+    if (!head && !inCopy) continue;
+    if (head) inCopy = true;
+    let body = head ? line.slice(head[0].length) : line;
+
+    // A trailing backslash continues the instruction onto the next line.
+    const continues = body.endsWith('\\');
+    body = body.replace(/\\$/, '').trim();
+
+    for (const tok of body.split(/\s+/)) {
+      if (!tok || tok === '.' || tok === './') continue;
+      const clean = tok.replace(/^\.\//, '');
+      if (clean.endsWith('/')) dirs.add(clean);
+      else if (/\.(js|json)$/.test(clean)) files.add(clean);
+    }
+
+    if (!continues) inCopy = false;
+  }
+  return { files, dirs };
+}
+
+// Jest guard: every local module reachable from server.js must be named, by file or by
+// enclosing directory, on a Dockerfile COPY line.
+describe('Dockerfile COPY coverage', () => {
+  test('every module reachable from server.js is copied into the image', () => {
+    const closure = computeRequireClosure(ENTRYPOINT);
+    // computeRequireClosure returns just the entry when it cannot read it or finds no local
+    // requires; fail then, rather than report full coverage of an empty graph.
+    expect(closure.size).toBeGreaterThan(1);
+
+    const { files, dirs } = parseDockerfileCopies(path.join(ROOT, 'Dockerfile'));
+    const isCopied = (relPath) => files.has(relPath) || [...dirs].some((dir) => relPath.startsWith(dir));
+
+    // Compare as ROOT-relative, forward-slash paths.
+    const missing = [...closure]
+      .map((abs) => path.relative(ROOT, abs).split(path.sep).join('/'))
+      .filter((rel) => !rel.startsWith('node_modules'))
+      .filter((rel) => !isCopied(rel))
+      .sort();
+
+    expect(missing).toEqual([]);
+  });
+});
diff --git a/containers/api-proxy/token-parsers.js b/containers/api-proxy/token-parsers.js
@@ -118,6 +118,71 @@ function extractCacheReadTokens(usage) {
   return undefined;
 }
 
+/** Copilot `token_type` -> the Anthropic-style usage field its count is added to. */
+const COPILOT_TOKEN_TYPE_TO_USAGE_FIELD = new Map([
+  ['input', 'input_tokens'],
+  ['output', 'output_tokens'],
+  ['cache_read', 'cache_read_input_tokens'],
+  ['cache_write', 'cache_creation_input_tokens'],
+]);
+
+/**
+ * Extract the authoritative per-type token breakdown from a Copilot
+ * `copilot_usage.token_details` array.
+ *
+ * The GitHub Copilot OpenAI-compatible endpoint reports a flattened
+ * `usage` object where `prompt_tokens` lumps fresh input together with
+ * cache-write tokens, and `prompt_tokens_details.cached_tokens` only
+ * carries cache-read. The true split (input / cache_read / cache_write /
+ * output), which is billed at distinct rates, is only available in the
+ * sibling `copilot_usage.token_details` array, e.g.:
+ *
+ *   copilot_usage: { token_details: [
+ *     { token_type: "input",       token_count: 3857 },
+ *     { token_type: "cache_read",  token_count: 0 },
+ *     { token_type: "cache_write", token_count: 12539 },
+ *     { token_type: "output",      token_count: 362 },
+ *   ] }
+ *
+ * `copilot_usage` is read from the top level of `json`, falling back to
+ * `json.response.copilot_usage`. Counts for a repeated token_type are
+ * summed. Entries with an unrecognized token_type, or whose token_count
+ * is not a finite, non-negative number (NaN, Infinity, negatives, numeric
+ * strings), are skipped so a malformed entry cannot corrupt the totals.
+ *
+ * Returns Anthropic-normalized usage fields (input_tokens, output_tokens,
+ * cache_read_input_tokens, cache_creation_input_tokens) so downstream
+ * normalization records the correct cache_write split, or null when no
+ * recognizable token_details are present.
+ *
+ * @param {object} json - Parsed response JSON (or SSE event object)
+ * @returns {object|null} Only the fields that were reported, or null
+ */
+function extractCopilotUsageBreakdown(json) {
+  if (!json || typeof json !== 'object') return null;
+
+  const isObject = (value) => Boolean(value) && typeof value === 'object';
+  // Prefer the top-level copilot_usage; otherwise try the one under `response`.
+  const nested = json.response && json.response.copilot_usage;
+  const copilotUsage = [json.copilot_usage, nested].find(isObject);
+  if (!copilotUsage || !Array.isArray(copilotUsage.token_details)) return null;
+
+  // A key appears in `out` only after a valid entry of that type was seen.
+  const out = {};
+  for (const entry of copilotUsage.token_details) {
+    if (!isObject(entry)) continue;
+    const field = COPILOT_TOKEN_TYPE_TO_USAGE_FIELD.get(entry.token_type);
+    if (field === undefined) continue; // unrecognized token_type
+
+    const count = entry.token_count;
+    // Number.isFinite is false for every non-number, so it subsumes the type check.
+    if (!Number.isFinite(count) || count < 0) continue;
+    out[field] = (out[field] || 0) + count;
+  }
+
+  return Object.keys(out).length > 0 ? out : null;
+}
+
 /**
  * Extract token usage from a non-streaming JSON response body.
  *
@@ -185,6 +250,26 @@ function extractUsageFromJson(body) {
       }
     }
 
+    // Copilot exposes the authoritative input/cache_read/cache_write/output
+    // split only in the sibling `copilot_usage.token_details` array. When
+    // present, prefer it: the flattened `usage.prompt_tokens` lumps fresh
+    // input together with cache-write tokens (billed at different rates).
+    const copilotBreakdown = extractCopilotUsageBreakdown(json);
+    if (copilotBreakdown) {
+      const merged = { ...(result.usage || {}), ...copilotBreakdown };
+      if (copilotBreakdown.input_tokens !== undefined) {
+        // Copilot gave us a precise input split: drop the lumped prompt_tokens.
+        delete merged.prompt_tokens;
+      } else if (copilotBreakdown.cache_creation_input_tokens !== undefined
+                 && typeof merged.prompt_tokens === 'number') {
+        // cache_write present but input absent: infer input = prompt_tokens - cache_write
+        // to avoid double-counting cache_write in normalizeUsage.
+        merged.input_tokens = Math.max(0, merged.prompt_tokens - copilotBreakdown.cache_creation_input_tokens);
+        delete merged.prompt_tokens;
+      }
+      result.usage = merged;
+    }
+
     return result;
   } catch {
     return { usage: null, model: null };
@@ -260,6 +345,20 @@ function extractUsageFromSseLine(line) {
       }
       const cacheReadTokens = extractCacheReadTokens(json.usage);
       if (typeof cacheReadTokens === 'number') result.usage.cache_read_input_tokens = cacheReadTokens;
+      const copilotBreakdown = extractCopilotUsageBreakdown(json);
+      if (copilotBreakdown) {
+        result.usage = { ...result.usage, ...copilotBreakdown };
+        if (copilotBreakdown.input_tokens !== undefined) {
+          // Copilot gave us a precise input split: drop the lumped prompt_tokens.
+          delete result.usage.prompt_tokens;
+        } else if (copilotBreakdown.cache_creation_input_tokens !== undefined
+                   && typeof result.usage.prompt_tokens === 'number') {
+          // cache_write present but input absent: infer input = prompt_tokens - cache_write
+          // to avoid double-counting cache_write in normalizeUsage.
+          result.usage.input_tokens = Math.max(0, result.usage.prompt_tokens - copilotBreakdown.cache_creation_input_tokens);
+          delete result.usage.prompt_tokens;
+        }
+      }
       return result;
     }
 
@@ -294,7 +393,8 @@ function parseSseDataLines(text) {
  *   - input_tokens: number (from Anthropic input_tokens or OpenAI prompt_tokens)
  *   - output_tokens: number (from Anthropic output_tokens or OpenAI completion_tokens)
  *   - cache_read_tokens: number (from Anthropic cache_read_input_tokens or OpenAI prompt_tokens_details.cached_tokens)
- *   - cache_write_tokens: number (Anthropic cache_creation_input_tokens; not available in OpenAI format)
+ *   - cache_write_tokens: number (Anthropic cache_creation_input_tokens or
+ *       Copilot copilot_usage cache_write; not available in flattened OpenAI usage)
  */
 function normalizeUsage(usage) {
   if (!usage) return null;
@@ -314,6 +414,7 @@ module.exports = {
   createDecompressor,
   extractReasoningTokens,
   extractCacheReadTokens,
+  extractCopilotUsageBreakdown,
   extractUsageFromJson,
   extractUsageFromSseLine,
   parseSseDataLines,
diff --git a/containers/api-proxy/token-tracker.js b/containers/api-proxy/token-tracker.js
@@ -25,6 +25,7 @@ const {
   normalizeUsage,
   isStreamingResponse,
   isCompressedResponse,
+  extractCopilotUsageBreakdown,
 } = require('./token-parsers');
 
 module.exports = {
@@ -39,6 +40,7 @@ module.exports = {
   normalizeUsage,
   isStreamingResponse,
   isCompressedResponse,
+  extractCopilotUsageBreakdown,
   validateTokenUsageRecord,
   writeTokenUsage,
   TOKEN_LOG_FILE,
diff --git a/containers/api-proxy/token-tracker.parsing.test.js b/containers/api-proxy/token-tracker.parsing.test.js