github · lpcox · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/containers/api-proxy/token-parsers.js b/containers/api-proxy/token-parsers.js
@@ -118,6 +118,71 @@ function extractCacheReadTokens(usage) {
   return undefined;
 }
 
+/**
+ * Extract the authoritative per-type token breakdown from a Copilot
+ * `copilot_usage.token_details` array, read from `json.copilot_usage` or,
+ * failing that, from `json.response.copilot_usage`.
+ *
+ * The GitHub Copilot OpenAI-compatible endpoint reports a flattened
+ * `usage` object where `prompt_tokens` lumps fresh input together with
+ * cache-write tokens, and `prompt_tokens_details.cached_tokens` only
+ * carries cache-read. The true split, billed at distinct rates, is only
+ * available in the sibling `copilot_usage.token_details` array, e.g.:
+ *
+ *   copilot_usage: { token_details: [
+ *     { token_type: "input",       token_count: 3857 },   // -> input_tokens
+ *     { token_type: "cache_read",  token_count: 0 },      // -> cache_read_input_tokens
+ *     { token_type: "cache_write", token_count: 12539 },  // -> cache_creation_input_tokens
+ *     { token_type: "output",      token_count: 362 },    // -> output_tokens
+ *   ] }
+ *
+ * Counts of repeated token types are summed. Non-object entries, entries
+ * whose `token_count` is not a number, and unknown token types are skipped.
+ * Only fields for token types actually seen are set on the result.
+ *
+ * @param {object} json - Parsed response JSON (or SSE event object)
+ * @returns {object|null} Anthropic-style usage fields (see mapping above),
+ *   or null when no recognized token_details entry is present.
+ */
+function extractCopilotUsageBreakdown(json) {
+  if (!json || typeof json !== 'object') return null; // null / primitive input: nothing to read
+  const copilotUsage = (json.copilot_usage && typeof json.copilot_usage === 'object') // top-level location wins
+    ? json.copilot_usage
+    : ((json.response && json.response.copilot_usage && typeof json.response.copilot_usage === 'object') // fallback: nested under `response`
+      ? json.response.copilot_usage
+      : null);
+  if (!copilotUsage || !Array.isArray(copilotUsage.token_details)) return null;
+
+  const out = {}; // receives only the fields whose token type was seen
+  let found = false; // flips to true once any recognized token type contributes a count
+  for (const entry of copilotUsage.token_details) {
+    if (!entry || typeof entry !== 'object') continue; // skip null / non-object entries
+    const count = entry.token_count;
+    if (typeof count !== 'number') continue; // skips missing/non-numeric counts; NOTE(review): negative or fractional numbers still pass
+    switch (entry.token_type) {
+      case 'input':
+        out.input_tokens = (out.input_tokens || 0) + count; // `|| 0` seeds the field on first sight; repeats accumulate
+        found = true;
+        break;
+      case 'output':
+        out.output_tokens = (out.output_tokens || 0) + count;
+        found = true;
+        break;
+      case 'cache_read':
+        out.cache_read_input_tokens = (out.cache_read_input_tokens || 0) + count;
+        found = true;
+        break;
+      case 'cache_write':
+        out.cache_creation_input_tokens = (out.cache_creation_input_tokens || 0) + count;
+        found = true;
+        break;
+      default: // unrecognized token_type: ignored, does not set `found`
+        break;
+    }
+  }
+  return found ? out : null; // null even for a non-empty array when nothing was recognized
+}
+
 /**
  * Extract token usage from a non-streaming JSON response body.
  *
@@ -185,6 +250,26 @@ function extractUsageFromJson(body) {
       }
     }
 
+    // Copilot exposes the authoritative input/cache_read/cache_write/output
+    // split only in the sibling `copilot_usage.token_details` array. When
+    // present, prefer it: the flattened `usage.prompt_tokens` lumps fresh
+    // input together with cache-write tokens (billed at different rates).
+    const copilotBreakdown = extractCopilotUsageBreakdown(json);
+    if (copilotBreakdown) {
+      const merged = { ...(result.usage || {}), ...copilotBreakdown };
+      if (copilotBreakdown.input_tokens !== undefined) {
+        // Copilot gave us a precise input split: drop the lumped prompt_tokens.
+        delete merged.prompt_tokens;
+      } else if (copilotBreakdown.cache_creation_input_tokens !== undefined
+                 && typeof merged.prompt_tokens === 'number') {
+        // cache_write present but input absent: infer input = prompt_tokens - cache_write
+        // to avoid double-counting cache_write in normalizeUsage.
+        merged.input_tokens = Math.max(0, merged.prompt_tokens - copilotBreakdown.cache_creation_input_tokens);
+        delete merged.prompt_tokens;
+      }
+      result.usage = merged;
+    }
+
     return result;
   } catch {
     return { usage: null, model: null };
@@ -260,6 +345,20 @@ function extractUsageFromSseLine(line) {
       }
       const cacheReadTokens = extractCacheReadTokens(json.usage);
       if (typeof cacheReadTokens === 'number') result.usage.cache_read_input_tokens = cacheReadTokens;
+      const copilotBreakdown = extractCopilotUsageBreakdown(json);
+      if (copilotBreakdown) {
+        result.usage = { ...result.usage, ...copilotBreakdown };
+        if (copilotBreakdown.input_tokens !== undefined) {
+          // Copilot gave us a precise input split: drop the lumped prompt_tokens.
+          delete result.usage.prompt_tokens;
+        } else if (copilotBreakdown.cache_creation_input_tokens !== undefined
+                   && typeof result.usage.prompt_tokens === 'number') {
+          // cache_write present but input absent: infer input = prompt_tokens - cache_write
+          // to avoid double-counting cache_write in normalizeUsage.
+          result.usage.input_tokens = Math.max(0, result.usage.prompt_tokens - copilotBreakdown.cache_creation_input_tokens);
+          delete result.usage.prompt_tokens;
+        }
+      }
       return result;
     }
 
@@ -294,7 +393,8 @@ function parseSseDataLines(text) {
  *   - input_tokens: number (from Anthropic input_tokens or OpenAI prompt_tokens)
  *   - output_tokens: number (from Anthropic output_tokens or OpenAI completion_tokens)
  *   - cache_read_tokens: number (from Anthropic cache_read_input_tokens or OpenAI prompt_tokens_details.cached_tokens)
- *   - cache_write_tokens: number (Anthropic cache_creation_input_tokens; not available in OpenAI format)
+ *   - cache_write_tokens: number (Anthropic cache_creation_input_tokens or
+ *       Copilot copilot_usage cache_write; not available in flattened OpenAI usage)
  */
 function normalizeUsage(usage) {
   if (!usage) return null;
@@ -314,6 +414,7 @@ module.exports = {
   createDecompressor,
   extractReasoningTokens,
   extractCacheReadTokens,
+  extractCopilotUsageBreakdown,
   extractUsageFromJson,
   extractUsageFromSseLine,
   parseSseDataLines,

diff --git a/containers/api-proxy/token-tracker.js b/containers/api-proxy/token-tracker.js
@@ -25,6 +25,7 @@ const {
   normalizeUsage,
   isStreamingResponse,
   isCompressedResponse,
+  extractCopilotUsageBreakdown,
 } = require('./token-parsers');
 
 module.exports = {
@@ -39,6 +40,7 @@ module.exports = {
   normalizeUsage,
   isStreamingResponse,
   isCompressedResponse,
+  extractCopilotUsageBreakdown,
   validateTokenUsageRecord,
   writeTokenUsage,
   TOKEN_LOG_FILE,

diff --git a/containers/api-proxy/token-tracker.parsing.test.js b/containers/api-proxy/token-tracker.parsing.test.js
@@ -7,6 +7,7 @@ const {
   extractUsageFromSseLine,
   parseSseDataLines,
   normalizeUsage,
+  extractCopilotUsageBreakdown,
 } = require('./token-tracker');
 
 // ── extractUsageFromJson ──────────────────────────────────────────────
@@ -523,3 +524,231 @@ describe('normalizeUsage', () => {
     });
   });
 });
+
+// ── Copilot copilot_usage.token_details breakdown ─────────────────────
+
+describe('extractCopilotUsageBreakdown', () => {
+  test('returns null when copilot_usage is absent', () => {
+    expect(extractCopilotUsageBreakdown({ usage: { prompt_tokens: 10 } })).toBeNull(); // flattened usage alone is never consulted
+  });
+
+  test('returns null when token_details is not an array', () => {
+    expect(extractCopilotUsageBreakdown({ copilot_usage: { token_details: {} } })).toBeNull(); // plain object fails Array.isArray
+  });
+
+  test('returns null when no recognizable token types are present', () => {
+    expect(extractCopilotUsageBreakdown({
+      copilot_usage: { token_details: [{ token_type: 'mystery', token_count: 5 }] }, // numeric count, unknown type
+    })).toBeNull();
+  });
+
+  test('extracts the full input/cache_read/cache_write/output split', () => {
+    const result = extractCopilotUsageBreakdown({
+      copilot_usage: {
+        token_details: [
+          { token_type: 'input', token_count: 3857 },
+          { token_type: 'cache_read', token_count: 0 }, // a zero count is still recorded as a field
+          { token_type: 'cache_write', token_count: 12539 },
+          { token_type: 'output', token_count: 362 },
+        ],
+      },
+    });
+    expect(result).toEqual({ // Copilot token types mapped onto Anthropic-style field names
+      input_tokens: 3857,
+      cache_read_input_tokens: 0,
+      cache_creation_input_tokens: 12539,
+      output_tokens: 362,
+    });
+  });
+
+  test('reads copilot_usage nested under a response object', () => {
+    const result = extractCopilotUsageBreakdown({
+      response: { // no top-level copilot_usage, so the nested fallback is used
+        copilot_usage: { token_details: [{ token_type: 'input', token_count: 7 }] },
+      },
+    });
+    expect(result).toEqual({ input_tokens: 7 }); // unseen token types produce no fields
+  });
+
+  test('sums repeated token types and ignores malformed entries', () => {
+    const result = extractCopilotUsageBreakdown({
+      copilot_usage: {
+        token_details: [
+          { token_type: 'input', token_count: 100 },
+          { token_type: 'input', token_count: 50 }, // same type again: added to the running total
+          { token_type: 'output', token_count: 'nope' }, // non-numeric count: skipped
+          null, // non-object entry: skipped
+          { token_type: 'cache_write' }, // missing token_count: skipped
+        ],
+      },
+    });
+    expect(result).toEqual({ input_tokens: 150 }); // 100 + 50; skipped entries leave no fields behind
+  });
+});
+
+// ── extractUsageFromJson + Copilot breakdown integration ──────────────
+
+describe('extractUsageFromJson with copilot_usage', () => {
+  // Fixture factory (fresh Buffer per call) for a Claude-via-Copilot response:
+  // the flattened usage.prompt_tokens lumps fresh input (3857) with cache_write
+  // (12539); the authoritative split lives only in copilot_usage.token_details.
+  const copilotBody = () => Buffer.from(JSON.stringify({
+    id: 'e6925ddf',
+    model: 'claude-sonnet-4.6',
+    choices: [{ message: { role: 'assistant', content: 'hi' } }],
+    usage: {
+      completion_tokens: 362,
+      prompt_tokens: 16396, // 3857 input + 12539 cache_write + 0 cache_read
+      prompt_tokens_details: { cached_tokens: 0 },
+      total_tokens: 16758, // 16396 prompt + 362 completion
+    },
+    copilot_usage: {
+      token_details: [
+        { token_type: 'input', token_count: 3857 },
+        { token_type: 'cache_read', token_count: 0 },
+        { token_type: 'cache_write', token_count: 12539 },
+        { token_type: 'output', token_count: 362 },
+      ],
+      total_nano_aiu: 6402225000, // sits outside token_details, so it is not part of the extracted breakdown
+    },
+  }));
+
+  test('prefers the copilot_usage split over the lumped prompt_tokens', () => {
+    const { usage, model } = extractUsageFromJson(copilotBody());
+    expect(model).toBe('claude-sonnet-4.6');
+    expect(usage.input_tokens).toBe(3857);
+    expect(usage.cache_creation_input_tokens).toBe(12539);
+    expect(usage.cache_read_input_tokens).toBe(0);
+    expect(usage.output_tokens).toBe(362);
+    // The lumped prompt_tokens is deleted once a precise input count exists.
+    expect(usage.prompt_tokens).toBeUndefined();
+  });
+
+  test('normalizes to the correct cache_write split', () => {
+    const { usage } = extractUsageFromJson(copilotBody());
+    expect(normalizeUsage(usage)).toEqual({ // end-to-end: extraction followed by normalization
+      input_tokens: 3857,
+      output_tokens: 362,
+      cache_read_tokens: 0,
+      cache_write_tokens: 12539,
+      reasoning_tokens: 0,
+    });
+  });
+
+  test('does not affect plain OpenAI responses without copilot_usage', () => {
+    const body = Buffer.from(JSON.stringify({ // no copilot_usage key anywhere in this body
+      model: 'gpt-5',
+      usage: {
+        prompt_tokens: 100,
+        completion_tokens: 20,
+        total_tokens: 120,
+        prompt_tokens_details: { cached_tokens: 30 },
+      },
+    }));
+    expect(normalizeUsage(extractUsageFromJson(body).usage)).toEqual({
+      input_tokens: 100, // prompt_tokens is reported as-is when no breakdown exists
+      output_tokens: 20,
+      cache_read_tokens: 30,
+      cache_write_tokens: 0,
+      reasoning_tokens: 0,
+    });
+  });
+
+  test('uses copilot_usage even when the flattened usage object is absent', () => {
+    const body = Buffer.from(JSON.stringify({ // no `usage` key: the breakdown is the only token source
+      model: 'claude-sonnet-4.6',
+      copilot_usage: {
+        token_details: [
+          { token_type: 'input', token_count: 200 },
+          { token_type: 'output', token_count: 10 },
+          { token_type: 'cache_write', token_count: 99 },
+        ],
+      },
+    }));
+    expect(normalizeUsage(extractUsageFromJson(body).usage)).toEqual({
+      input_tokens: 200,
+      output_tokens: 10,
+      cache_read_tokens: 0, // no cache_read entry supplied; the test expects 0 after normalization
+      cache_write_tokens: 99,
+      reasoning_tokens: 0,
+    });
+  });
+
+  test('infers input_tokens from prompt_tokens when copilot_usage has cache_write but no input', () => {
+    // Edge case: token_details provides cache_write but omits input.
+    // The fixture treats prompt_tokens as input + cache_write, so input is
+    // inferred by subtraction to avoid double-counting cache_write in normalizeUsage.
+    const body = Buffer.from(JSON.stringify({
+      model: 'claude-sonnet-4.6',
+      usage: {
+        prompt_tokens: 500,
+        completion_tokens: 50,
+        total_tokens: 550,
+      },
+      copilot_usage: {
+        token_details: [
+          { token_type: 'cache_write', token_count: 300 },
+          { token_type: 'output', token_count: 50 },
+        ],
+      },
+    }));
+    const { usage } = extractUsageFromJson(body);
+    // prompt_tokens is deleted; input_tokens is inferred as max(0, 500 - 300) = 200
+    expect(usage.prompt_tokens).toBeUndefined();
+    expect(usage.input_tokens).toBe(200);
+    expect(usage.cache_creation_input_tokens).toBe(300);
+    expect(normalizeUsage(usage)).toEqual({
+      input_tokens: 200,
+      output_tokens: 50,
+      cache_read_tokens: 0,
+      cache_write_tokens: 300,
+      reasoning_tokens: 0,
+    });
+  });
+});
+
+describe('extractUsageFromSseLine with copilot_usage', () => {
+  test('applies the copilot_usage split in a streaming final chunk', () => {
+    const line = JSON.stringify({ // NOTE(review): bare JSON with no `data:` prefix; presumably the parser accepts that, confirm
+      model: 'claude-sonnet-4.6',
+      usage: { prompt_tokens: 16396, completion_tokens: 362, total_tokens: 16758 },
+      copilot_usage: {
+        token_details: [
+          { token_type: 'input', token_count: 3857 },
+          { token_type: 'cache_write', token_count: 12539 },
+          { token_type: 'output', token_count: 362 },
+        ],
+      },
+    });
+    const { usage } = extractUsageFromSseLine(line);
+    expect(usage.input_tokens).toBe(3857);
+    expect(usage.cache_creation_input_tokens).toBe(12539);
+    expect(usage.prompt_tokens).toBeUndefined(); // lumped count is deleted once a precise input exists
+  });
+
+  test('infers input_tokens from prompt_tokens when streaming copilot_usage has cache_write but no input', () => {
+    // Same double-count guard as the non-streaming path: when token_details omits input but
+    // provides cache_write, prompt_tokens must not survive alongside cache_creation_input_tokens.
+    const line = JSON.stringify({
+      model: 'claude-sonnet-4.6',
+      usage: { prompt_tokens: 500, completion_tokens: 50, total_tokens: 550 },
+      copilot_usage: {
+        token_details: [
+          { token_type: 'cache_write', token_count: 300 },
+          { token_type: 'output', token_count: 50 },
+        ],
+      },
+    });
+    const { usage } = extractUsageFromSseLine(line);
+    expect(usage.prompt_tokens).toBeUndefined();
+    expect(usage.input_tokens).toBe(200); // inferred as max(0, 500 - 300)
+    expect(usage.cache_creation_input_tokens).toBe(300);
+    expect(normalizeUsage(usage)).toEqual({
+      input_tokens: 200,
+      output_tokens: 50,
+      cache_read_tokens: 0,
+      cache_write_tokens: 300,
+      reasoning_tokens: 0,
+    });
+  });
+});