|
| 1 | +/** |
| 2 | + * Token usage parsers for AWF API Proxy. |
| 3 | + * |
| 4 | + * Pure parsing and normalization functions — no I/O, no side effects. |
| 5 | + * Covers SSE streaming, non-streaming JSON, decompression helpers, and |
| 6 | + * usage normalization into a unified format. |
| 7 | + */ |
| 8 | + |
| 9 | +'use strict'; |
| 10 | + |
| 11 | +const zlib = require('zlib'); |
| 12 | + |
| 13 | +/** |
| 14 | + * Check if a response is SSE (Server-Sent Events) streaming. |
| 15 | + */ |
| 16 | +function isStreamingResponse(headers) { |
| 17 | + const ct = headers['content-type'] || ''; |
| 18 | + return ct.includes('text/event-stream'); |
| 19 | +} |
| 20 | + |
| 21 | +/** |
| 22 | + * Check if a response is gzip or deflate compressed. |
| 23 | + */ |
| 24 | +function isCompressedResponse(headers) { |
| 25 | + const ce = (headers['content-encoding'] || '').toLowerCase(); |
| 26 | + return ce === 'gzip' || ce === 'deflate' || ce === 'br'; |
| 27 | +} |
| 28 | + |
| 29 | +/** |
| 30 | + * Create a decompression transform stream based on content-encoding. |
| 31 | + * Returns null if the encoding is not supported. |
| 32 | + */ |
| 33 | +function createDecompressor(headers) { |
| 34 | + const ce = (headers['content-encoding'] || '').toLowerCase(); |
| 35 | + if (ce === 'gzip') return zlib.createGunzip(); |
| 36 | + if (ce === 'deflate') return zlib.createInflate(); |
| 37 | + if (ce === 'br') return zlib.createBrotliDecompress(); |
| 38 | + return null; |
| 39 | +} |
| 40 | + |
| 41 | +/** |
| 42 | + * Extract reasoning token count from provider usage payloads. |
| 43 | + * |
| 44 | + * Supports explicit `reasoning_tokens` and provider-specific nested fields. |
| 45 | + * Priority order: top-level → completion_tokens_details → output_tokens_details. |
| 46 | + */ |
| 47 | +function extractReasoningTokens(usage) { |
| 48 | + if (!usage || typeof usage !== 'object') return undefined; |
| 49 | + if (typeof usage.reasoning_tokens === 'number') return usage.reasoning_tokens; |
| 50 | + if (usage.completion_tokens_details && typeof usage.completion_tokens_details.reasoning_tokens === 'number') { |
| 51 | + return usage.completion_tokens_details.reasoning_tokens; |
| 52 | + } |
| 53 | + if (usage.output_tokens_details && typeof usage.output_tokens_details.reasoning_tokens === 'number') { |
| 54 | + return usage.output_tokens_details.reasoning_tokens; |
| 55 | + } |
| 56 | + return undefined; |
| 57 | +} |
| 58 | + |
| 59 | +/** |
| 60 | + * Extract token usage from a non-streaming JSON response body. |
| 61 | + * |
| 62 | + * Supports: |
| 63 | + * - OpenAI/Copilot: { usage: { prompt_tokens, completion_tokens, total_tokens, prompt_tokens_details: { cached_tokens } } } |
| 64 | + * - Anthropic: { usage: { input_tokens, output_tokens, cache_creation_input_tokens, cache_read_input_tokens } } |
| 65 | + * |
| 66 | + * Also extracts the model field if present. |
| 67 | + * |
| 68 | + * @param {Buffer} body - Response body |
| 69 | + * @returns {{ usage: object|null, model: string|null }} |
| 70 | + */ |
| 71 | +function extractUsageFromJson(body) { |
| 72 | + try { |
| 73 | + const text = body.toString('utf8'); |
| 74 | + const json = JSON.parse(text); |
| 75 | + const usageSource = (json.usage && typeof json.usage === 'object') |
| 76 | + ? json.usage |
| 77 | + : ((json.response && json.response.usage && typeof json.response.usage === 'object') |
| 78 | + ? json.response.usage |
| 79 | + : null); |
| 80 | + const result = { usage: null, model: json.model || (json.response && json.response.model) || null }; |
| 81 | + |
| 82 | + if (usageSource) { |
| 83 | + const usage = {}; |
| 84 | + let hasField = false; |
| 85 | + // Anthropic fields |
| 86 | + if (typeof usageSource.input_tokens === 'number') { |
| 87 | + usage.input_tokens = usageSource.input_tokens; |
| 88 | + hasField = true; |
| 89 | + } |
| 90 | + if (typeof usageSource.output_tokens === 'number') { |
| 91 | + usage.output_tokens = usageSource.output_tokens; |
| 92 | + hasField = true; |
| 93 | + } |
| 94 | + if (typeof usageSource.cache_creation_input_tokens === 'number') { |
| 95 | + usage.cache_creation_input_tokens = usageSource.cache_creation_input_tokens; |
| 96 | + hasField = true; |
| 97 | + } |
| 98 | + if (typeof usageSource.cache_read_input_tokens === 'number') { |
| 99 | + usage.cache_read_input_tokens = usageSource.cache_read_input_tokens; |
| 100 | + hasField = true; |
| 101 | + } |
| 102 | + // OpenAI/Copilot fields |
| 103 | + if (typeof usageSource.prompt_tokens === 'number') { |
| 104 | + usage.prompt_tokens = usageSource.prompt_tokens; |
| 105 | + hasField = true; |
| 106 | + } |
| 107 | + if (typeof usageSource.completion_tokens === 'number') { |
| 108 | + usage.completion_tokens = usageSource.completion_tokens; |
| 109 | + hasField = true; |
| 110 | + } |
| 111 | + if (typeof usageSource.total_tokens === 'number') { |
| 112 | + usage.total_tokens = usageSource.total_tokens; |
| 113 | + hasField = true; |
| 114 | + } |
| 115 | + const reasoningTokens = extractReasoningTokens(usageSource); |
| 116 | + if (typeof reasoningTokens === 'number') { |
| 117 | + usage.reasoning_tokens = reasoningTokens; |
| 118 | + hasField = true; |
| 119 | + } |
| 120 | + // OpenAI/Copilot nested cache fields (prompt_tokens_details.cached_tokens) |
| 121 | + if (usageSource.prompt_tokens_details && typeof usageSource.prompt_tokens_details.cached_tokens === 'number') { |
| 122 | + usage.cache_read_input_tokens = usageSource.prompt_tokens_details.cached_tokens; |
| 123 | + hasField = true; |
| 124 | + } |
| 125 | + if (hasField) { |
| 126 | + result.usage = usage; |
| 127 | + } |
| 128 | + } |
| 129 | + |
| 130 | + return result; |
| 131 | + } catch { |
| 132 | + return { usage: null, model: null }; |
| 133 | + } |
| 134 | +} |
| 135 | + |
| 136 | +/** |
| 137 | + * Extract token usage from a single SSE data line. |
| 138 | + * |
| 139 | + * SSE format: "data: {json}\n\n" |
| 140 | + * |
| 141 | + * Anthropic streaming events with usage: |
| 142 | + * - message_start: { type: "message_start", message: { usage: { input_tokens, cache_creation_input_tokens, cache_read_input_tokens } } } |
| 143 | + * - message_delta: { type: "message_delta", usage: { output_tokens } } |
| 144 | + * |
| 145 | + * OpenAI/Copilot streaming events with usage: |
| 146 | + * - Final chunk: { usage: { prompt_tokens, completion_tokens, total_tokens, prompt_tokens_details: { cached_tokens } } } |
| 147 | + * |
| 148 | + * @param {string} line - A single SSE data line (without "data: " prefix) |
| 149 | + * @returns {{ usage: object|null, model: string|null }} |
| 150 | + */ |
| 151 | +function extractUsageFromSseLine(line) { |
| 152 | + if (!line || line === '[DONE]') return { usage: null, model: null }; |
| 153 | + |
| 154 | + try { |
| 155 | + const json = JSON.parse(line); |
| 156 | + const result = { usage: null, model: json.model || null }; |
| 157 | + |
| 158 | + // Anthropic message_start: usage is inside message object |
| 159 | + if (json.type === 'message_start' && json.message && json.message.usage) { |
| 160 | + result.usage = {}; |
| 161 | + const u = json.message.usage; |
| 162 | + if (typeof u.input_tokens === 'number') result.usage.input_tokens = u.input_tokens; |
| 163 | + if (typeof u.cache_creation_input_tokens === 'number') result.usage.cache_creation_input_tokens = u.cache_creation_input_tokens; |
| 164 | + if (typeof u.cache_read_input_tokens === 'number') result.usage.cache_read_input_tokens = u.cache_read_input_tokens; |
| 165 | + result.model = (json.message && json.message.model) || result.model; |
| 166 | + return result; |
| 167 | + } |
| 168 | + |
| 169 | + // Anthropic message_delta: usage at top level |
| 170 | + if (json.type === 'message_delta' && json.usage) { |
| 171 | + result.usage = {}; |
| 172 | + if (typeof json.usage.output_tokens === 'number') result.usage.output_tokens = json.usage.output_tokens; |
| 173 | + return result; |
| 174 | + } |
| 175 | + |
| 176 | + // OpenAI Responses API: usage in response object on completion events |
| 177 | + if ((json.type === 'response.completed' || json.type === 'response.done') |
| 178 | + && json.response && json.response.usage && typeof json.response.usage === 'object') { |
| 179 | + const u = json.response.usage; |
| 180 | + result.usage = {}; |
| 181 | + if (typeof u.input_tokens === 'number') result.usage.input_tokens = u.input_tokens; |
| 182 | + if (typeof u.output_tokens === 'number') result.usage.output_tokens = u.output_tokens; |
| 183 | + if (typeof u.total_tokens === 'number') result.usage.total_tokens = u.total_tokens; |
| 184 | + const reasoningTokens = extractReasoningTokens(u); |
| 185 | + if (typeof reasoningTokens === 'number') result.usage.reasoning_tokens = reasoningTokens; |
| 186 | + if (u.prompt_tokens_details && typeof u.prompt_tokens_details.cached_tokens === 'number') { |
| 187 | + result.usage.cache_read_input_tokens = u.prompt_tokens_details.cached_tokens; |
| 188 | + } |
| 189 | + result.model = json.response.model || result.model; |
| 190 | + return result; |
| 191 | + } |
| 192 | + |
| 193 | + // OpenAI/Copilot: usage at top level in final chunk |
| 194 | + if (json.usage && typeof json.usage === 'object') { |
| 195 | + result.usage = {}; |
| 196 | + if (typeof json.usage.prompt_tokens === 'number') result.usage.prompt_tokens = json.usage.prompt_tokens; |
| 197 | + if (typeof json.usage.completion_tokens === 'number') result.usage.completion_tokens = json.usage.completion_tokens; |
| 198 | + if (typeof json.usage.total_tokens === 'number') result.usage.total_tokens = json.usage.total_tokens; |
| 199 | + const reasoningTokens = extractReasoningTokens(json.usage); |
| 200 | + if (typeof reasoningTokens === 'number') { |
| 201 | + result.usage.reasoning_tokens = reasoningTokens; |
| 202 | + } |
| 203 | + // OpenAI/Copilot nested cache fields (prompt_tokens_details.cached_tokens) |
| 204 | + if (json.usage.prompt_tokens_details && typeof json.usage.prompt_tokens_details.cached_tokens === 'number') { |
| 205 | + result.usage.cache_read_input_tokens = json.usage.prompt_tokens_details.cached_tokens; |
| 206 | + } |
| 207 | + return result; |
| 208 | + } |
| 209 | + |
| 210 | + return result; |
| 211 | + } catch { |
| 212 | + return { usage: null, model: null }; |
| 213 | + } |
| 214 | +} |
| 215 | + |
| 216 | +/** |
| 217 | + * Extract all SSE data lines from a text chunk. |
| 218 | + * Lines are prefixed with "data: " in the SSE protocol. |
| 219 | + */ |
| 220 | +function parseSseDataLines(text) { |
| 221 | + const lines = []; |
| 222 | + const parts = text.split('\n'); |
| 223 | + for (const part of parts) { |
| 224 | + const trimmed = part.trim(); |
| 225 | + if (trimmed.startsWith('data: ')) { |
| 226 | + lines.push(trimmed.slice(6)); |
| 227 | + } else if (trimmed === 'data:') { |
| 228 | + // empty data line |
| 229 | + } |
| 230 | + } |
| 231 | + return lines; |
| 232 | +} |
| 233 | + |
| 234 | +/** |
| 235 | + * Normalize extracted usage into a unified format. |
| 236 | + * |
| 237 | + * Output fields: |
| 238 | + * - input_tokens: number (from Anthropic input_tokens or OpenAI prompt_tokens) |
| 239 | + * - output_tokens: number (from Anthropic output_tokens or OpenAI completion_tokens) |
| 240 | + * - cache_read_tokens: number (from Anthropic cache_read_input_tokens or OpenAI prompt_tokens_details.cached_tokens) |
| 241 | + * - cache_write_tokens: number (Anthropic cache_creation_input_tokens; not available in OpenAI format) |
| 242 | + */ |
| 243 | +function normalizeUsage(usage) { |
| 244 | + if (!usage) return null; |
| 245 | + |
| 246 | + return { |
| 247 | + input_tokens: usage.input_tokens ?? usage.prompt_tokens ?? 0, |
| 248 | + output_tokens: usage.output_tokens ?? usage.completion_tokens ?? 0, |
| 249 | + cache_read_tokens: usage.cache_read_input_tokens ?? 0, |
| 250 | + cache_write_tokens: usage.cache_creation_input_tokens ?? 0, |
| 251 | + reasoning_tokens: usage.reasoning_tokens ?? 0, |
| 252 | + }; |
| 253 | +} |
| 254 | + |
| 255 | +module.exports = { |
| 256 | + isStreamingResponse, |
| 257 | + isCompressedResponse, |
| 258 | + createDecompressor, |
| 259 | + extractReasoningTokens, |
| 260 | + extractUsageFromJson, |
| 261 | + extractUsageFromSseLine, |
| 262 | + parseSseDataLines, |
| 263 | + normalizeUsage, |
| 264 | +}; |
0 commit comments