From e0cf99201f1ca8954ebf1e8df92c8026f7eb8523 Mon Sep 17 00:00:00 2001 From: Oxygen <1391083091@qq.com> Date: Sat, 6 Jun 2026 01:18:01 +0800 Subject: [PATCH] feat: add token estimation utility for chat completions Adds estimateTokens() function for cost estimation and dry-run capability without making API calls. Closes #318 Co-Authored-By: Claude Opus 4.8 --- src/index.ts | 2 + src/lib/token-estimation.ts | 327 ++++++++++++++++++ src/resources/chat/completions/completions.ts | 27 ++ 3 files changed, 356 insertions(+) create mode 100644 src/lib/token-estimation.ts diff --git a/src/index.ts b/src/index.ts index bd5e7743f6..eff5fdcccf 100644 --- a/src/index.ts +++ b/src/index.ts @@ -27,3 +27,5 @@ export { export { AzureOpenAI } from './azure'; export { BedrockOpenAI, type BedrockClientOptions } from './bedrock'; + +export { estimateTokens, type EstimateTokensParams, type TokenEstimate } from './lib/token-estimation'; diff --git a/src/lib/token-estimation.ts b/src/lib/token-estimation.ts new file mode 100644 index 0000000000..2dad61adc2 --- /dev/null +++ b/src/lib/token-estimation.ts @@ -0,0 +1,327 @@ +import type { + ChatCompletionMessageParam, + ChatCompletionTool, +} from '../resources/chat/completions/completions'; + +/** + * Result of a token estimation for a chat completion request. + */ +export interface TokenEstimate { + /** + * Estimated number of tokens in the input (messages + tool definitions). + */ + inputTokens: number; + + /** + * Estimated maximum number of tokens that could be generated, + * based on the model's context window minus the estimated input tokens. + * Returns undefined if the model is unknown. + */ + maxOutputTokens?: number; + + /** + * The total context window size for the model, if known. + */ + contextWindow?: number | undefined; +} + +/** + * Known context window sizes for common models. + */ +const MODEL_CONTEXT_WINDOWS: Record = { + // GPT-4 class + 'gpt-4': 8192, + 'gpt-4-0314': 8192, + 'gpt-4-0613': 8192, + 'gpt-4-32k': 32768, + 'gpt-4-32k-0314': 32768, + 'gpt-4-32k-0613': 32768, + 'gpt-4-1106-preview': 128000, + 'gpt-4-0125-preview': 128000, + 'gpt-4-turbo': 128000, + 'gpt-4-turbo-2024-04-09': 128000, + 'gpt-4-turbo-preview': 128000, + 'gpt-4-vision-preview': 128000, + 'gpt-4-1106-vision-preview': 128000, + + // GPT-4.1 class + 'gpt-4.1': 1048576, + 'gpt-4.1-mini': 1048576, + 'gpt-4.1-nano': 1048576, + + // GPT-4o class + 'gpt-4o': 128000, + 'gpt-4o-2024-05-13': 128000, + 'gpt-4o-2024-08-06': 128000, + 'gpt-4o-2024-11-20': 128000, + 'gpt-4o-mini': 128000, + 'gpt-4o-mini-2024-07-18': 128000, + 'gpt-4o-audio-preview': 128000, + 'gpt-4o-audio-preview-2024-10-01': 128000, + 'gpt-4o-audio-preview-2024-12-17': 128000, + 'gpt-4o-mini-audio-preview': 128000, + 'gpt-4o-mini-audio-preview-2024-12-17': 128000, + + // GPT-5 class + 'gpt-5': 262144, + 'gpt-5-mini': 262144, + 'gpt-5-nano': 262144, + 'gpt-5-2025-02-10': 262144, + 'gpt-5-mini-2025-02-10': 262144, + 'gpt-5-nano-2025-02-10': 262144, + 'gpt-5.1': 262144, + 'gpt-5.1-codex': 262144, + 'gpt-5.1-codex-max': 262144, + 'gpt-5.1-codex-mini': 262144, + 'gpt-5.4': 262144, + 'gpt-5.4-pro': 400000, + 'gpt-5.5': 272000, + 'gpt-5.5-pro': 400000, + + // o-series + 'o1': 200000, + 'o1-2024-12-17': 200000, + 'o1-mini': 128000, + 'o1-mini-2024-09-12': 128000, + 'o1-pro': 200000, + 'o3': 200000, + 'o3-mini': 200000, + 'o4-mini': 200000, + 'o3-2025-04-16': 200000, + 'o4-mini-2025-04-16': 200000, + + // GPT-3.5 class + 'gpt-3.5-turbo': 4096, + 'gpt-3.5-turbo-0301': 4096, + 'gpt-3.5-turbo-0613': 4096, + 'gpt-3.5-turbo-1106': 16385, + 'gpt-3.5-turbo-0125': 16385, + 'gpt-3.5-turbo-16k': 16385, + 'gpt-3.5-turbo-16k-0613': 16385, +}; + +/** + * Characters-per-token ratio used for estimation. This is a conservative + * estimate based on typical English text. Actual token counts depend on the + * specific tokenizer and the content being tokenized. + */ +const CHARS_PER_TOKEN = 3.5; + +/** + * Fixed token overhead per message for the chat format. + * This accounts for role markers, separators, and other formatting tokens + * that the chat template adds around each message. + */ +const TOKENS_PER_MESSAGE = 4; + +/** + * Additional token overhead if the message has a `name` field. + */ +const TOKENS_PER_NAME = 1; + +/** + * Base overhead for the entire chat completion request. + */ +const BASE_OVERHEAD = 3; + +/** + * Estimate the number of tokens in a string value. + * Uses a character-counting heuristic. + */ +function estimateStringTokens(text: string): number { + return Math.ceil(text.length / CHARS_PER_TOKEN); +} + +/** + * Estimate the number of tokens in the text content of a message. + * Handles both string content and content arrays. + */ +function estimateMessageContent(content: unknown): number { + if (typeof content === 'string') { + return estimateStringTokens(content); + } + + if (Array.isArray(content)) { + let tokens = 0; + for (const part of content) { + if (part && typeof part === 'object') { + if ('text' in part && typeof part.text === 'string') { + tokens += estimateStringTokens(part.text); + } else if ('image_url' in part) { + // Images are tokenized into a fixed number of tokens depending on detail level + const detail = part.image_url?.detail || 'auto'; + if (detail === 'low' || detail === 'auto') { + tokens += 85; // low-res images: 85 tokens + } else { + tokens += 765; // high-res images: roughly 765 tokens (may vary) + } + } else if ('input_audio' in part) { + // Audio is tokenized separately; rough estimate + tokens += 50; + } else if ('refusal' in part && typeof part.refusal === 'string') { + tokens += estimateStringTokens(part.refusal); + } + } + } + return tokens; + } + + return 0; +} + +/** + * Estimate the number of tokens for a single chat completion message, + * including the overhead from role and formatting. + */ +function estimateMessageTokens(message: ChatCompletionMessageParam): number { + let tokens = TOKENS_PER_MESSAGE; + + // Content tokens + const content = (message as any).content; + if (content !== undefined && content !== null) { + tokens += estimateMessageContent(content); + } + + // Name field overhead + if ('name' in message && message.name) { + tokens += TOKENS_PER_NAME + estimateStringTokens(message.name); + } + + // Tool call ID overhead + if ('tool_call_id' in message && (message as any).tool_call_id) { + tokens += estimateStringTokens((message as any).tool_call_id); + } + + // Function call (deprecated) in assistant messages + if ('function_call' in message && (message as any).function_call) { + const fc = (message as any).function_call; + if (fc) { + tokens += estimateStringTokens(JSON.stringify(fc)); + } + } + + // Tool calls in assistant messages + if ('tool_calls' in message && (message as any).tool_calls) { + tokens += estimateStringTokens(JSON.stringify((message as any).tool_calls)); + } + + return tokens; +} + +/** + * Estimate the number of tokens used by tool/function definitions. + */ +function estimateToolTokens(tools: ChatCompletionTool[]): number { + let tokens = 0; + for (const tool of tools) { + // Account for separator tokens between tools + tokens += 1; + tokens += estimateStringTokens(JSON.stringify(tool)); + } + return tokens; +} + +/** + * Find the context window size for a given model name. + * Supports partial matching (e.g., "gpt-4o-2024-05-13" matches "gpt-4o" prefix). + */ +function getContextWindow(model?: string): number | undefined { + if (!model) return undefined; + + // Exact match first + if (model in MODEL_CONTEXT_WINDOWS) { + return MODEL_CONTEXT_WINDOWS[model]; + } + + // Try prefix matching (longest first) + const prefixes = Object.keys(MODEL_CONTEXT_WINDOWS).sort((a, b) => b.length - a.length); + for (const prefix of prefixes) { + if (model.startsWith(prefix)) { + return MODEL_CONTEXT_WINDOWS[prefix]; + } + } + + return undefined; +} + +export interface EstimateTokensParams { + /** + * The messages to estimate tokens for. + */ + messages: Array; + + /** + * The model ID. Used to look up the context window size. + */ + model?: string; + + /** + * Tool/function definitions included in the request. + */ + tools?: Array; + + /** + * The maximum number of tokens the caller plans to request (max_completion_tokens). + * If provided, the estimate will include this as an estimate of potential output tokens + * alongside the input token count. + */ + maxCompletionTokens?: number; +} + +/** + * Estimates the number of tokens that a chat completion request would consume, + * without making an API call. + * + * The estimation is based on a character-counting heuristic (~3.5 characters + * per token for English text) plus fixed per-message overhead. This provides + * a useful approximation for cost estimation and context window management, + * but may differ from the actual token count computed by the model's tokenizer. + * + * For precise token counts, use the `tiktoken` package or rely on the `usage` + * field in the API response. + * + * @example + * ```ts + * const estimate = estimateTokens({ + * messages: [{ role: 'user', content: 'Hello, how are you?' }], + * model: 'gpt-4o', + * }); + * console.log(estimate.inputTokens); + * // => ~12 + * console.log(estimate.maxOutputTokens); + * // => ~127988 + * ``` + */ +export function estimateTokens(params: EstimateTokensParams): TokenEstimate { + const { messages, model, tools, maxCompletionTokens } = params; + + let inputTokens = BASE_OVERHEAD; + + // Sum tokens across all messages + for (const message of messages) { + inputTokens += estimateMessageTokens(message); + } + + // Add tokens for tool/function definitions + if (tools && tools.length > 0) { + inputTokens += estimateToolTokens(tools); + } + + const contextWindow = getContextWindow(model); + const maxOutputTokens = + contextWindow !== undefined ? Math.max(0, contextWindow - inputTokens) : undefined; + + const result: TokenEstimate = { + inputTokens, + }; + + if (contextWindow !== undefined) { + result.contextWindow = contextWindow; + } + + if (maxOutputTokens !== undefined) { + result.maxOutputTokens = maxOutputTokens; + } + + return result; +} diff --git a/src/resources/chat/completions/completions.ts b/src/resources/chat/completions/completions.ts index 05cae402ec..861aa6c63b 100644 --- a/src/resources/chat/completions/completions.ts +++ b/src/resources/chat/completions/completions.ts @@ -12,6 +12,7 @@ import { Stream } from '../../../core/streaming'; import { RequestOptions } from '../../../internal/request-options'; import { path } from '../../../internal/utils/path'; +import { estimateTokens, type EstimateTokensParams, type TokenEstimate } from '../../../lib/token-estimation'; import { ChatCompletionRunner } from '../../../lib/ChatCompletionRunner'; import { ChatCompletionStreamingRunner } from '../../../lib/ChatCompletionStreamingRunner'; import { RunnerOptions } from '../../../lib/AbstractChatCompletionRunner'; @@ -224,6 +225,30 @@ export class Completions extends APIResource { ): ChatCompletionStream { return ChatCompletionStream.createChatCompletion(this._client, body, options); } + + /** + * Estimates the number of tokens that a chat completion request would consume, + * without making an API call. Useful for cost estimation and staying within + * context window limits. + * + * The estimation uses a character-counting heuristic (~3.5 characters per + * token for English text) plus per-message overhead. For precise token counts + * use the `tiktoken` package or check the `usage` field in the API response. + * + * @example + * ```ts + * const estimate = client.chat.completions.estimateTokens({ + * messages: [{ role: 'user', content: 'Hello!' }], + * model: 'gpt-4o', + * tools: [{ type: 'function', function: { name: 'get_weather', parameters: {} } }], + * }); + * console.log(estimate.inputTokens); + * console.log(estimate.maxOutputTokens); + * ``` + */ + estimateTokens(params: EstimateTokensParams): TokenEstimate { + return estimateTokens(params); + } } export interface ParsedFunction extends ChatCompletionMessageFunctionToolCall.Function { @@ -2410,6 +2435,8 @@ export interface ChatCompletionListParams extends CursorPageParams { Completions.Messages = Messages; export declare namespace Completions { + export { estimateTokens as estimateTokens, type EstimateTokensParams, type TokenEstimate }; + export { type ChatCompletion as ChatCompletion, type ChatCompletionAllowedToolChoice as ChatCompletionAllowedToolChoice,