11import vscode from 'vscode' ;
2+ import { IMAGE_DESCRIPTION_PREFIX , IMAGE_DESCRIPTION_SUFFIX } from '../consts' ;
3+ import { computeDataHash , getCachedDescriptionByDataHash } from './vision/cache' ;
4+
/**
 * Length of the JSON serialization of `value`, or `fallback` when the value
 * cannot be serialized (e.g. circular references, or `JSON.stringify`
 * yielding `undefined`).
 */
function jsonLengthOr(value: unknown, fallback: number): number {
  try {
    return JSON.stringify(value).length;
  } catch {
    return fallback;
  }
}

/**
 * Character estimate for a binary data part.
 *
 * The model never receives binary data directly: images are resolved to text
 * descriptions by the vision pipeline, so the raw byte length would massively
 * overestimate their cost.
 */
function estimateDataPartChars(part: vscode.LanguageModelDataPart): number {
  // Above this size the SHA-256 cost outweighs the benefit of a cache lookup,
  // and such images are unlikely to be processed by the vision pipeline anyway.
  const maxHashableImageBytes = 500_000;
  // Conservative fixed estimate for an image with no cached description
  // (~255 tokens at 4 chars/token, roughly matching OpenAI auto-detail for a
  // moderate image).
  const uncachedImageChars = 1020;
  // Cap so a single large attachment cannot dominate the budget.
  const maxDocumentChars = 10000;

  const mimeType = part.mimeType;

  // PDFs and other documents: byte length as a rough proxy, capped.
  if (!mimeType.startsWith('image/')) {
    return Math.min(part.data?.byteLength ?? 0, maxDocumentChars);
  }

  // Images: prefer the cached vision description when one exists, since its
  // length is the most accurate estimate of what the model will receive.
  if (part.data.byteLength <= maxHashableImageBytes) {
    const description = getCachedDescriptionByDataHash(computeDataHash(part.data));
    if (description !== undefined) {
      return IMAGE_DESCRIPTION_PREFIX.length + description.length + IMAGE_DESCRIPTION_SUFFIX.length;
    }
  }

  // Cold cache, or image too large to hash. The vision pipeline will replace
  // the image with a text description whose real cost is counted as a
  // LanguageModelTextPart on the next pass.
  return uncachedImageChars;
}

/**
 * Estimate how many characters a single message content part contributes.
 *
 * The result is a character count; the caller divides it by its
 * chars-per-token ratio to obtain a token estimate. Tool result parts are
 * walked recursively so nested content is included.
 *
 * @param part - Any content part; unrecognized values are handled by a
 *   best-effort JSON fallback.
 * @returns The estimated character count (0 for non-object values and for
 *   objects that cannot be serialized).
 */
function estimatePartChars(part: unknown): number {
  // Plain text is by far the most common case.
  if (part instanceof vscode.LanguageModelTextPart) {
    return part.value.length;
  }

  // Tool call: call id + tool name + JSON-serialized input. An input that
  // cannot be serialized contributes a rough 2 characters.
  if (part instanceof vscode.LanguageModelToolCallPart) {
    const headerChars = part.callId.length + part.name.length;
    return headerChars + jsonLengthOr(part.input, 2);
  }

  // Tool result: call id plus the recursive estimate of every nested part.
  if (part instanceof vscode.LanguageModelToolResultPart) {
    const idChars = part.callId.length;
    const nestedChars = Array.isArray(part.content)
      ? part.content.reduce<number>((sum, nested) => sum + estimatePartChars(nested), 0)
      : 0;
    return idChars + nestedChars;
  }

  // Binary data (images, PDFs, ...): capped heuristic.
  if (part instanceof vscode.LanguageModelDataPart) {
    return estimateDataPartChars(part);
  }

  // Thinking part (proposed API): value is a string or an array of strings.
  if (isLanguageModelThinkingPart(part)) {
    const thinking = part.value;
    if (typeof thinking === 'string') {
      return thinking.length;
    }
    if (!Array.isArray(thinking)) {
      return 0;
    }
    let thinkingChars = 0;
    for (const fragment of thinking) {
      thinkingChars += fragment.length;
    }
    return thinkingChars;
  }

  // Everything below only applies to non-null objects.
  if (!part || typeof part !== 'object') {
    return 0;
  }

  // LanguageModelPromptTsxPart is duck-typed by constructor name because the
  // class may not always be available; only its `value` is serialized.
  if ('value' in part && part.constructor?.name === 'LanguageModelPromptTsxPart') {
    return jsonLengthOr((part as { value: unknown }).value, 0);
  }

  // Unknown part type: fall back to serializing the whole object.
  return jsonLengthOr(part, 0);
}
110+
/**
 * Type guard for LanguageModelThinkingPart.
 *
 * The class belongs to a proposed API and may be missing at runtime, so its
 * presence on the `vscode` module is verified before `instanceof` is used.
 *
 * @param part - Value to test.
 * @returns `true` only when the class exists as a function on the `vscode`
 *   module and `part` is an instance of it.
 */
function isLanguageModelThinkingPart(part: unknown): part is vscode.LanguageModelThinkingPart {
  const thinkingPartCtor = (vscode as Record<string, unknown>).LanguageModelThinkingPart;
  if (typeof thinkingPartCtor !== 'function') {
    return false;
  }
  return part instanceof vscode.LanguageModelThinkingPart;
}
2120
3121export function estimateTokenCount (
4122 text : string | vscode . LanguageModelChatRequestMessage ,
@@ -12,11 +130,9 @@ export function estimateTokenCount(
12130 return 1 ;
13131 }
14132
15- let total = 0 ;
133+ let totalChars = 0 ;
16134 for ( const part of text . content ) {
17- if ( part instanceof vscode . LanguageModelTextPart ) {
18- total += part . value . length ;
19- }
135+ totalChars += estimatePartChars ( part ) ;
20136 }
21- return Math . max ( 1 , Math . ceil ( total / charsPerToken ) ) ;
137+ return Math . max ( 1 , Math . ceil ( totalChars / charsPerToken ) ) ;
22138}
0 commit comments