88import Foundation
99import FoundationModels
1010
11- // MARK: - Token Counting Extensions
11+ // MARK: - Token Estimation (Fallback for pre-iOS 26.4)
1212
1313extension Transcript . Entry {
/// Heuristic token estimate for this transcript entry (~4.5 characters per token).
///
/// Segment-based entries (instructions, prompt, response) sum their segments' estimates.
/// Each tool call adds a fixed 5-token overhead on top of its name and arguments, and
/// tool output adds a fixed 3-token overhead on top of its segments.
/// Entry kinds not handled here contribute 0.
var estimatedTokenCount: Int {
    switch self {
    case .instructions(let entry):
        return entry.segments.map(\.estimatedTokenCount).reduce(0, +)
    case .prompt(let entry):
        return entry.segments.map(\.estimatedTokenCount).reduce(0, +)
    case .response(let entry):
        return entry.segments.map(\.estimatedTokenCount).reduce(0, +)
    case .toolCalls(let calls):
        var total = 0
        for call in calls {
            let nameTokens = estimateTokensAdvanced(call.toolName)
            let argumentTokens = estimateTokensForStructuredContent(call.arguments)
            total += nameTokens + argumentTokens + 5 // fixed per-call overhead
        }
        return total
    case .toolOutput(let output):
        // Fixed 3-token overhead on top of the output's own segments.
        return output.segments.map(\.estimatedTokenCount).reduce(0, +) + 3
    @unknown default:
        return 0
    }
}
@@ -43,96 +38,197 @@ extension Transcript.Segment {
4338 switch self {
4439 case . text( let textSegment) :
4540 return estimateTokensAdvanced ( textSegment. content)
46-
4741 case . structure( let structuredSegment) :
4842 return estimateTokensForStructuredContent ( structuredSegment. content)
4943 @unknown default :
50- // Return 0 for unknown segment types to avoid crashes
51- // This is a conservative estimate that won't affect token budget calculations
5244 return 0
5345 }
5446 }
5547}
5648
extension Transcript {
    /// Heuristic token estimate for the whole transcript: the sum of every entry's estimate.
    var estimatedTokenCount: Int {
        var total = 0
        for entry in self {
            total += entry.estimatedTokenCount
        }
        return total
    }
}
6254
63- // MARK: - Token Estimation Utilities
64-
/// Estimates the token count of `text` at roughly 4.5 characters per token.
///
/// - Parameter text: The text to measure; length is taken from `text.count`.
/// - Returns: 0 for an empty string, otherwise the character count divided by 4.5,
///   rounded up, and never less than 1.
func estimateTokensAdvanced(_ text: String) -> Int {
    if text.isEmpty {
        return 0
    }
    let charactersPerToken = 4.5
    let roundedUp = (Double(text.count) / charactersPerToken).rounded(.up)
    let estimate = Int(roundedUp)
    return estimate < 1 ? 1 : estimate
}
7660
/// Estimates the token count of structured content from the length of its JSON text,
/// at roughly 4.5 characters per token.
///
/// - Parameter content: The generated content whose `jsonString` is measured.
/// - Returns: The JSON character count divided by 4.5, rounded up, and never less than 1.
func estimateTokensForStructuredContent(_ content: GeneratedContent) -> Int {
    let jsonLength = Double(content.jsonString.count)
    let estimate = Int((jsonLength / 4.5).rounded(.up))
    return estimate < 1 ? 1 : estimate
}
8166
82- let tokensPerChar = 1.0 / 4.5
67+ // MARK: - Real Token Counting (iOS 26.4+)
8368
84- return max ( 1 , Int ( ceil ( Double ( characterCount) * tokensPerChar) ) )
#if compiler(>=6.3)
@available(iOS 26.4, macOS 26.4, visionOS 26.4, *)
extension Transcript {
    /// Asks `model` for the token usage of every entry in this transcript.
    ///
    /// - Parameter model: The language model that reports token usage; defaults to `.default`.
    /// - Returns: The `tokenCount` reported for the full list of entries.
    /// - Throws: Whatever error the model's token usage request throws.
    func realTokenCount(using model: SystemLanguageModel = .default) async throws -> Int {
        let entries = Array(self)
        let usage = try await model.tokenUsage(for: entries)
        return usage.tokenCount
    }
}
#endif
8678
87- // MARK: - Helper Functions
79+ // MARK: - Unified Token Counting
8880
89- // Removed unused helper functions
extension Transcript {
    /// Returns the best available token count for this transcript.
    ///
    /// When built with a Swift 6.3+ compiler and running on iOS/macOS/visionOS 26.4 or later,
    /// the count is requested from `model`. If that path is unavailable or the request
    /// throws, the heuristic `estimatedTokenCount` is returned instead.
    func tokenCount(using model: SystemLanguageModel = .default) async -> Int {
        #if compiler(>=6.3)
        if #available(iOS 26.4, macOS 26.4, visionOS 26.4, *),
           let measured = try? await realTokenCount(using: model) {
            return measured
        }
        #endif
        return estimatedTokenCount
    }

    /// Returns the token count padded with a safety margin for context window management.
    ///
    /// A count obtained from `model` gets a 5% margin. The heuristic fallback gets a
    /// 25% margin plus a flat 100-token overhead allowance.
    func safeTokenCount(using model: SystemLanguageModel = .default) async -> Int {
        #if compiler(>=6.3)
        if #available(iOS 26.4, macOS 26.4, visionOS 26.4, *),
           let measured = try? await realTokenCount(using: model) {
            // Real counts only need a small margin; the fraction is truncated toward zero.
            return measured + Int(Double(measured) * 0.05)
        }
        #endif

        // Estimates are less reliable, so pad them more generously.
        let estimate = estimatedTokenCount
        let margin = Int(Double(estimate) * 0.25)
        return estimate + margin + 100
    }

    /// Reports whether the padded token count exceeds `threshold` of `maxTokens`.
    ///
    /// - Parameters:
    ///   - threshold: Fraction of `maxTokens` that counts as "approaching" the limit.
    ///   - maxTokens: Size of the context window, in tokens.
    ///   - model: The language model used for real token counts when available.
    /// - Returns: `true` when `safeTokenCount(using:)` is strictly greater than the
    ///   truncated product of `maxTokens` and `threshold`.
    func isApproachingLimit(
        threshold: Double = 0.70,
        maxTokens: Int = 4096,
        using model: SystemLanguageModel = .default
    ) async -> Bool {
        let ceiling = Int(Double(maxTokens) * threshold)
        let used = await safeTokenCount(using: model)
        return used > ceiling
    }

    /// Returns the first instructions entry (if any) followed by the most recent
    /// conversation entries that fit within `budget`.
    ///
    /// On iOS/macOS/visionOS 26.4+ (built with Swift 6.3+) the window is sized with real
    /// token counts; if that attempt yields `nil`, or on older systems, the
    /// estimation-based window is used.
    func entriesWithinTokenBudget(
        _ budget: Int,
        using model: SystemLanguageModel = .default
    ) async -> [Transcript.Entry] {
        // Split the transcript in one pass: keep the first instructions entry aside and
        // collect every non-instructions entry in its original order.
        var instructionsEntry: Transcript.Entry?
        var conversationEntries: [Transcript.Entry] = []
        for entry in self {
            if case .instructions = entry {
                if instructionsEntry == nil {
                    instructionsEntry = entry
                }
            } else {
                conversationEntries.append(entry)
            }
        }

        #if compiler(>=6.3)
        if #available(iOS 26.4, macOS 26.4, visionOS 26.4, *) {
            let measuredWindow = await realTokenBudgetWindow(
                instructions: instructionsEntry,
                conversation: conversationEntries,
                budget: budget,
                model: model
            )
            if let measuredWindow {
                return measuredWindow
            }
        }
        #endif

        return estimatedTokenBudgetWindow(
            instructions: instructionsEntry,
            conversation: conversationEntries,
            budget: budget
        )
    }
}
160+
161+ // MARK: - Token Budget Window Implementations
162+
private extension Transcript {
    /// Estimation-based windowing: scans from the newest entry backwards and keeps
    /// entries until the next one would push the running estimate past `budget`.
    ///
    /// The instructions entry, when present, is always included and its estimate is
    /// charged against the budget first. The result is in chronological order.
    func estimatedTokenBudgetWindow(
        instructions: Transcript.Entry?,
        conversation: [Transcript.Entry],
        budget: Int
    ) -> [Transcript.Entry] {
        var usedTokens = instructions?.estimatedTokenCount ?? 0
        var keptCount = 0

        // Walk newest -> oldest and stop at the first entry that does not fit.
        for entry in conversation.reversed() {
            let cost = entry.estimatedTokenCount
            guard usedTokens + cost <= budget else { break }
            usedTokens += cost
            keptCount += 1
        }

        // The kept entries are exactly the newest `keptCount`, already oldest -> newest.
        var window: [Transcript.Entry] = []
        if let instructions {
            window.append(instructions)
        }
        window.append(contentsOf: conversation.suffix(keptCount))
        return window
    }

    #if compiler(>=6.3)
    /// Real-count windowing: binary-searches for the largest number of most recent
    /// entries whose measured token usage (together with the instructions) fits `budget`,
    /// so the number of token usage requests grows logarithmically with the entry count.
    ///
    /// - Returns: The instructions (if any) followed by the selected entries, or `nil`
    ///   when a token usage request fails so the caller can fall back to estimation.
    ///   If the instructions alone exceed `budget`, only the instructions are returned.
    @available(iOS 26.4, macOS 26.4, visionOS 26.4, *)
    func realTokenBudgetWindow(
        instructions: Transcript.Entry?,
        conversation: [Transcript.Entry],
        budget: Int,
        model: SystemLanguageModel
    ) async -> [Transcript.Entry]? {
        let base: [Transcript.Entry] = [instructions].compactMap { $0 }

        // Measure the instructions on their own; an empty base costs nothing.
        var baseTokens = 0
        if !base.isEmpty {
            guard let measured = try? await model.tokenUsage(for: base).tokenCount else {
                return nil
            }
            baseTokens = measured
        }

        guard baseTokens <= budget else {
            return base
        }

        // Invariant: the newest `fits` entries are known to fit; more than `limit` cannot.
        var fits = 0
        var limit = conversation.count

        while fits < limit {
            // Round the midpoint up so the search always makes progress.
            let probe = (fits + limit + 1) / 2
            let candidate = base + Array(conversation.suffix(probe))

            guard let tokens = try? await model.tokenUsage(for: candidate).tokenCount else {
                return nil
            }

            if tokens > budget {
                limit = probe - 1
            } else {
                fits = probe
            }
        }

        return base + Array(conversation.suffix(fits))
    }
    #endif
}
0 commit comments