Skip to content

Commit f52bc32

Browse files
authored
Merge pull request #99 from rudrankriyam/feature/token-usage
Add real token usage tracking (iOS 26.4 / Swift 6.3)
2 parents 531f6dd + d0b93a6 commit f52bc32

19 files changed

+475
-116
lines changed

Foundation Lab/Extensions/Transcript+TokenCounting.swift

Lines changed: 154 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,26 @@
88
import Foundation
99
import FoundationModels
1010

11-
// MARK: - Token Counting Extensions
11+
// MARK: - Token Estimation (Fallback for pre-iOS 26.4)
1212

1313
extension Transcript.Entry {
14+
/// Estimates token count using a heuristic of ~4.5 characters per token, plus a fixed overhead of 5 tokens per tool call and 3 tokens per tool output.
1415
var estimatedTokenCount: Int {
1516
switch self {
1617
case .instructions(let instructions):
1718
return instructions.segments.reduce(0) { $0 + $1.estimatedTokenCount }
18-
1919
case .prompt(let prompt):
2020
return prompt.segments.reduce(0) { $0 + $1.estimatedTokenCount }
21-
2221
case .response(let response):
2322
return response.segments.reduce(0) { $0 + $1.estimatedTokenCount }
24-
2523
case .toolCalls(let toolCalls):
2624
return toolCalls.reduce(0) { total, call in
2725
total + estimateTokensAdvanced(call.toolName) +
28-
estimateTokensForStructuredContent(call.arguments) + 5 // Call overhead
26+
estimateTokensForStructuredContent(call.arguments) + 5
2927
}
30-
3128
case .toolOutput(let output):
32-
return output.segments.reduce(0) { $0 + $1.estimatedTokenCount } + 3 // Output overhead
29+
return output.segments.reduce(0) { $0 + $1.estimatedTokenCount } + 3
3330
@unknown default:
34-
// Return 0 for unknown entry types to avoid crashes
35-
// This is a conservative estimate that won't affect token budget calculations
3631
return 0
3732
}
3833
}
@@ -43,96 +38,197 @@ extension Transcript.Segment {
4338
switch self {
4439
case .text(let textSegment):
4540
return estimateTokensAdvanced(textSegment.content)
46-
4741
case .structure(let structuredSegment):
4842
return estimateTokensForStructuredContent(structuredSegment.content)
4943
@unknown default:
50-
// Return 0 for unknown segment types to avoid crashes
51-
// This is a conservative estimate that won't affect token budget calculations
5244
return 0
5345
}
5446
}
5547
}
5648

5749
extension Transcript {
5850
var estimatedTokenCount: Int {
59-
return self.reduce(0) { $0 + $1.estimatedTokenCount }
51+
self.reduce(0) { $0 + $1.estimatedTokenCount }
6052
}
6153
}
6254

63-
// MARK: - Token Estimation Utilities
64-
65-
/// Estimates token count using Apple's guidance: 4.5 characters per token
55+
/// Estimates token count at ~4.5 characters per token.
6656
func estimateTokensAdvanced(_ text: String) -> Int {
6757
guard !text.isEmpty else { return 0 }
68-
69-
let characterCount = text.count
70-
71-
// Simple: 4.5 characters per token across all content types
72-
let tokensPerChar = 1.0 / 4.5
73-
74-
return max(1, Int(ceil(Double(characterCount) * tokensPerChar)))
58+
return max(1, Int(ceil(Double(text.count) / 4.5)))
7559
}
7660

77-
/// Estimates token count for structured JSON content
61+
/// Estimates token count for structured JSON content.
7862
func estimateTokensForStructuredContent(_ content: GeneratedContent) -> Int {
79-
let jsonString = content.jsonString
80-
let characterCount = jsonString.count
63+
let count = content.jsonString.count
64+
return max(1, Int(ceil(Double(count) / 4.5)))
65+
}
8166

82-
let tokensPerChar = 1.0 / 4.5
67+
// MARK: - Real Token Counting (iOS 26.4+)
8368

84-
return max(1, Int(ceil(Double(characterCount) * tokensPerChar)))
69+
#if compiler(>=6.3)
70+
@available(iOS 26.4, macOS 26.4, visionOS 26.4, *)
71+
extension Transcript {
72+
/// Returns the real token count for the entire transcript using the system tokenizer.
73+
func realTokenCount(using model: SystemLanguageModel = .default) async throws -> Int {
74+
try await model.tokenUsage(for: Array(self)).tokenCount
75+
}
8576
}
77+
#endif
8678

87-
// MARK: - Helper Functions
79+
// MARK: - Unified Token Counting
8880

89-
// Removed unused helper functions
81+
extension Transcript {
82+
/// Returns the best available token count: the real count on iOS/macOS/visionOS 26.4+ when the tokenizer call succeeds, the estimate otherwise.
83+
func tokenCount(using model: SystemLanguageModel = .default) async -> Int {
84+
#if compiler(>=6.3)
85+
if #available(iOS 26.4, macOS 26.4, visionOS 26.4, *) {
86+
if let real = try? await realTokenCount(using: model) {
87+
return real
88+
}
89+
}
90+
#endif
91+
return estimatedTokenCount
92+
}
9093

91-
// MARK: - Context Window Management Utilities
94+
/// Returns the token count with a safety buffer for context window management.
95+
/// Uses a 5% buffer for real counts and a larger buffer (25% plus a fixed 100-token overhead) for estimates.
96+
func safeTokenCount(using model: SystemLanguageModel = .default) async -> Int {
97+
#if compiler(>=6.3)
98+
if #available(iOS 26.4, macOS 26.4, visionOS 26.4, *) {
99+
if let realTokens = try? await realTokenCount(using: model) {
100+
let buffer = Int(Double(realTokens) * 0.05)
101+
return realTokens + buffer
102+
}
103+
}
104+
#endif
92105

93-
extension Transcript {
94-
/// Returns the estimated token count with a larger safety buffer
95-
var safeEstimatedTokenCount: Int {
96106
let baseTokens = estimatedTokenCount
97107
let buffer = Int(Double(baseTokens) * 0.25)
98-
let systemOverhead = 100
99-
100-
return baseTokens + buffer + systemOverhead
108+
return baseTokens + buffer + 100
101109
}
102110

103-
/// Checks if the transcript is approaching the token limit (earlier trigger)
104-
func isApproachingLimit(threshold: Double = 0.70, maxTokens: Int = 4096) -> Bool {
105-
let currentTokens = safeEstimatedTokenCount
106-
let limitThreshold = Int(Double(maxTokens) * threshold)
107-
return currentTokens > limitThreshold
111+
/// Checks if the transcript is approaching the context window limit.
112+
func isApproachingLimit(
113+
threshold: Double = 0.70,
114+
maxTokens: Int = 4096,
115+
using model: SystemLanguageModel = .default
116+
) async -> Bool {
117+
let currentTokens = await safeTokenCount(using: model)
118+
return currentTokens > Int(Double(maxTokens) * threshold)
108119
}
109120

110-
/// Returns a subset of entries that fit within the token budget
111-
func entriesWithinTokenBudget(_ budget: Int) -> [Transcript.Entry] {
112-
var result: [Transcript.Entry] = []
113-
var tokenCount = 0
114-
115-
if let instructions = self.first(where: {
121+
/// Returns the most recent entries that fit within the token budget.
122+
/// On iOS/macOS/visionOS 26.4+, uses a binary search over batched real token counts for efficiency.
123+
/// Falls back to sequential estimation on older versions, or when the real token count cannot be obtained.
124+
func entriesWithinTokenBudget(
125+
_ budget: Int,
126+
using model: SystemLanguageModel = .default
127+
) async -> [Transcript.Entry] {
128+
let instructionsEntry = self.first(where: {
116129
if case .instructions = $0 { return true }
117130
return false
118-
}) {
119-
result.append(instructions)
120-
tokenCount += instructions.estimatedTokenCount
121-
}
131+
})
122132

123-
let nonInstructionEntries = self.filter { entry in
133+
let conversationEntries = self.filter { entry in
124134
if case .instructions = entry { return false }
125135
return true
126136
}
127137

128-
for entry in nonInstructionEntries.reversed() {
138+
#if compiler(>=6.3)
139+
if #available(iOS 26.4, macOS 26.4, visionOS 26.4, *) {
140+
return await realTokenBudgetWindow(
141+
instructions: instructionsEntry,
142+
conversation: conversationEntries,
143+
budget: budget,
144+
model: model
145+
) ?? estimatedTokenBudgetWindow(
146+
instructions: instructionsEntry,
147+
conversation: conversationEntries,
148+
budget: budget
149+
)
150+
}
151+
#endif
152+
153+
return estimatedTokenBudgetWindow(
154+
instructions: instructionsEntry,
155+
conversation: conversationEntries,
156+
budget: budget
157+
)
158+
}
159+
}
160+
161+
// MARK: - Token Budget Window Implementations
162+
163+
private extension Transcript {
164+
/// Estimation-based windowing: sequential scan from most recent entries.
165+
func estimatedTokenBudgetWindow(
166+
instructions: Transcript.Entry?,
167+
conversation: [Transcript.Entry],
168+
budget: Int
169+
) -> [Transcript.Entry] {
170+
let base: [Transcript.Entry] = instructions.map { [$0] } ?? []
171+
var selectedConversation: [Transcript.Entry] = []
172+
var usedTokens = 0
173+
174+
if let instructions {
175+
usedTokens += instructions.estimatedTokenCount
176+
}
177+
178+
for entry in conversation.reversed() {
129179
let entryTokens = entry.estimatedTokenCount
130-
if tokenCount + entryTokens > budget { break }
180+
if usedTokens + entryTokens > budget { break }
181+
selectedConversation.append(entry)
182+
usedTokens += entryTokens
183+
}
184+
185+
// Preserve chronological order (oldest -> newest) for the selected window.
186+
return base + Array(selectedConversation.reversed())
187+
}
188+
189+
#if compiler(>=6.3)
190+
/// Real token counting with binary search: O(log N) API calls instead of O(N); relies on the token count not decreasing as more entries are included.
191+
/// Returns nil if the token usage API fails, signaling fallback to estimation.
192+
@available(iOS 26.4, macOS 26.4, visionOS 26.4, *)
193+
func realTokenBudgetWindow(
194+
instructions: Transcript.Entry?,
195+
conversation: [Transcript.Entry],
196+
budget: Int,
197+
model: SystemLanguageModel
198+
) async -> [Transcript.Entry]? {
199+
let base: [Transcript.Entry] = instructions.map { [$0] } ?? []
200+
201+
guard let baseTokens = base.isEmpty
202+
? 0
203+
: try? await model.tokenUsage(for: base).tokenCount
204+
else {
205+
return nil
206+
}
207+
208+
if baseTokens > budget {
209+
return base
210+
}
131211

132-
result.insert(entry, at: result.count)
133-
tokenCount += entryTokens
212+
var low = 0
213+
var high = conversation.count
214+
215+
while low < high {
216+
let mid = (low + high + 1) / 2
217+
let recentEntries = Array(conversation.suffix(mid))
218+
let candidate = base + recentEntries
219+
220+
guard let tokens = try? await model.tokenUsage(for: candidate).tokenCount else {
221+
return nil
222+
}
223+
224+
if tokens <= budget {
225+
low = mid
226+
} else {
227+
high = mid - 1
228+
}
134229
}
135230

136-
return result
231+
return base + Array(conversation.suffix(low))
137232
}
233+
#endif
138234
}

Foundation Lab/Health/ViewModels/HealthChatViewModel.swift

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,23 @@ final class HealthChatViewModel {
2626
var sessionCount: Int = 1
2727
var currentHealthMetrics: [MetricType: Double] = [:]
2828

29+
// MARK: - Token Usage Tracking
30+
private(set) var currentTokenCount: Int = 0
31+
private(set) var maxContextSize: Int = AppConfiguration.TokenManagement.defaultMaxTokens
32+
33+
var tokenUsageFraction: Double {
34+
guard maxContextSize > 0 else { return 0 }
35+
return min(1.0, Double(currentTokenCount) / Double(maxContextSize))
36+
}
37+
2938
// MARK: - Streaming Task
3039
private var streamingTask: Task<Void, Error>?
3140

3241
// MARK: - Public Properties
3342
private(set) var session: LanguageModelSession
3443
private var modelContext: ModelContext?
3544
private let healthDataManager: HealthDataManager
45+
private let languageModel = SystemLanguageModel.default
3646

3747
// MARK: - Tools
3848
private let tools: [any Tool] = [
@@ -43,11 +53,14 @@ final class HealthChatViewModel {
4353
// MARK: - Initialization
4454
init(healthDataManager: HealthDataManager? = nil) {
4555
self.healthDataManager = healthDataManager ?? .shared
46-
// Create session with tools and instructions for health data access
4756
self.session = LanguageModelSession(
4857
tools: tools,
4958
instructions: Instructions(Self.baseInstructions)
5059
)
60+
61+
Task {
62+
maxContextSize = await AppConfiguration.TokenManagement.contextSize(for: languageModel)
63+
}
5164
}
5265

5366
func setModelContext(_ context: ModelContext) {
@@ -91,7 +104,8 @@ final class HealthChatViewModel {
91104
await saveMessageToSession(responseText, isFromUser: false)
92105
}
93106

94-
// Generate insights if health data was discussed
107+
await updateTokenCount()
108+
95109
if shouldGenerateInsight(from: responseText) {
96110
await generateHealthInsight(from: responseText)
97111
}
@@ -113,6 +127,7 @@ final class HealthChatViewModel {
113127
streamingTask?.cancel()
114128
streamingTask = nil
115129
sessionCount = 1
130+
currentTokenCount = 0
116131
session = LanguageModelSession(
117132
tools: tools,
118133
instructions: Instructions(Self.baseInstructions)
@@ -149,6 +164,10 @@ final class HealthChatViewModel {
149164
}
150165

151166
private extension HealthChatViewModel {
167+
func updateTokenCount() async {
168+
currentTokenCount = await session.transcript.tokenCount(using: languageModel)
169+
}
170+
152171
static let baseInstructions = """
153172
You are a friendly and knowledgeable health coach AI assistant.
154173
Based on the user's health data, provide personalized, encouraging responses.
@@ -196,6 +215,7 @@ private extension HealthChatViewModel {
196215
instructions: Instructions(contextInstructions)
197216
)
198217
sessionCount += 1
218+
currentTokenCount = 0
199219
}
200220
}
201221

@@ -258,12 +278,14 @@ private extension HealthChatViewModel {
258278
isSummarizing = false
259279

260280
try await respondWithNewSession(to: userMessage, shouldSaveUserMessage: false)
281+
await updateTokenCount()
261282
} catch {
262283
isSummarizing = false
263284
session = LanguageModelSession(
264285
tools: tools,
265286
instructions: Instructions(Self.baseInstructions)
266287
)
288+
currentTokenCount = 0
267289
let restartMessage = "I need to start a fresh conversation. Please repeat your question."
268290
await saveMessageToSession(restartMessage, isFromUser: false)
269291
}

Foundation Lab/Health/Views/Chat/HealthChatView.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ struct HealthChatView: View {
2020
var body: some View {
2121
NavigationStack {
2222
VStack(spacing: 0) {
23+
TokenUsageBar(
24+
currentTokenCount: viewModel.currentTokenCount,
25+
maxContextSize: viewModel.maxContextSize,
26+
tokenUsageFraction: viewModel.tokenUsageFraction
27+
)
2328
messagesView
2429

2530
HealthChatInputView(

0 commit comments

Comments
 (0)