rudrankriyam
diff --git a/Foundation Lab/Extensions/Transcript+TokenCounting.swift b/Foundation Lab/Extensions/Transcript+TokenCounting.swift
Lines changed: 154 additions & 58 deletions
diff --git a/Foundation Lab/Health/ViewModels/HealthChatViewModel.swift b/Foundation Lab/Health/ViewModels/HealthChatViewModel.swift
Lines changed: 24 additions & 2 deletions
diff --git a/Foundation Lab/Health/Views/Chat/HealthChatView.swift b/Foundation Lab/Health/Views/Chat/HealthChatView.swift
Lines changed: 5 additions & 0 deletions
@@ -8,31 +8,26 @@
 import Foundation
 import FoundationModels
 
-// MARK: - Token Counting Extensions
+// MARK: - Token Estimation (Fallback for pre-iOS 26.4)
 
 extension Transcript.Entry {
+    /// Estimates token count using a heuristic of ~4.5 characters per token.
     var estimatedTokenCount: Int {
         switch self {
         case .instructions(let instructions):
             return instructions.segments.reduce(0) { $0 + $1.estimatedTokenCount }
-
         case .prompt(let prompt):
             return prompt.segments.reduce(0) { $0 + $1.estimatedTokenCount }
-
         case .response(let response):
             return response.segments.reduce(0) { $0 + $1.estimatedTokenCount }
-
         case .toolCalls(let toolCalls):
             return toolCalls.reduce(0) { total, call in
                 total + estimateTokensAdvanced(call.toolName) +
-                estimateTokensForStructuredContent(call.arguments) + 5 // Call overhead
+                estimateTokensForStructuredContent(call.arguments) + 5
             }
-
         case .toolOutput(let output):
-            return output.segments.reduce(0) { $0 + $1.estimatedTokenCount } + 3 // Output overhead
+            return output.segments.reduce(0) { $0 + $1.estimatedTokenCount } + 3
         @unknown default:
-            // Return 0 for unknown entry types to avoid crashes
-            // This is a conservative estimate that won't affect token budget calculations
             return 0
         }
     }
@@ -43,96 +38,197 @@ extension Transcript.Segment {
         switch self {
         case .text(let textSegment):
             return estimateTokensAdvanced(textSegment.content)
-
         case .structure(let structuredSegment):
             return estimateTokensForStructuredContent(structuredSegment.content)
         @unknown default:
-            // Return 0 for unknown segment types to avoid crashes
-            // This is a conservative estimate that won't affect token budget calculations
             return 0
         }
     }
 }
 
 extension Transcript {
     var estimatedTokenCount: Int {
-        return self.reduce(0) { $0 + $1.estimatedTokenCount }
+        self.reduce(0) { $0 + $1.estimatedTokenCount }
     }
 }
 
-// MARK: - Token Estimation Utilities
-
-/// Estimates token count using Apple's guidance: 4.5 characters per token
+/// Estimates token count at ~4.5 characters per token.
 func estimateTokensAdvanced(_ text: String) -> Int {
     guard !text.isEmpty else { return 0 }
-
-    let characterCount = text.count
-
-    // Simple: 4.5 characters per token across all content types
-    let tokensPerChar = 1.0 / 4.5
-
-    return max(1, Int(ceil(Double(characterCount) * tokensPerChar)))
+    return max(1, Int(ceil(Double(text.count) / 4.5)))
 }
 
-/// Estimates token count for structured JSON content
+/// Estimates token count for structured JSON content.
 func estimateTokensForStructuredContent(_ content: GeneratedContent) -> Int {
-    let jsonString = content.jsonString
-    let characterCount = jsonString.count
+    let count = content.jsonString.count
+    return max(1, Int(ceil(Double(count) / 4.5)))
+}
 
-    let tokensPerChar = 1.0 / 4.5
+// MARK: - Real Token Counting (iOS 26.4+)
 
-    return max(1, Int(ceil(Double(characterCount) * tokensPerChar)))
+#if compiler(>=6.3)
+@available(iOS 26.4, macOS 26.4, visionOS 26.4, *)
+extension Transcript {
+    /// Returns the real token count for the entire transcript using the system tokenizer.
+    func realTokenCount(using model: SystemLanguageModel = .default) async throws -> Int {
+        try await model.tokenUsage(for: Array(self)).tokenCount
+    }
 }
+#endif
 
-// MARK: - Helper Functions
+// MARK: - Unified Token Counting
 
-// Removed unused helper functions
+extension Transcript {
+    /// Returns the best available token count: the real count on iOS 26.4+ (falling back to the estimate if that call fails), the estimate otherwise.
+    func tokenCount(using model: SystemLanguageModel = .default) async -> Int {
+        #if compiler(>=6.3)
+        if #available(iOS 26.4, macOS 26.4, visionOS 26.4, *) {
+            if let real = try? await realTokenCount(using: model) {
+                return real
+            }
+        }
+        #endif
+        return estimatedTokenCount
+    }
 
-// MARK: - Context Window Management Utilities
+    /// Returns the token count with a safety buffer for context window management.
+    /// Uses a small buffer (5%) for real counts, and a larger buffer (25% plus a fixed 100-token overhead) for estimates.
+    func safeTokenCount(using model: SystemLanguageModel = .default) async -> Int {
+        #if compiler(>=6.3)
+        if #available(iOS 26.4, macOS 26.4, visionOS 26.4, *) {
+            if let realTokens = try? await realTokenCount(using: model) {
+                let buffer = Int(Double(realTokens) * 0.05)
+                return realTokens + buffer
+            }
+        }
+        #endif
 
-extension Transcript {
-    /// Returns the estimated token count with a larger safety buffer
-    var safeEstimatedTokenCount: Int {
         let baseTokens = estimatedTokenCount
         let buffer = Int(Double(baseTokens) * 0.25)
-        let systemOverhead = 100
-
-        return baseTokens + buffer + systemOverhead
+        return baseTokens + buffer + 100
     }
 
-    /// Checks if the transcript is approaching the token limit (earlier trigger)
-    func isApproachingLimit(threshold: Double = 0.70, maxTokens: Int = 4096) -> Bool {
-        let currentTokens = safeEstimatedTokenCount
-        let limitThreshold = Int(Double(maxTokens) * threshold)
-        return currentTokens > limitThreshold
+    /// Checks if the transcript is approaching the context window limit.
+    func isApproachingLimit(
+        threshold: Double = 0.70,
+        maxTokens: Int = 4096,
+        using model: SystemLanguageModel = .default
+    ) async -> Bool {
+        let currentTokens = await safeTokenCount(using: model)
+        return currentTokens > Int(Double(maxTokens) * threshold)
     }
 
-    /// Returns a subset of entries that fit within the token budget
-    func entriesWithinTokenBudget(_ budget: Int) -> [Transcript.Entry] {
-        var result: [Transcript.Entry] = []
-        var tokenCount = 0
-
-        if let instructions = self.first(where: {
+    /// Returns the instructions entry (if any) followed by the most recent entries that fit within the token budget.
+    /// On iOS 26.4+ uses binary search with batched real token counts for efficiency.
+    /// Falls back to sequential estimation on older versions or if real token counting fails.
+    func entriesWithinTokenBudget(
+        _ budget: Int,
+        using model: SystemLanguageModel = .default
+    ) async -> [Transcript.Entry] {
+        let instructionsEntry = self.first(where: {
             if case .instructions = $0 { return true }
             return false
-        }) {
-            result.append(instructions)
-            tokenCount += instructions.estimatedTokenCount
-        }
+        })
 
-        let nonInstructionEntries = self.filter { entry in
+        let conversationEntries = self.filter { entry in
             if case .instructions = entry { return false }
             return true
         }
 
-        for entry in nonInstructionEntries.reversed() {
+        #if compiler(>=6.3)
+        if #available(iOS 26.4, macOS 26.4, visionOS 26.4, *) {
+            return await realTokenBudgetWindow(
+                instructions: instructionsEntry,
+                conversation: conversationEntries,
+                budget: budget,
+                model: model
+            ) ?? estimatedTokenBudgetWindow(
+                instructions: instructionsEntry,
+                conversation: conversationEntries,
+                budget: budget
+            )
+        }
+        #endif
+
+        return estimatedTokenBudgetWindow(
+            instructions: instructionsEntry,
+            conversation: conversationEntries,
+            budget: budget
+        )
+    }
+}
+
+// MARK: - Token Budget Window Implementations
+
+private extension Transcript {
+    /// Estimation-based windowing: sequential scan from most recent entries.
+    func estimatedTokenBudgetWindow(
+        instructions: Transcript.Entry?,
+        conversation: [Transcript.Entry],
+        budget: Int
+    ) -> [Transcript.Entry] {
+        let base: [Transcript.Entry] = instructions.map { [$0] } ?? []
+        var selectedConversation: [Transcript.Entry] = []
+        var usedTokens = 0
+
+        if let instructions {
+            usedTokens += instructions.estimatedTokenCount
+        }
+
+        for entry in conversation.reversed() {
             let entryTokens = entry.estimatedTokenCount
-            if tokenCount + entryTokens > budget { break }
+            if usedTokens + entryTokens > budget { break }
+            selectedConversation.append(entry)
+            usedTokens += entryTokens
+        }
+
+        // Preserve chronological order (oldest -> newest) for the selected window.
+        return base + Array(selectedConversation.reversed())
+    }
+
+    #if compiler(>=6.3)
+    /// Real token counting with binary search: O(log N) API calls instead of O(N).
+    /// Returns nil if the token usage API fails, signaling fallback to estimation.
+    @available(iOS 26.4, macOS 26.4, visionOS 26.4, *)
+    func realTokenBudgetWindow(
+        instructions: Transcript.Entry?,
+        conversation: [Transcript.Entry],
+        budget: Int,
+        model: SystemLanguageModel
+    ) async -> [Transcript.Entry]? {
+        let base: [Transcript.Entry] = instructions.map { [$0] } ?? []
+
+        guard let baseTokens = base.isEmpty
+            ? 0
+            : try? await model.tokenUsage(for: base).tokenCount
+        else {
+            return nil
+        }
+
+        if baseTokens > budget {
+            return base
+        }
 
-            result.insert(entry, at: result.count)
-            tokenCount += entryTokens
+        var low = 0
+        var high = conversation.count
+
+        while low < high {
+            let mid = (low + high + 1) / 2
+            let recentEntries = Array(conversation.suffix(mid))
+            let candidate = base + recentEntries
+
+            guard let tokens = try? await model.tokenUsage(for: candidate).tokenCount else {
+                return nil
+            }
+
+            if tokens <= budget {
+                low = mid
+            } else {
+                high = mid - 1
+            }
         }
 
-        return result
+        return base + Array(conversation.suffix(low))
     }
+    #endif
 }
@@ -26,13 +26,23 @@ final class HealthChatViewModel {
     var sessionCount: Int = 1
     var currentHealthMetrics: [MetricType: Double] = [:]
 
+    // MARK: - Token Usage Tracking
+    private(set) var currentTokenCount: Int = 0
+    private(set) var maxContextSize: Int = AppConfiguration.TokenManagement.defaultMaxTokens
+
+    var tokenUsageFraction: Double {
+        guard maxContextSize > 0 else { return 0 }
+        return min(1.0, Double(currentTokenCount) / Double(maxContextSize))
+    }
+
     // MARK: - Streaming Task
     private var streamingTask: Task<Void, Error>?
 
     // MARK: - Public Properties
     private(set) var session: LanguageModelSession
     private var modelContext: ModelContext?
     private let healthDataManager: HealthDataManager
+    private let languageModel = SystemLanguageModel.default
 
     // MARK: - Tools
     private let tools: [any Tool] = [
@@ -43,11 +53,14 @@ final class HealthChatViewModel {
     // MARK: - Initialization
     init(healthDataManager: HealthDataManager? = nil) {
         self.healthDataManager = healthDataManager ?? .shared
-        // Create session with tools and instructions for health data access
         self.session = LanguageModelSession(
             tools: tools,
             instructions: Instructions(Self.baseInstructions)
         )
+
+        Task {
+            maxContextSize = await AppConfiguration.TokenManagement.contextSize(for: languageModel)
+        }
     }
 
     func setModelContext(_ context: ModelContext) {
@@ -91,7 +104,8 @@ final class HealthChatViewModel {
                 await saveMessageToSession(responseText, isFromUser: false)
             }
 
-            // Generate insights if health data was discussed
+            await updateTokenCount()
+
             if shouldGenerateInsight(from: responseText) {
                 await generateHealthInsight(from: responseText)
             }
@@ -113,6 +127,7 @@ final class HealthChatViewModel {
         streamingTask?.cancel()
         streamingTask = nil
         sessionCount = 1
+        currentTokenCount = 0
         session = LanguageModelSession(
             tools: tools,
             instructions: Instructions(Self.baseInstructions)
@@ -149,6 +164,10 @@ final class HealthChatViewModel {
 }
 
 private extension HealthChatViewModel {
+    func updateTokenCount() async {
+        currentTokenCount = await session.transcript.tokenCount(using: languageModel)
+    }
+
     static let baseInstructions = """
     You are a friendly and knowledgeable health coach AI assistant.
     Based on the user's health data, provide personalized, encouraging responses.
@@ -196,6 +215,7 @@ private extension HealthChatViewModel {
             instructions: Instructions(contextInstructions)
         )
         sessionCount += 1
+        currentTokenCount = 0
     }
 }
 
@@ -258,12 +278,14 @@ private extension HealthChatViewModel {
             isSummarizing = false
 
             try await respondWithNewSession(to: userMessage, shouldSaveUserMessage: false)
+            await updateTokenCount()
         } catch {
             isSummarizing = false
             session = LanguageModelSession(
                 tools: tools,
                 instructions: Instructions(Self.baseInstructions)
             )
+            currentTokenCount = 0
             let restartMessage = "I need to start a fresh conversation. Please repeat your question."
             await saveMessageToSession(restartMessage, isFromUser: false)
         }
 
@@ -20,6 +20,11 @@ struct HealthChatView: View {
     var body: some View {
         NavigationStack {
             VStack(spacing: 0) {
+                TokenUsageBar(
+                    currentTokenCount: viewModel.currentTokenCount,
+                    maxContextSize: viewModel.maxContextSize,
+                    tokenUsageFraction: viewModel.tokenUsageFraction
+                )
                 messagesView
 
                 HealthChatInputView(