Add model loading progress for reloads

github-actions[bot] · github-actions[bot] · commit dcc0a3a20d0e · 2026-04-30T21:18:09.000-07:00
diff --git a/Sources/MLXInferenceCore/InferenceEngine.swift b/Sources/MLXInferenceCore/InferenceEngine.swift
@@ -72,7 +72,7 @@ private struct TransformersTokenizerBridge: MLXLMCommon.Tokenizer, Sendable {
 public enum ModelState: Equatable, Sendable {
     case idle
     case downloading(progress: Double, speed: String)
-    case loading
+    case loading(progress: Double, stage: String)
     case ready(modelId: String)
     case generating
     case error(String)
@@ -319,7 +319,7 @@ public final class InferenceEngine: ObservableObject {
     }
 
     private func loadVerifiedModel(modelId: String) async {
-        state = .loading
+        setLoadingState(progress: 0.05, stage: "Preparing model configuration")
         currentModelId = modelId
 
         do {
@@ -354,36 +354,29 @@ public final class InferenceEngine: ObservableObject {
                 print("[InferenceEngine] SSD expert streaming: disabled")
             }
 
+            setLoadingState(progress: 0.15, stage: "Inspecting model architecture")
             let downloader = HubDownloader(hub: hub)
             let architecture = try await ModelArchitectureProbe.inspect(
                 configuration: config,
                 downloader: downloader
             )
 
-            let speedTracker = DownloadSpeedTracker()
+            let loadingStage = architecture.supportsVision
+                ? "Loading multimodal model"
+                : "Loading language model"
+
+            setLoadingState(progress: 0.22, stage: loadingStage)
 
             if architecture.supportsVision {
                 container = try await VLMModelFactory.shared.loadContainer(
                     from: downloader,
                     using: TransformersTokenizerLoader(),
                     configuration: config
                 ) { [weak self] progress in
-                    speedTracker.record(totalBytes: progress.completedUnitCount)
-                    let smoothedSpeed = speedTracker.speedBytesPerSec
-
                     Task { @MainActor in
                         guard let self else { return }
                         let pct = progress.fractionCompleted
-                        let speedStr = smoothedSpeed
-                            .map { String(format: "%.1f MB/s", $0 / 1_000_000) } ?? ""
-                        self.state = .downloading(progress: pct, speed: speedStr)
-
-                        self.downloadManager.updateProgress(ModelDownloadProgress(
-                            modelId: modelId,
-                            fractionCompleted: pct,
-                            currentFile: "",
-                            speedMBps: smoothedSpeed.map { $0 / 1_000_000 }
-                        ))
+                        self.setLoadingState(progress: 0.22 + (pct * 0.68), stage: loadingStage)
                     }
                 }
             } else {
@@ -392,22 +385,10 @@ public final class InferenceEngine: ObservableObject {
                     using: TransformersTokenizerLoader(),
                     configuration: config
                 ) { [weak self] progress in
-                    speedTracker.record(totalBytes: progress.completedUnitCount)
-                    let smoothedSpeed = speedTracker.speedBytesPerSec
-
                     Task { @MainActor in
                         guard let self else { return }
                         let pct = progress.fractionCompleted
-                        let speedStr = smoothedSpeed
-                            .map { String(format: "%.1f MB/s", $0 / 1_000_000) } ?? ""
-                        self.state = .downloading(progress: pct, speed: speedStr)
-
-                        self.downloadManager.updateProgress(ModelDownloadProgress(
-                            modelId: modelId,
-                            fractionCompleted: pct,
-                            currentFile: "",
-                            speedMBps: smoothedSpeed.map { $0 / 1_000_000 }
-                        ))
+                        self.setLoadingState(progress: 0.22 + (pct * 0.68), stage: loadingStage)
                     }
                 }
             }
@@ -417,11 +398,13 @@ public final class InferenceEngine: ObservableObject {
             downloadManager.refresh()
 
             // Verify integrity to catch incomplete downloads before marking as ready
+            setLoadingState(progress: 0.94, stage: "Verifying model files")
             guard ModelStorage.verifyModelIntegrity(for: modelId) else {
                 throw NSError(domain: "InferenceEngine", code: 1, userInfo: [NSLocalizedDescriptionKey: "Model safetensors files are incomplete. Please delete and re-download."])
             }
 
             // Read the model's actual max context length from config.json
+            setLoadingState(progress: 0.98, stage: "Reading model limits")
             if let ctxLen = ModelStorage.readMaxContextLength(for: modelId) {
                 self.maxContextWindow = ctxLen
                 print("[InferenceEngine] Model context window: \(ctxLen) tokens")
@@ -471,6 +454,18 @@ public final class InferenceEngine: ObservableObject {
         MLX.Memory.cacheLimit = 0
     }
 
+    /// Publishes a `.loading` state whose progress is clamped to `0...1`.
+    ///
+    /// - Parameters:
+    ///   - progress: Fraction of the load completed; NaN is reported as `0`.
+    ///   - stage: Description of the current load step.
+    private func setLoadingState(progress: Double, stage: String) {
+        // NaN passes through min/max unchanged and would trap in the views'
+        // `Int(progress * 100)` conversions, so map it to 0 first.
+        let fraction = progress.isNaN ? 0 : min(max(progress, 0), 1)
+        state = .loading(progress: fraction, stage: stage)
+    }
+
     private func markModelCorrupted(modelId: String?, message: String) {
         let failedModelId = modelId ?? currentModelId
         releaseLoadedModelResources()
@@ -622,7 +617,7 @@ extension InferenceEngine {
                     // Use the real token count from the prepared LMInput rather than
                     // a character-length heuristic (which was consistently off by 2–3×
                     // for CJK and code content).
-                    let baseTokens = lmInput.text.tokens.shape[0]
+                    let baseTokens = lmInput.text.tokens.size
                     self.activeContextTokens = baseTokens
                     
                     // maxContextWindow is already set during loadModel() from config.json
diff --git a/SwiftBuddy/SwiftBuddy/Views/ChatView.swift b/SwiftBuddy/SwiftBuddy/Views/ChatView.swift
@@ -137,19 +137,28 @@ struct ChatView: View {
         case .downloading(let progress, let speed):
             DownloadAnimationView(progress: progress, speed: speed)
 
-        case .loading:
+        case .loading(let progress, let stage):
             VStack(spacing: 16) {
                 ZStack {
                     Circle()
                         .stroke(SwiftBuddyTheme.accent.opacity(0.15), lineWidth: 3)
                         .frame(width: 64, height: 64)
-                    ProgressView()
+                    ProgressView(value: progress)
                         .controlSize(.large)
                         .tint(SwiftBuddyTheme.accent)
+                        .frame(width: 64)
+                }
+                VStack(spacing: 4) {
+                    Text("Loading model into Metal GPU…")
+                        .font(.subheadline)
+                        .foregroundStyle(SwiftBuddyTheme.textSecondary)
+                    Text(stage)
+                        .font(.caption)
+                        .foregroundStyle(SwiftBuddyTheme.textTertiary)
+                    Text("\(Int(progress * 100))%")
+                        .font(.caption.monospacedDigit())
+                        .foregroundStyle(SwiftBuddyTheme.textTertiary)
                 }
-                Text("Loading model into Metal GPU…")
-                    .font(.subheadline)
-                    .foregroundStyle(SwiftBuddyTheme.textSecondary)
             }
 
         case .idle:
@@ -252,13 +261,18 @@ struct ChatView: View {
         switch engine.state {
         case .idle:
             bannerRow(icon: "cpu", text: "No model loaded", color: SwiftBuddyTheme.textTertiary)
-        case .loading:
-            HStack(spacing: 8) {
-                ProgressView().controlSize(.mini).tint(SwiftBuddyTheme.accent)
-                Text("Loading model…")
-                    .font(.caption)
-                    .foregroundStyle(SwiftBuddyTheme.textSecondary)
-                Spacer()
+        case .loading(let progress, let stage):
+            VStack(alignment: .leading, spacing: 4) {
+                HStack {
+                    Text(stage)
+                        .font(.caption.weight(.medium))
+                        .foregroundStyle(SwiftBuddyTheme.textSecondary)
+                    Spacer()
+                    Text("\(Int(progress * 100))%")
+                        .font(.caption2.monospacedDigit())
+                        .foregroundStyle(SwiftBuddyTheme.textTertiary)
+                }
+                ProgressView(value: progress).tint(SwiftBuddyTheme.accent)
             }
             .padding(.horizontal, 16)
             .padding(.vertical, 8)
@@ -527,7 +541,7 @@ extension ModelState {
     var shortLabel: String {
         switch self {
         case .idle:                        return "No model"
-        case .loading:                     return "Loading…"
+        case .loading(let progress, _):    return "\(Int(progress * 100))% loading"
         case .downloading(let p, _):       return "\(Int(p * 100))% downloading"
         case .ready(let modelId):          return modelId.components(separatedBy: "/").last ?? modelId
         case .generating:                  return "Generating"
diff --git a/SwiftBuddy/SwiftBuddy/Views/ModelsView.swift b/SwiftBuddy/SwiftBuddy/Views/ModelsView.swift
@@ -277,8 +277,8 @@ private struct ActiveModelCardView: View {
                     entry: engine.loadedModelId.flatMap { id in ModelCatalog.all.first(where: { $0.id == id }) },
                     state: engine.state
                 )
-            case .loading:
-                loadingCard
+            case .loading(let progress, let stage):
+                loadingCard(progress: progress, stage: stage)
             case .downloading(let progress, let speed):
                 downloadingCard(progress: progress, speed: speed)
             case .idle, .error:
@@ -287,18 +287,24 @@ private struct ActiveModelCardView: View {
         }
     }
 
-    private var loadingCard: some View {
-        HStack(spacing: 12) {
-            ProgressView().controlSize(.regular).tint(SwiftBuddyTheme.accent)
-            VStack(alignment: .leading, spacing: 2) {
-                Text("Loading model…")
-                    .font(.subheadline.weight(.semibold))
-                    .foregroundStyle(SwiftBuddyTheme.textPrimary)
-                Text("Initializing Metal GPU")
-                    .font(.caption)
+    private func loadingCard(progress: Double, stage: String) -> some View {
+        VStack(alignment: .leading, spacing: 10) {
+            HStack {
+                ProgressView().controlSize(.regular).tint(SwiftBuddyTheme.accent)
+                VStack(alignment: .leading, spacing: 2) {
+                    Text("Loading model…")
+                        .font(.subheadline.weight(.semibold))
+                        .foregroundStyle(SwiftBuddyTheme.textPrimary)
+                    Text(stage)
+                        .font(.caption)
+                        .foregroundStyle(SwiftBuddyTheme.textSecondary)
+                }
+                Spacer()
+                Text("\(Int(progress * 100))%")
+                    .font(.caption.monospacedDigit())
                     .foregroundStyle(SwiftBuddyTheme.textSecondary)
             }
-            Spacer()
+            ProgressView(value: progress).tint(SwiftBuddyTheme.accent)
         }
         .padding()
         .glassCard(cornerRadius: SwiftBuddyTheme.radiusLarge)
diff --git a/SwiftBuddy/SwiftBuddy/Views/RootView.swift b/SwiftBuddy/SwiftBuddy/Views/RootView.swift
@@ -26,6 +26,7 @@ struct RootView: View {
     @State private var showTextIngestion = false
     @State private var showModelManagement = false
     @State private var lastDownloadLogBucket: Int?
+    @State private var lastLoadingStage: String?
     enum Tab { case chat, models, palace, mindPalace, miner, settings }
 
     var body: some View {
@@ -72,11 +73,16 @@ struct RootView: View {
                     switch newState {
                     case .idle:
                         lastDownloadLogBucket = nil
+                        lastLoadingStage = nil
                         ConsoleLog.shared.info("Engine idle — no model loaded")
-                    case .loading:
+                    case .loading(_, let stage):
                         lastDownloadLogBucket = nil
-                        ConsoleLog.shared.info("Loading model…")
+                        if lastLoadingStage != stage {
+                            lastLoadingStage = stage
+                            ConsoleLog.shared.info(stage)
+                        }
                     case .downloading(let p, let speed):
+                        lastLoadingStage = nil
                         let percent = Int(p * 100)
                         let bucket = min((percent / 25) * 25, 100)
                         if bucket != lastDownloadLogBucket, [0, 25, 50, 75, 100].contains(bucket) {
@@ -85,12 +91,15 @@ struct RootView: View {
                         }
                     case .ready(let modelId):
                         lastDownloadLogBucket = nil
+                        lastLoadingStage = nil
                         ConsoleLog.shared.info("✓ Model ready: \(modelId)")
                     case .generating:
                         lastDownloadLogBucket = nil
+                        lastLoadingStage = nil
                         ConsoleLog.shared.debug("Generating…")
                     case .error(let msg):
                         lastDownloadLogBucket = nil
+                        lastLoadingStage = nil
                         ConsoleLog.shared.error("Engine error: \(msg)")
                     }
                 }
@@ -430,12 +439,12 @@ struct RootView: View {
                 .tint(SwiftBuddyTheme.accent)
                 .controlSize(.small)
 
-        case .loading:
-            HStack(spacing: 6) {
-                ProgressView().controlSize(.mini).tint(SwiftBuddyTheme.accent)
-                Text("Loading…")
-                    .font(.caption)
-                    .foregroundStyle(SwiftBuddyTheme.textSecondary)
+        case .loading(let progress, let stage):
+            VStack(alignment: .leading, spacing: 4) {
+                ProgressView(value: progress).tint(SwiftBuddyTheme.accent)
+                Text("\(Int(progress * 100))% · \(stage)")
+                    .font(.caption2.monospacedDigit())
+                    .foregroundStyle(SwiftBuddyTheme.textTertiary)
             }
 
         case .downloading(let progress, let speed):
diff --git a/SwiftBuddy/SwiftBuddy/Views/SettingsView.swift b/SwiftBuddy/SwiftBuddy/Views/SettingsView.swift
@@ -527,29 +527,64 @@ struct SettingsView: View {
                         if viewModel.config.streamExperts != (ModelCatalog.all.first(where: {
                             if case .ready(let id) = engine.state { return $0.id == id } else { return false }
                         })?.isMoE ?? false) {
-                            HStack(spacing: 6) {
-                                Image(systemName: "arrow.clockwise.circle.fill")
-                                    .foregroundStyle(SwiftBuddyTheme.warning)
-                                    .font(.caption)
-                                Text("Reload model to apply this change")
-                                    .font(.caption2.weight(.medium))
-                                    .foregroundStyle(SwiftBuddyTheme.warning)
-                                Spacer()
-                                Button("Reload") {
-                                    let currentId: String? = {
-                                        if case .ready(let id) = engine.state { return id }
-                                        return nil
-                                    }()
-                                    if let id = currentId {
-                                        Task {
-                                            engine.unload()
-                                            await engine.load(modelId: id)
+                            VStack(alignment: .leading, spacing: 8) {
+                                HStack(spacing: 6) {
+                                    Image(systemName: "arrow.clockwise.circle.fill")
+                                        .foregroundStyle(SwiftBuddyTheme.warning)
+                                        .font(.caption)
+                                    Text("Reload model to apply this change")
+                                        .font(.caption2.weight(.medium))
+                                        .foregroundStyle(SwiftBuddyTheme.warning)
+                                    Spacer()
+                                    Button("Reload") {
+                                        let currentId: String? = {
+                                            if case .ready(let id) = engine.state { return id }
+                                            return nil
+                                        }()
+                                        if let id = currentId {
+                                            Task {
+                                                engine.unload()
+                                                await engine.load(modelId: id)
+                                            }
                                         }
                                     }
+                                    .font(.caption2.weight(.semibold))
+                                    .foregroundStyle(SwiftBuddyTheme.accent)
+                                    .buttonStyle(.plain)
+                                }
+
+                                switch engine.state {
+                                case .loading(let progress, let stage):
+                                    VStack(alignment: .leading, spacing: 4) {
+                                        HStack {
+                                            Text(stage)
+                                                .font(.caption2.weight(.medium))
+                                                .foregroundStyle(SwiftBuddyTheme.textSecondary)
+                                            Spacer()
+                                            Text("\(Int(progress * 100))%")
+                                                .font(.caption2.monospacedDigit())
+                                                .foregroundStyle(SwiftBuddyTheme.textTertiary)
+                                        }
+                                        ProgressView(value: progress)
+                                            .tint(SwiftBuddyTheme.accent)
+                                    }
+                                case .downloading(let progress, let speed):
+                                    VStack(alignment: .leading, spacing: 4) {
+                                        HStack {
+                                            Text("Downloading model files")
+                                                .font(.caption2.weight(.medium))
+                                                .foregroundStyle(SwiftBuddyTheme.textSecondary)
+                                            Spacer()
+                                            Text("\(Int(progress * 100))% · \(speed)")
+                                                .font(.caption2.monospacedDigit())
+                                                .foregroundStyle(SwiftBuddyTheme.textTertiary)
+                                        }
+                                        ProgressView(value: progress)
+                                            .tint(SwiftBuddyTheme.accent)
+                                    }
+                                default:
+                                    EmptyView()
                                 }
-                                .font(.caption2.weight(.semibold))
-                                .foregroundStyle(SwiftBuddyTheme.accent)
-                                .buttonStyle(.plain)
                             }
                             .padding(.horizontal, 4)
                             .padding(.vertical, 6)