Skip to content

Commit 321fc21

Browse files
Fix persisted SSD streaming behavior
1 parent dcc0a3a · commit 321fc21

3 files changed

Lines changed: 73 additions & 55 deletions

File tree

Sources/MLXInferenceCore/GenerationConfig.swift

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,19 @@ public struct GenerationConfig: Sendable, Codable {
8686

8787
private static let storageKey = "swiftlm.generationConfig"
8888

89+
/// True when the user has previously saved a GenerationConfig.
90+
/// Used to distinguish the first-run/default state from an explicit choice.
91+
public static var hasPersistedConfig: Bool {
92+
UserDefaults.standard.object(forKey: storageKey) != nil
93+
}
94+
95+
/// Computes the effective SSD streaming setting.
96+
/// Before the user has saved settings, MoE models default to streaming on.
97+
/// After settings are persisted, the saved toggle becomes authoritative.
98+
public func effectiveStreamExperts(defaultingTo defaultValue: Bool) -> Bool {
99+
Self.hasPersistedConfig ? streamExperts : defaultValue
100+
}
101+
89102
public func save() {
90103
guard let data = try? JSONEncoder().encode(self) else { return }
91104
UserDefaults.standard.set(data, forKey: Self.storageKey)

Sources/MLXInferenceCore/InferenceEngine.swift

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -331,11 +331,10 @@ public final class InferenceEngine: ObservableObject {
331331
// at load time — only active expert pages touch RAM during inference.
332332
var config = ModelConfiguration(id: modelId)
333333
let isMoE = ModelCatalog.all.first(where: { $0.id == modelId })?.isMoE ?? false
334-
// SSD expert streaming:
335-
// - MoE catalog models default ON (required to fit in RAM)
336-
// - User can override via GenerationConfig.streamExperts for custom/non-catalog models
337-
// - isMoE acts as the default; user toggle overrides both ways
338-
let shouldStream = isMoE || GenerationConfig.load().streamExperts
334+
let generationConfig = GenerationConfig.load()
335+
// SSD expert streaming defaults ON for MoE until the user saves a preference.
336+
// Once persisted, the saved toggle becomes authoritative for all models.
337+
let shouldStream = generationConfig.effectiveStreamExperts(defaultingTo: isMoE)
339338
if shouldStream {
340339
config.lazyLoad = true
341340
let modelDir = ModelStorage.snapshotDirectory(for: modelId)
@@ -349,7 +348,7 @@ public final class InferenceEngine: ObservableObject {
349348
#endif
350349
}()
351350
)
352-
print("[InferenceEngine] SSD expert streaming: enabled (isMoE=\(isMoE), userOverride=\(GenerationConfig.load().streamExperts))")
351+
print("[InferenceEngine] SSD expert streaming: enabled (isMoE=\(isMoE), persisted=\(GenerationConfig.hasPersistedConfig), setting=\(generationConfig.streamExperts))")
353352
} else {
354353
print("[InferenceEngine] SSD expert streaming: disabled")
355354
}

SwiftBuddy/SwiftBuddy/Views/SettingsView.swift

Lines changed: 55 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,22 @@ struct SettingsView: View {
3232
Double(ProcessInfo.processInfo.physicalMemory) / (1024 * 1024 * 1024)
3333
}
3434

35+
private var currentModelIsMoE: Bool {
36+
guard case .ready(let modelId) = engine.state else { return false }
37+
return ModelCatalog.all.first(where: { $0.id == modelId })?.isMoE ?? false
38+
}
39+
40+
private var effectiveStreamExpertsSetting: Bool {
41+
viewModel.config.effectiveStreamExperts(defaultingTo: currentModelIsMoE)
42+
}
43+
44+
private var ssdStreamingBinding: Binding<Bool> {
45+
Binding(
46+
get: { effectiveStreamExpertsSetting },
47+
set: { viewModel.config.streamExperts = $0 }
48+
)
49+
}
50+
3551
enum SettingsTab: String, CaseIterable {
3652
case generation = "Generation"
3753
case engine = "Engine"
@@ -203,7 +219,7 @@ struct SettingsView: View {
203219
get: { Double(viewModel.config.maxTokens) },
204220
set: { viewModel.config.maxTokens = Int($0) }
205221
),
206-
range: 128...max(16384.0, Double(engine.maxContextWindow)), step: 128, format: "%.0f",
222+
range: 128...16384.0, step: 128, format: "%.0f",
207223
tint: SwiftBuddyTheme.accent,
208224
hint: engine.maxContextWindow > 0
209225
? "Max output per reply. Model context window: \(engine.maxContextWindow / 1000)K tokens"
@@ -272,6 +288,21 @@ struct SettingsView: View {
272288
)
273289
}
274290

291+
parameterCard("Performance") {
292+
toggleRow(
293+
label: "SSD Streaming", icon: "internaldrive",
294+
isOn: ssdStreamingBinding,
295+
tint: SwiftBuddyTheme.warning,
296+
hint: "Stream MoE expert weights from NVMe (requires model reload)"
297+
)
298+
toggleRow(
299+
label: "TurboQuant KV", icon: "bolt.badge.clock",
300+
isOn: $viewModel.config.turboKV,
301+
tint: SwiftBuddyTheme.success,
302+
hint: "3-bit KV compression for massive context windows"
303+
)
304+
}
305+
275306
parameterCard("System Prompt") {
276307
TextEditor(text: $viewModel.systemPrompt)
277308
.frame(minHeight: 80)
@@ -520,13 +551,11 @@ struct SettingsView: View {
520551
VStack(alignment: .leading, spacing: 6) {
521552
toggleRow(
522553
label: "SSD Expert Streaming", icon: "externaldrive.fill",
523-
isOn: $viewModel.config.streamExperts,
554+
isOn: ssdStreamingBinding,
524555
tint: SwiftBuddyTheme.accentSecondary,
525556
hint: "mmap expert weights from NVMe — only active expert pages stay in RAM. Auto-enabled for MoE catalog models."
526557
)
527-
if viewModel.config.streamExperts != (ModelCatalog.all.first(where: {
528-
if case .ready(let id) = engine.state { return $0.id == id } else { return false }
529-
})?.isMoE ?? false) {
558+
if effectiveStreamExpertsSetting != currentModelIsMoE {
530559
VStack(alignment: .leading, spacing: 8) {
531560
HStack(spacing: 6) {
532561
Image(systemName: "arrow.clockwise.circle.fill")
@@ -666,22 +695,19 @@ struct SettingsView: View {
666695
.font(.callout)
667696
Spacer()
668697
}
669-
Picker("", selection: Binding(
670-
get: { appearance.preference },
671-
set: { newValue in
672-
localColorScheme = newValue
673-
// Defer the @Published write to avoid the view update crash
674-
Task { @MainActor in
675-
appearance.preference = newValue
676-
}
677-
}
678-
)) {
679-
HStack { Image(systemName: "moon.fill"); Text("Dark") }.tag("dark")
680-
HStack { Image(systemName: "sun.max.fill"); Text("Light") }.tag("light")
681-
HStack { Image(systemName: "circle.lefthalf.filled"); Text("System") }.tag("system")
698+
Picker("", selection: $localColorScheme) {
699+
Text("Dark").tag("dark")
700+
Text("Light").tag("light")
701+
Text("System").tag("system")
682702
}
683703
.pickerStyle(.segmented)
684704
.tint(SwiftBuddyTheme.accent)
705+
.onChange(of: localColorScheme) { newValue in
706+
// Defer the @Published write to avoid the view update crash
707+
Task { @MainActor in
708+
appearance.preference = newValue
709+
}
710+
}
685711
}
686712
}
687713
.padding(.horizontal, 16)
@@ -885,37 +911,17 @@ struct SettingsView: View {
885911

886912
/// Build the equivalent `swift run SwiftLM` command from current settings.
887913
private var cliCommand: String {
888-
let cfg = viewModel.config
889-
var parts: [String] = []
890-
891-
if case .ready(let id) = engine.state {
892-
parts.append("--model \(id)")
893-
} else {
894-
parts.append("--model <model-id>")
895-
}
896-
897-
parts.append("--host \(server.host)")
898-
parts.append("--port \(server.port)")
899-
parts.append("--max-tokens \(cfg.maxTokens)")
900-
parts.append("--temp \(String(format: "%.2f", cfg.temperature))")
901-
902-
if cfg.topP < 1.0 { parts.append("--top-p \(String(format: "%.2f", cfg.topP))") }
903-
if cfg.topK != 50 { parts.append("--top-k \(cfg.topK)") }
904-
if cfg.minP > 0 { parts.append("--min-p \(String(format: "%.2f", cfg.minP))") }
905-
if cfg.repetitionPenalty != 1.05 { parts.append("--repeat-penalty \(String(format: "%.2f", cfg.repetitionPenalty))") }
906-
if cfg.prefillSize != 512 { parts.append("--prefill-size \(cfg.prefillSize)") }
907-
if let kv = cfg.kvBits {
908-
parts.append("--kv-bits \(kv)")
909-
if cfg.kvGroupSize != 64 { parts.append("--kv-group-size \(cfg.kvGroupSize)") }
910-
}
911-
if cfg.enableThinking { parts.append("--thinking") }
912-
if let seed = cfg.seed { parts.append("--seed \(seed)") }
913-
if server.startupConfiguration.parallelSlots > 1 {
914-
parts.append("--parallel \(server.startupConfiguration.parallelSlots)")
915-
}
916-
if !server.startupConfiguration.apiKey.isEmpty { parts.append("--api-key <redacted>") }
917-
918-
return "swift run SwiftLM " + parts.joined(separator: " \\\n ")
914+
buildCLICommand(
915+
config: viewModel.config,
916+
host: server.host,
917+
port: server.port,
918+
parallel: server.startupConfiguration.parallelSlots,
919+
apiKeySet: !server.startupConfiguration.apiKey.isEmpty,
920+
modelId: {
921+
if case .ready(let id) = engine.state { return id }
922+
return nil
923+
}()
924+
)
919925
}
920926

921927
private func copyCLI() {

0 commit comments

Comments
 (0)