Skip to content

Commit c360806

Browse files
feat(swiftbuddy): CLI panel, applied toast, seed wiring, remove dead config fields
streamExperts / turboKV removed from GenerationConfig
- Both were architecturally dead: streamExperts is auto-activated at load time via ModelCatalog.isMoE; turboKV had no downstream wiring in GenerateParameters or the mlx-lm call chain
- Engine tab now shows an 'Advanced Engine' info card explaining SSD streaming is automatic for MoE models and directing users to kvBits for cache quantisation

seed wired end-to-end
- MLX.seed(seed) called before container.prepare() in generate()
- Seed UI in Output card: lock icon to fix a seed, xmark to go random
- Fixed seed shows 'same input → identical output' hint

Settings applied toast (Generation tab)
- .onChange watchers on all 10 config fields flash a green 'Applied — takes effect on next message' capsule for 2s
- Makes clear no restart is needed: params are hot-applied per request

CLI Equivalent card (Engine tab)
- Computes the equivalent `swift run SwiftLM` command from live settings
- Only emits non-default flags (keeps command readable)
- Tap to copy; checkmark confirmation for 2s; horizontally scrollable
- Shows real loaded model ID when available

iOS Performance card fixed
- Was displaced outside #if os(iOS) guard by previous edit
1 parent 0304495 commit c360806

3 files changed

Lines changed: 236 additions & 12 deletions

File tree

Sources/MLXInferenceCore/GenerationConfig.swift

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,27 @@ import Foundation
55
///
66
/// Conforms to `Codable` so settings can be persisted across app launches
77
/// via `save()` / `load()` using `UserDefaults`.
8+
///
9+
/// ### Notes on removed fields
10+
/// - `streamExperts` was removed: expert streaming is a **load-time** flag
11+
/// automatically derived from `ModelCatalog.isMoE` inside `InferenceEngine.load()`.
12+
/// Exposing it as a per-request toggle had no effect and misled users.
13+
/// - `turboKV` was removed: the PolarQuant+QJL path was never wired into
14+
/// `GenerateParameters` or the mlx-lm call chain. Use `kvBits: 4` or `kvBits: 8`
15+
/// for KV-cache quantisation instead.
816
public struct GenerationConfig: Sendable, Codable {
917
public var maxTokens: Int
1018
public var temperature: Float
1119
public var topP: Float
1220
public var topK: Int
1321
public var minP: Float
1422
public var repetitionPenalty: Float
15-
public var seed: UInt64?
16-
public var enableThinking: Bool
1723

18-
// ── SwiftLM Engine Parameters ──────────────────────────────────────
19-
/// Enable TurboQuant KV-cache compression (3-bit PolarQuant+QJL).
20-
/// Compresses KV history > 8192 tokens to ~3.5 bits/token.
21-
public var turboKV: Bool
24+
/// Optional RNG seed for reproducible outputs.
25+
/// When non-nil, `MLX.seed(seed)` is called with the unwrapped `UInt64` before each generation.
26+
public var seed: UInt64?
2227

23-
/// Enable SSD expert streaming for MoE models.
24-
public var streamExperts: Bool
28+
public var enableThinking: Bool
2529

2630
/// Chunk size for prefill evaluation.
2731
/// Lower values prevent GPU timeout on large models.
@@ -42,8 +46,6 @@ public struct GenerationConfig: Sendable, Codable {
4246
repetitionPenalty: Float = 1.05,
4347
seed: UInt64? = nil,
4448
enableThinking: Bool = false,
45-
turboKV: Bool = false,
46-
streamExperts: Bool = false,
4749
prefillSize: Int = 512,
4850
kvBits: Int? = nil,
4951
kvGroupSize: Int = 64
@@ -56,8 +58,6 @@ public struct GenerationConfig: Sendable, Codable {
5658
self.repetitionPenalty = repetitionPenalty
5759
self.seed = seed
5860
self.enableThinking = enableThinking
59-
self.turboKV = turboKV
60-
self.streamExperts = streamExperts
6161
self.prefillSize = prefillSize
6262
self.kvBits = kvBits
6363
self.kvGroupSize = kvGroupSize

Sources/MLXInferenceCore/InferenceEngine.swift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,11 @@ extension InferenceEngine {
594594
var outputText = ""
595595
var tokenCount = 0
596596

597+
// Set RNG seed for reproducible output when requested.
598+
if let seed = config.seed {
599+
MLX.seed(seed)
600+
}
601+
597602
// Pass enable_thinking to the Jinja chat template so the model
598603
// actually generates <think> blocks when thinking mode is ON.
599604
// Without this kwarg, Qwen3's template defaults to thinking=false

SwiftBuddy/SwiftBuddy/Views/SettingsView.swift

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ struct SettingsView: View {
1919
@State private var draftServerConfiguration = ServerStartupConfiguration.load()
2020
@State private var showRestartNotification = false
2121
@State private var endpointCopied = false
22+
@State private var showAppliedBadge = false
23+
@State private var cliCopied = false
2224
@State private var serverSaveMessage = "Server settings saved"
2325
@State private var restartNotificationRequiresAction = false
2426

@@ -209,6 +211,48 @@ struct SettingsView: View {
209211
tint: SwiftBuddyTheme.success,
210212
hint: "Higher = less repeating, 1.0 = disabled"
211213
)
214+
215+
// Seed — optional reproducibility.
//
// The stepper edits the seed through an `Int` binding, so every seed created
// here is kept inside `0...Int.max`. Converting a larger `UInt64` with
// `Int(seed)` traps at runtime, which previously crashed the view for about
// half of all randomly generated seeds.
HStack {
    Label("Seed", systemImage: "number")
        .foregroundStyle(SwiftBuddyTheme.textPrimary)
        .font(.callout)
    Spacer()
    if let seed = viewModel.config.seed {
        Text("\(seed)")
            .foregroundStyle(SwiftBuddyTheme.textSecondary)
            .font(.callout.monospacedDigit())
        Stepper("", value: Binding(
            // Clamp rather than trap: a seed that did not originate from the
            // lock button below may still exceed `Int.max`.
            get: { Int(clamping: seed) },
            set: { viewModel.config.seed = UInt64(clamping: $0) }
        ), in: 0...Int.max)
        .labelsHidden()
        // Clearing the seed returns generation to random sampling.
        Button {
            viewModel.config.seed = nil
        } label: {
            Image(systemName: "xmark.circle.fill")
                .foregroundStyle(SwiftBuddyTheme.textTertiary)
        }
        .buttonStyle(.plain)
    } else {
        Text("Random")
            .foregroundStyle(SwiftBuddyTheme.textTertiary)
            .font(.callout)
        // Locking picks a fresh seed that is representable as `Int`, so the
        // stepper binding above can always display and edit it.
        Button {
            viewModel.config.seed = UInt64.random(in: 0...UInt64(Int.max))
        } label: {
            Image(systemName: "lock.fill")
                .foregroundStyle(SwiftBuddyTheme.accent)
        }
        .buttonStyle(.plain)
    }
}
.padding(.vertical, 2)
if viewModel.config.seed != nil {
    Text("Fixed seed — same input will produce identical output")
        .font(.caption2)
        .foregroundStyle(SwiftBuddyTheme.textTertiary)
}
212256
}
213257

214258
parameterCard("Reasoning") {
@@ -257,6 +301,39 @@ struct SettingsView: View {
257301
}
258302
.padding(.top, 8)
259303
}
304+
// Generation params are hot-applied per request — no restart needed.
305+
// Flash a brief badge so the user knows the change was captured.
306+
.onChange(of: viewModel.config.temperature) { flashApplied() }
307+
.onChange(of: viewModel.config.topP) { flashApplied() }
308+
.onChange(of: viewModel.config.topK) { flashApplied() }
309+
.onChange(of: viewModel.config.minP) { flashApplied() }
310+
.onChange(of: viewModel.config.maxTokens) { flashApplied() }
311+
.onChange(of: viewModel.config.repetitionPenalty) { flashApplied() }
312+
.onChange(of: viewModel.config.enableThinking) { flashApplied() }
313+
.onChange(of: viewModel.config.kvBits) { flashApplied() }
314+
.onChange(of: viewModel.config.prefillSize) { flashApplied() }
315+
.onChange(of: viewModel.config.seed) { flashApplied() }
316+
.overlay(alignment: .top) {
317+
if showAppliedBadge {
318+
HStack(spacing: 6) {
319+
Image(systemName: "checkmark.circle.fill")
320+
.foregroundStyle(SwiftBuddyTheme.success)
321+
.font(.caption)
322+
Text("Applied — takes effect on next message")
323+
.font(.caption.weight(.medium))
324+
.foregroundStyle(SwiftBuddyTheme.textPrimary)
325+
}
326+
.padding(.horizontal, 14)
327+
.padding(.vertical, 8)
328+
.background(.ultraThinMaterial)
329+
.background(SwiftBuddyTheme.success.opacity(0.12))
330+
.clipShape(Capsule())
331+
.overlay(Capsule().strokeBorder(SwiftBuddyTheme.success.opacity(0.3), lineWidth: 1))
332+
.padding(.top, 8)
333+
.transition(.move(edge: .top).combined(with: .opacity))
334+
.animation(.easeInOut(duration: 0.2), value: showAppliedBadge)
335+
}
336+
}
260337
}
261338

262339
// MARK: — Engine Tab
@@ -436,6 +513,43 @@ struct SettingsView: View {
436513
.tint(SwiftBuddyTheme.accent)
437514
}
438515

516+
parameterCard("Advanced Engine") {
517+
HStack(alignment: .top, spacing: 10) {
518+
Image(systemName: "bolt.circle.fill")
519+
.foregroundStyle(SwiftBuddyTheme.accentSecondary)
520+
.font(.callout)
521+
.padding(.top, 2)
522+
VStack(alignment: .leading, spacing: 4) {
523+
Text("SSD Streaming — automatic for MoE models")
524+
.font(.callout.weight(.medium))
525+
.foregroundStyle(SwiftBuddyTheme.textPrimary)
526+
Text("Expert weight streaming is enabled automatically when you load a Mixture-of-Experts model (e.g. Qwen 3.5 35B MoE). No manual toggle is needed.")
527+
.font(.caption2)
528+
.foregroundStyle(SwiftBuddyTheme.textTertiary)
529+
.fixedSize(horizontal: false, vertical: true)
530+
}
531+
}
532+
.padding(.vertical, 2)
533+
534+
Divider().background(SwiftBuddyTheme.divider)
535+
536+
HStack(alignment: .top, spacing: 10) {
537+
Image(systemName: "memorychip")
538+
.foregroundStyle(SwiftBuddyTheme.warning)
539+
.font(.callout)
540+
.padding(.top, 2)
541+
VStack(alignment: .leading, spacing: 4) {
542+
Text("KV Cache Quantisation")
543+
.font(.callout.weight(.medium))
544+
.foregroundStyle(SwiftBuddyTheme.textPrimary)
545+
Text("Set KV Bits to 4 or 8 in the KV Cache card below to compress the attention cache. Reduces VRAM at the cost of slight quality.")
546+
.font(.caption2)
547+
.foregroundStyle(SwiftBuddyTheme.textTertiary)
548+
.fixedSize(horizontal: false, vertical: true)
549+
}
550+
}
551+
.padding(.vertical, 2)
552+
}
439553
#if os(iOS)
440554
parameterCard("iOS Performance") {
441555
toggleRow(
@@ -458,6 +572,34 @@ struct SettingsView: View {
458572
}
459573
#endif
460574

575+
// ── CLI Equivalent ──────────────────────────────────────────
576+
parameterCard("CLI Equivalent") {
577+
Text("Run standalone server with these settings:")
578+
.font(.caption2)
579+
.foregroundStyle(SwiftBuddyTheme.textTertiary)
580+
581+
ScrollView(.horizontal, showsIndicators: false) {
582+
Text(cliCommand)
583+
.font(.system(size: 11, design: .monospaced))
584+
.foregroundStyle(SwiftBuddyTheme.textSecondary)
585+
.textSelection(.enabled)
586+
.padding(.vertical, 6)
587+
}
588+
589+
Button {
590+
copyCLI()
591+
} label: {
592+
Label(
593+
cliCopied ? "Copied!" : "Copy Command",
594+
systemImage: cliCopied ? "checkmark" : "doc.on.doc"
595+
)
596+
.font(.caption.weight(.medium))
597+
.frame(maxWidth: .infinity)
598+
}
599+
.buttonStyle(.bordered)
600+
.tint(cliCopied ? SwiftBuddyTheme.success : SwiftBuddyTheme.accent)
601+
.animation(.easeInOut(duration: 0.2), value: cliCopied)
602+
}
461603
Spacer(minLength: 20)
462604
}
463605
.padding(.top, 8)
@@ -643,6 +785,83 @@ struct SettingsView: View {
643785
.shadow(color: .black.opacity(0.18), radius: 14, y: 6)
644786
}
645787

788+
/// Shows the "Applied" badge and schedules it to be hidden two seconds later.
///
/// Both the show and the hide are wrapped in `withAnimation` so the badge's
/// transition is animated.
///
/// NOTE(review): every call schedules its own hide and nothing cancels an
/// earlier one, so when changes arrive in quick succession the badge can
/// disappear two seconds after the *first* change rather than the last.
/// Fixing that needs a stored token or task outside this function.
private func flashApplied() {
    let hideDeadline: DispatchTime = .now() + 2

    withAnimation {
        showAppliedBadge = true
    }

    DispatchQueue.main.asyncAfter(deadline: hideDeadline) {
        withAnimation {
            showAppliedBadge = false
        }
    }
}
794+
795+
/// Builds the equivalent `swift run SwiftLM` command line from the current settings.
///
/// The model, host, port, max-token and temperature flags are always emitted.
/// Every other flag is appended only when its value differs from the value
/// this property treats as the default, which keeps the command readable.
/// A configured API key is never printed; a `<redacted>` placeholder is
/// emitted in its place.
///
/// Flags are joined with a backslash followed by a newline, so the result
/// reads one flag per line and still pastes into a shell as a single command.
private var cliCommand: String {
    let cfg = viewModel.config
    let srv = server
    var parts: [String] = []

    // Model: use the loaded model's ID when the engine is ready, otherwise a
    // placeholder the user has to fill in.
    if case .ready(let id) = engine.state {
        parts.append("--model \(id)")
    } else {
        parts.append("--model <model-id>")
    }

    // Always-present flags.
    parts.append("--host \(srv.host)")
    parts.append("--port \(srv.port)")
    parts.append("--max-tokens \(cfg.maxTokens)")
    parts.append("--temp \(String(format: "%.2f", cfg.temperature))")

    // Sampling flags, emitted only when they differ from the assumed defaults.
    if cfg.topP < 1.0 {
        parts.append("--top-p \(String(format: "%.2f", cfg.topP))")
    }
    if cfg.topK != 50 {
        parts.append("--top-k \(cfg.topK)")
    }
    if cfg.minP > 0 {
        parts.append("--min-p \(String(format: "%.2f", cfg.minP))")
    }
    if cfg.repetitionPenalty != 1.05 {
        parts.append("--repeat-penalty \(String(format: "%.2f", cfg.repetitionPenalty))")
    }

    // Engine flags.
    if cfg.prefillSize != 512 {
        parts.append("--prefill-size \(cfg.prefillSize)")
    }
    if let kvBits = cfg.kvBits {
        parts.append("--kv-bits \(kvBits)")
        // The group size is only emitted alongside --kv-bits.
        if cfg.kvGroupSize != 64 {
            parts.append("--kv-group-size \(cfg.kvGroupSize)")
        }
    }
    if cfg.enableThinking {
        parts.append("--thinking")
    }
    if let seed = cfg.seed {
        parts.append("--seed \(seed)")
    }

    // Server flags.
    if srv.parallelSlots > 1 {
        parts.append("--parallel \(srv.parallelSlots)")
    }
    if !srv.startupConfiguration.apiKey.isEmpty {
        // Never echo the real key into a copyable command.
        parts.append("--api-key <redacted>")
    }

    // The separator must use the `\n` escape: an ordinary Swift string
    // literal cannot contain a raw line break.
    return "swift run SwiftLM " + parts.joined(separator: " \\\n")
}
851+
852+
/// Copies the generated CLI command to the system pasteboard, then sets
/// `cliCopied` for two seconds so the button can show its confirmation state.
private func copyCLI() {
    let command = cliCommand

    #if os(macOS)
    let pasteboard = NSPasteboard.general
    // Clear the pasteboard's existing contents before writing the command.
    pasteboard.clearContents()
    pasteboard.setString(command, forType: .string)
    #else
    UIPasteboard.general.string = command
    #endif

    withAnimation {
        cliCopied = true
    }

    // Revert the confirmation state after two seconds.
    let resetDeadline: DispatchTime = .now() + 2
    DispatchQueue.main.asyncAfter(deadline: resetDeadline) {
        withAnimation {
            cliCopied = false
        }
    }
}
864+
646865
private func copyEndpoint(_ url: String) {
647866
#if os(macOS)
648867
NSPasteboard.general.clearContents()

0 commit comments

Comments
 (0)