11// GenerationConfig.swift — SwiftLM inference parameters
22import Foundation
33
4- /// Configuration for a single generation request.
5- public struct GenerationConfig : Sendable {
4+ /// Per-request generation parameters, persisted across app launches via UserDefaults.
5+ ///
6+ /// ### Field classification
7+ /// **Per-request** (applied on every `generate()` call — no reload needed):
8+ /// temperature, topP, topK, minP, repetitionPenalty, seed, enableThinking,
9+ /// prefillSize, kvBits, kvGroupSize, turboKV
10+ ///
11+ /// **Load-time** (requires model reload to take effect):
12+ /// streamExperts — controls SSD expert streaming for MoE and large models.
13+ /// Stored here for persistence but applied by InferenceEngine at load time.
14+ public struct GenerationConfig : Sendable , Codable {
615 public var maxTokens : Int
716 public var temperature : Float
817 public var topP : Float
918 public var topK : Int
1019 public var minP : Float
1120 public var repetitionPenalty : Float
12- public var seed : UInt64 ?
13- public var enableThinking : Bool
1421
15- // ── SwiftLM Engine Parameters ──────────────────────────────────────
16- /// Enable TurboQuant KV-cache compression (3-bit PolarQuant+QJL).
17- /// Compresses KV history > 8192 tokens to ~3.5 bits/token.
18- public var turboKV : Bool
22+ /// Optional RNG seed for reproducible outputs.
23+ /// When non-nil, `MLX.seed(seed)` is called before each generation using this `UInt64` value.
24+ public var seed : UInt64 ?
1925
20- /// Enable SSD expert streaming for MoE models.
21- public var streamExperts : Bool
26+ public var enableThinking : Bool
2227
2328 /// Chunk size for prefill evaluation.
2429 /// Lower values prevent GPU timeout on large models.
@@ -30,6 +35,21 @@ public struct GenerationConfig: Sendable {
3035 /// KV-cache quantization group size (default 64).
3136 public var kvGroupSize : Int
3237
38+ /// Enable 3-bit TurboQuant KV-cache compression (PolarQuant+QJL).
39+ /// Compresses KV history older than 8192 tokens to ~3.5 bits/token.
40+ /// Recommended for 100k+ context to halve KV RAM usage.
41+ /// Applied per-request — no model reload needed.
42+ public var turboKV : Bool
43+
44+ /// Enable SSD expert streaming for MoE (and any large) models.
45+ /// When true, expert weights are mmap'd from NVMe and only active
46+ /// expert pages reside in RAM during inference (Flash-MoE style).
47+ /// ⚠️ LOAD-TIME flag: changes take effect on the next model load.
48+ /// MoE models (isMoE == true) default to true automatically;
49+ /// this flag lets users override that for non-catalog models or
50+ /// force-disable streaming even on MoE models.
51+ public var streamExperts : Bool
52+
3353 public init (
3454 maxTokens: Int = 2048 ,
3555 temperature: Float = 0.6 ,
@@ -39,11 +59,11 @@ public struct GenerationConfig: Sendable {
3959 repetitionPenalty: Float = 1.05 ,
4060 seed: UInt64 ? = nil ,
4161 enableThinking: Bool = false ,
42- turboKV: Bool = false ,
43- streamExperts: Bool = false ,
4462 prefillSize: Int = 512 ,
4563 kvBits: Int ? = nil ,
46- kvGroupSize: Int = 64
64+ kvGroupSize: Int = 64 ,
65+ turboKV: Bool = false ,
66+ streamExperts: Bool = false
4767 ) {
4868 self . maxTokens = maxTokens
4969 self . temperature = temperature
@@ -53,12 +73,41 @@ public struct GenerationConfig: Sendable {
5373 self . repetitionPenalty = repetitionPenalty
5474 self . seed = seed
5575 self . enableThinking = enableThinking
56- self . turboKV = turboKV
57- self . streamExperts = streamExperts
5876 self . prefillSize = prefillSize
5977 self . kvBits = kvBits
6078 self . kvGroupSize = kvGroupSize
79+ self . turboKV = turboKV
80+ self . streamExperts = streamExperts
6181 }
6282
6383 public static let `default` = GenerationConfig ( )
84+
85+ // MARK: — Persistence
86+
87+ private static let storageKey = " swiftlm.generationConfig "
88+
/// True when a decodable `GenerationConfig` is stored in `UserDefaults.standard`
/// under `storageKey`.
///
/// Used to distinguish the first-run/default state from an explicit user choice.
/// A value that exists but cannot be decoded (wrong type, corrupt bytes, or a
/// payload that no longer matches the struct's Codable layout) counts as
/// "not persisted": `load()` falls back to `.default` in exactly those cases,
/// so the fallback's fields must not be treated as settings the user chose.
public static var hasPersistedConfig: Bool {
    // `data(forKey:)` yields nil when the key is absent or holds a non-Data value.
    guard let stored = UserDefaults.standard.data(forKey: storageKey) else {
        return false
    }
    // Mirror the decode performed by `load()` so both agree on what is usable.
    return (try? JSONDecoder().decode(GenerationConfig.self, from: stored)) != nil
}
94+
/// Resolves the SSD expert-streaming setting that should actually be used.
///
/// - Parameter defaultValue: The value to use while no config has been persisted
///   (per the surrounding docs, callers pass the model's own default, e.g. on for MoE).
/// - Returns: `defaultValue` when `hasPersistedConfig` is false; otherwise this
///   instance's `streamExperts`, which then takes precedence over the default.
public func effectiveStreamExperts(defaultingTo defaultValue: Bool) -> Bool {
    guard Self.hasPersistedConfig else {
        return defaultValue
    }
    return streamExperts
}
101+
/// Persists this configuration to `UserDefaults.standard` as JSON under `storageKey`.
///
/// Saving is best-effort: if encoding fails, nothing is written and any previously
/// stored value is left untouched. The failure is logged rather than silently
/// dropped, because `JSONEncoder` throws by default on non-finite floating-point
/// values (NaN / ±infinity), which would otherwise make settings quietly fail to stick.
public func save() {
    do {
        let encoded = try JSONEncoder().encode(self)
        UserDefaults.standard.set(encoded, forKey: Self.storageKey)
    } catch {
        // Pass the message as an argument, never as the format string, so a stray
        // '%' in the error description cannot be interpreted as a format specifier.
        NSLog("%@", NSString(string: "GenerationConfig.save: encoding failed, settings not persisted: \(error)"))
    }
}
106+
/// Restores the configuration stored in `UserDefaults.standard` under `storageKey`.
///
/// - Returns: The decoded `GenerationConfig`, or `.default` when no `Data` is stored
///   under the key or the stored bytes fail to decode. Decode errors are discarded.
public static func load() -> GenerationConfig {
    let stored = UserDefaults.standard.data(forKey: storageKey)
    let restored = stored.flatMap { payload in
        try? JSONDecoder().decode(GenerationConfig.self, from: payload)
    }
    return restored ?? .default
}
64113}
0 commit comments