@@ -79,12 +79,15 @@ struct MLXServer: AsyncParsableCommand {
7979 @Flag ( name: . long, help: " Force re-calibration of optimal memory settings (normally auto-cached) " )
8080 var calibrate : Bool = false
8181
82+ @Flag ( name: . long, help: " Enable SSD expert streaming for MoE models (Flash-MoE style memory-mapping) " )
83+ var streamExperts : Bool = false
84+
8285 mutating func run( ) async throws {
8386 print ( " [mlx-server] Loading model: \( model) " )
8487 let modelId = model
8588
8689 // ── Load model ──
87- let modelConfig : ModelConfiguration
90+ var modelConfig : ModelConfiguration
8891 let fileManager = FileManager . default
8992 if fileManager. fileExists ( atPath: modelId) {
9093 var isDir : ObjCBool = false
@@ -98,6 +101,11 @@ struct MLXServer: AsyncParsableCommand {
98101 } else {
99102 modelConfig = ModelConfiguration ( id: modelId)
100103 }
104+
105+ // Inject streaming flag into config to bypass eval(model) if requested
106+ if self . streamExperts {
107+ modelConfig. lazyLoad = true
108+ }
101109
102110 // ── Pre-load profiling ──
103111 // Resolve model directory for profiling (checks HuggingFace cache)
@@ -193,6 +201,16 @@ struct MLXServer: AsyncParsableCommand {
193201 }
194202 }
195203
204+ // ── Apply SSD Expert Streaming ──
205+ if self . streamExperts {
206+ let streamingEnabled = await container. setStreamExperts ( true )
207+ if streamingEnabled {
208+ print ( " [mlx-server] 💾 SSD Expert Streaming enabled (lazy load + layer-sync) " )
209+ } else {
210+ print ( " [mlx-server] ⚠️ Model does not support SSD expert streaming " )
211+ }
212+ }
213+
196214 // ── Auto-calibration (Wisdom system) ──
197215 if let plan = partitionPlan {
198216 if self . calibrate {
0 commit comments