SharpAI
diff --git a/Package.resolved b/Package.resolved
Lines changed: 1 addition & 1 deletion
diff --git a/Sources/mlx-server/Server.swift b/Sources/mlx-server/Server.swift
Lines changed: 19 additions & 1 deletion
@@ -79,12 +79,15 @@ struct MLXServer: AsyncParsableCommand {
     @Flag(name: .long, help: "Force re-calibration of optimal memory settings (normally auto-cached)")
     var calibrate: Bool = false
 
+    @Flag(name: .long, help: "Enable SSD expert streaming for MoE models (Flash-MoE style memory-mapping)")
+    var streamExperts: Bool = false
+
     mutating func run() async throws {
         print("[mlx-server] Loading model: \(model)")
         let modelId = model
 
         // ── Load model ──
-        let modelConfig: ModelConfiguration
+        var modelConfig: ModelConfiguration
         let fileManager = FileManager.default
         if fileManager.fileExists(atPath: modelId) {
             var isDir: ObjCBool = false
@@ -98,6 +101,11 @@ struct MLXServer: AsyncParsableCommand {
         } else {
             modelConfig = ModelConfiguration(id: modelId)
         }
+        
+        // When --stream-experts is set, mark the config for lazy loading (intended to bypass eval(model))
+        if self.streamExperts {
+            modelConfig.lazyLoad = true
+        }
 
         // ── Pre-load profiling ──
         // Resolve model directory for profiling (checks HuggingFace cache)
@@ -193,6 +201,16 @@ struct MLXServer: AsyncParsableCommand {
             }
         }
 
+        // ── Apply SSD Expert Streaming ──
+        if self.streamExperts {
+            let streamingEnabled = await container.setStreamExperts(true)
+            if streamingEnabled {
+                print("[mlx-server] 💾 SSD Expert Streaming enabled (lazy load + layer-sync)")
+            } else {
+                print("[mlx-server] ⚠️  Model does not support SSD expert streaming")
+            }
+        }
+
         // ── Auto-calibration (Wisdom system) ──
         if let plan = partitionPlan {
             if self.calibrate {