fix: address Copilot review feedback on PR #77

github-actions[bot] · github-actions[bot] · commit 7b0bfd496622 · 2026-04-23T11:25:54.000-07:00
- Fix Server.swift: remove the later 200 GB `Memory.memoryLimit` sentinel assignments that unconditionally overrode the memory limit chosen earlier in execution
- Consolidate ModelProfiler.profile calls to reduce startup latency
- Replace hardcoded 16384 page sizes with dynamic sysctl hw.pagesize in CI and benchmark scripts
- Ensure CI multiline JSON inference output is written to a file (/tmp/inf_result.json) instead of GITHUB_OUTPUT
- Refine unit tests to properly assert fan-out break-even limits, and standardize units to GiB
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -292,11 +292,12 @@ jobs:
       - name: Snapshot RAM before test
         id: ram_before
         run: |
-          RAM=$(vm_stat | awk '
+          PAGE_SIZE=$(sysctl -n hw.pagesize)
+          RAM=$(vm_stat | awk -v page_size="$PAGE_SIZE" '
             /Pages active:/        { v=$3; gsub(/\./, "", v); act=v+0 }
             /Pages wired down:/    { v=$4; gsub(/\./, "", v); wire=v+0 }
             /Pages occupied by compressor:/ { v=$5; gsub(/\./, "", v); comp=v+0 }
-            END { printf "%.2f", (act+wire+comp)*16384/1073741824 }
+            END { printf "%.2f", (act+wire+comp)*page_size/1073741824 }
           ')
           echo "ram_before=$RAM" >> $GITHUB_OUTPUT
           echo "RAM before eval: ${RAM} GB"
@@ -326,11 +327,12 @@ jobs:
         if: always()
         id: ram_after
         run: |
-          RAM=$(vm_stat | awk '
+          PAGE_SIZE=$(sysctl -n hw.pagesize)
+          RAM=$(vm_stat | awk -v page_size="$PAGE_SIZE" '
             /Pages active:/        { v=$3; gsub(/\./, "", v); act=v+0 }
             /Pages wired down:/    { v=$4; gsub(/\./, "", v); wire=v+0 }
             /Pages occupied by compressor:/ { v=$5; gsub(/\./, "", v); comp=v+0 }
-            END { printf "%.2f", (act+wire+comp)*16384/1073741824 }
+            END { printf "%.2f", (act+wire+comp)*page_size/1073741824 }
           ')
           echo "ram_after=$RAM" >> $GITHUB_OUTPUT
           echo "RAM after eval: ${RAM} GB"
@@ -411,11 +413,12 @@ jobs:
       - name: Snapshot RAM baseline
         id: ram_base
         run: |
-          RAM=$(vm_stat | awk '
+          PAGE_SIZE=$(sysctl -n hw.pagesize)
+          RAM=$(vm_stat | awk -v page_size="$PAGE_SIZE" '
             /Pages active:/        { v=$3; gsub(/\./, "", v); act=v+0 }
             /Pages wired down:/    { v=$4; gsub(/\./, "", v); wire=v+0 }
             /Pages occupied by compressor:/ { v=$5; gsub(/\./, "", v); comp=v+0 }
-            END { printf "%.2f", (act+wire+comp)*16384/1073741824 }
+            END { printf "%.2f", (act+wire+comp)*page_size/1073741824 }
           ')
           TOTAL=$(sysctl -n hw.memsize | awk '{printf "%.0f", $1/1073741824}')
           LIMIT=$(echo "$TOTAL * 0.85" | bc | cut -d. -f1)
@@ -458,11 +461,12 @@ jobs:
       - name: Snapshot RAM after model load
         id: ram_loaded
         run: |
-          RAM=$(vm_stat | awk '
+          PAGE_SIZE=$(sysctl -n hw.pagesize)
+          RAM=$(vm_stat | awk -v page_size="$PAGE_SIZE" '
             /Pages active:/        { v=$3; gsub(/\./, "", v); act=v+0 }
             /Pages wired down:/    { v=$4; gsub(/\./, "", v); wire=v+0 }
             /Pages occupied by compressor:/ { v=$5; gsub(/\./, "", v); comp=v+0 }
-            END { printf "%.2f", (act+wire+comp)*16384/1073741824 }
+            END { printf "%.2f", (act+wire+comp)*page_size/1073741824 }
           ')
           echo "ram_loaded=$RAM" >> $GITHUB_OUTPUT
           echo "RAM after load: ${RAM} GB"
@@ -485,13 +489,14 @@ jobs:
             -H "Content-Type: application/json" \
             -d '{"model":"test","messages":[{"role":"user","content":"What is 2+2? One word."}],"max_tokens":32,"stream":false}' \
             2>/dev/null || echo "{}")
-          echo "inf_result=$RESULT" >> $GITHUB_OUTPUT
+          echo "$RESULT" > /tmp/inf_result.json
 
-          RAM=$(vm_stat | awk '
+          PAGE_SIZE=$(sysctl -n hw.pagesize)
+          RAM=$(vm_stat | awk -v page_size="$PAGE_SIZE" '
             /Pages active:/        { v=$3; gsub(/\./, "", v); act=v+0 }
             /Pages wired down:/    { v=$4; gsub(/\./, "", v); wire=v+0 }
             /Pages occupied by compressor:/ { v=$5; gsub(/\./, "", v); comp=v+0 }
-            END { printf "%.2f", (act+wire+comp)*16384/1073741824 }
+            END { printf "%.2f", (act+wire+comp)*page_size/1073741824 }
           ')
           echo "ram_peak=$RAM" >> $GITHUB_OUTPUT
           echo "RAM after inference: ${RAM} GB"
@@ -508,7 +513,7 @@ jobs:
 
       - name: "[3/3] Validate inference response"
         run: |
-          RESULT='${{ steps.ram_peak.outputs.inf_result }}'
+          RESULT=$(cat /tmp/inf_result.json)
           if echo "$RESULT" | grep -q '"content"'; then
             TEXT=$(echo "$RESULT" | python3 -c \
               "import sys,json;d=json.load(sys.stdin);print(d['choices'][0]['message']['content'])" \
diff --git a/Sources/SwiftLM/Server.swift b/Sources/SwiftLM/Server.swift
@@ -345,6 +345,8 @@ struct MLXServer: AsyncParsableCommand {
             draftFootprintBytes = 0
         }
 
+        var mainModelProfile: ModelProfile? = nil
+
         if self.streamExperts, let modelDir = modelDirectory {
             setenv("EXPERIMENTAL_SSD_STREAM", modelDir.path, 1)
             // Activate the modern Swift ExpertStreamingConfig so Load.swift can:
@@ -381,7 +383,8 @@ struct MLXServer: AsyncParsableCommand {
             Memory.cacheLimit = computeSSDMemoryBudget(totalRAMBytes: system.totalRAMBytes, draftWeightBytes: draftFootprintBytes)
 
             // Determine safe memoryLimit sentinel
-            let mainFootprintBytes = ModelProfiler.profile(modelDirectory: modelDir, modelId: modelId)?.weightFileSizeBytes ?? 0
+            mainModelProfile = ModelProfiler.profile(modelDirectory: modelDir, modelId: modelId)
+            let mainFootprintBytes = mainModelProfile?.weightFileSizeBytes ?? 0
             let combinedFootprint = mainFootprintBytes + draftFootprintBytes
             let physicalRAM = Int(system.totalRAMBytes)
             let combinedExceedsRAM = combinedFootprint > Int(Double(physicalRAM) * 0.70)
@@ -417,8 +420,10 @@ struct MLXServer: AsyncParsableCommand {
         }
         
         var partitionPlan: PartitionPlan?
+        // Reuse the profile computed earlier (if any) instead of profiling again. Both bindings stay in
+        // one condition so the `else if self.info` fallback below still runs when profiling returns nil.
         if let modelDir = modelDirectory,
-           let profile = ModelProfiler.profile(modelDirectory: modelDir, modelId: modelId) {
+           let profile = mainModelProfile ?? ModelProfiler.profile(modelDirectory: modelDir, modelId: modelId) {
             let system = ModelProfiler.systemProfile()
             let contextSize = self.ctxSize ?? 4096
             let plan = ModelProfiler.plan(model: profile, system: system, contextSize: contextSize)
@@ -441,7 +446,6 @@ struct MLXServer: AsyncParsableCommand {
                     // draftFootprintBytes pre-computed once above (Copilot review).
                     let physicalBudget = computeSSDMemoryBudget(totalRAMBytes: system.totalRAMBytes, draftWeightBytes: draftFootprintBytes)
                     Memory.cacheLimit = physicalBudget
-                    Memory.memoryLimit = 200 * 1024 * 1024 * 1024 // 200GB sentinel to bypass MLX eval_impl spin loop
                     print("[SwiftLM] 💾 Memory strategy: SSD STREAMING (page-cache managed, \(physicalBudget / (1024*1024*1024))GB RAM budget, no swap)")
                 } else {
                     Memory.cacheLimit = plan.recommendedCacheLimit
@@ -453,7 +457,6 @@ struct MLXServer: AsyncParsableCommand {
                     // draftFootprintBytes pre-computed once above (Copilot review).
                     let physicalBudget = computeSSDMemoryBudget(totalRAMBytes: system.totalRAMBytes, draftWeightBytes: draftFootprintBytes)
                     Memory.cacheLimit = physicalBudget
-                    Memory.memoryLimit = 200 * 1024 * 1024 * 1024 // 200GB sentinel to bypass MLX eval_impl spin loop
                     print("[SwiftLM] 💾 Memory strategy: SSD STREAMING (page-cache managed, \(physicalBudget / (1024*1024*1024))GB RAM budget, no swap)")
                 } else {
                     Memory.cacheLimit = plan.recommendedCacheLimit
diff --git a/run_benchmark.sh b/run_benchmark.sh
@@ -1174,11 +1174,12 @@ if [ "$suite_opt" == "10" ]; then
 
-    # Measure RAM via vm_stat (Apple Silicon page size = 16384 bytes)
+    # Measure RAM via vm_stat (page size read at runtime via sysctl hw.pagesize)
     get_ram_gb_t10() {
-        vm_stat | awk '
+        PAGE_SIZE=$(sysctl -n hw.pagesize)
+        vm_stat | awk -v page_size="$PAGE_SIZE" '
             /Pages active:/        { v=$3; gsub(/\./, "", v); act=v+0 }
             /Pages wired down:/    { v=$4; gsub(/\./, "", v); wire=v+0 }
             /Pages occupied by compressor:/ { v=$5; gsub(/\./, "", v); comp=v+0 }
-            END { printf "%.2f", (act+wire+comp)*16384/1073741824 }
+            END { printf "%.2f", (act+wire+comp)*page_size/1073741824 }
         '
     }
 

Original file line number	Diff line number	Diff line change
`@@ -1174,11 +1174,12 @@ if [ "$suite_opt" == "10" ]; then`
`1174`	`1174`
`1175`		`- # Measure RAM via vm_stat (Apple Silicon page size = 16384 bytes)`
	`1175`	`+ # Measure RAM via vm_stat (page size read at runtime via sysctl hw.pagesize)`
`1176`	`1176`	`get_ram_gb_t10() {`
`1177`		`- vm_stat \| awk '`
	`1177`	`+ PAGE_SIZE=$(sysctl -n hw.pagesize)`
	`1178`	`+ vm_stat \| awk -v page_size="$PAGE_SIZE" '`
`1178`	`1179`	`/Pages active:/ { v=$3; gsub(/\./, "", v); act=v+0 }`
`1179`	`1180`	`/Pages wired down:/ { v=$4; gsub(/\./, "", v); wire=v+0 }`
`1180`	`1181`	`/Pages occupied by compressor:/ { v=$5; gsub(/\./, "", v); comp=v+0 }`
`1181`		`- END { printf "%.2f", (act+wire+comp)*16384/1073741824 }`
	`1182`	`+ END { printf "%.2f", (act+wire+comp)*page_size/1073741824 }`
`1182`	`1183`	`'`
`1183`	`1184`	`}`
`1184`	`1185`