Skip to content

Commit 7b0bfd4

Browse files
fix: address Copilot review feedback on PR #77
- Fix Server.swift memory limit being unconditionally overridden later in execution
- Consolidate ModelProfiler.profile calls to reduce startup latency
- Replace hardcoded 16384 page sizes with dynamic sysctl hw.pagesize in CI and benchmark scripts
- Ensure CI multiline JSON inference output is correctly piped to files instead of GITHUB_OUTPUT
- Refine unit tests to assert fan-out break-even limits properly and standardize to GiB
1 parent 58249c2 commit 7b0bfd4

3 files changed

Lines changed: 28 additions & 19 deletions

File tree

.github/workflows/ci.yml

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -292,11 +292,12 @@ jobs:
292292
- name: Snapshot RAM before test
293293
id: ram_before
294294
run: |
295-
RAM=$(vm_stat | awk '
295+
PAGE_SIZE=$(sysctl -n hw.pagesize)
296+
RAM=$(vm_stat | awk -v page_size="$PAGE_SIZE" '
296297
/Pages active:/ { v=$3; gsub(/\./, "", v); act=v+0 }
297298
/Pages wired down:/ { v=$4; gsub(/\./, "", v); wire=v+0 }
298299
/Pages occupied by compressor:/ { v=$5; gsub(/\./, "", v); comp=v+0 }
299-
END { printf "%.2f", (act+wire+comp)*16384/1073741824 }
300+
END { printf "%.2f", (act+wire+comp)*page_size/1073741824 }
300301
')
301302
echo "ram_before=$RAM" >> $GITHUB_OUTPUT
302303
echo "RAM before eval: ${RAM} GB"
@@ -326,11 +327,12 @@ jobs:
326327
if: always()
327328
id: ram_after
328329
run: |
329-
RAM=$(vm_stat | awk '
330+
PAGE_SIZE=$(sysctl -n hw.pagesize)
331+
RAM=$(vm_stat | awk -v page_size="$PAGE_SIZE" '
330332
/Pages active:/ { v=$3; gsub(/\./, "", v); act=v+0 }
331333
/Pages wired down:/ { v=$4; gsub(/\./, "", v); wire=v+0 }
332334
/Pages occupied by compressor:/ { v=$5; gsub(/\./, "", v); comp=v+0 }
333-
END { printf "%.2f", (act+wire+comp)*16384/1073741824 }
335+
END { printf "%.2f", (act+wire+comp)*page_size/1073741824 }
334336
')
335337
echo "ram_after=$RAM" >> $GITHUB_OUTPUT
336338
echo "RAM after eval: ${RAM} GB"
@@ -411,11 +413,12 @@ jobs:
411413
- name: Snapshot RAM baseline
412414
id: ram_base
413415
run: |
414-
RAM=$(vm_stat | awk '
416+
PAGE_SIZE=$(sysctl -n hw.pagesize)
417+
RAM=$(vm_stat | awk -v page_size="$PAGE_SIZE" '
415418
/Pages active:/ { v=$3; gsub(/\./, "", v); act=v+0 }
416419
/Pages wired down:/ { v=$4; gsub(/\./, "", v); wire=v+0 }
417420
/Pages occupied by compressor:/ { v=$5; gsub(/\./, "", v); comp=v+0 }
418-
END { printf "%.2f", (act+wire+comp)*16384/1073741824 }
421+
END { printf "%.2f", (act+wire+comp)*page_size/1073741824 }
419422
')
420423
TOTAL=$(sysctl -n hw.memsize | awk '{printf "%.0f", $1/1073741824}')
421424
LIMIT=$(echo "$TOTAL * 0.85" | bc | cut -d. -f1)
@@ -458,11 +461,12 @@ jobs:
458461
- name: Snapshot RAM after model load
459462
id: ram_loaded
460463
run: |
461-
RAM=$(vm_stat | awk '
464+
PAGE_SIZE=$(sysctl -n hw.pagesize)
465+
RAM=$(vm_stat | awk -v page_size="$PAGE_SIZE" '
462466
/Pages active:/ { v=$3; gsub(/\./, "", v); act=v+0 }
463467
/Pages wired down:/ { v=$4; gsub(/\./, "", v); wire=v+0 }
464468
/Pages occupied by compressor:/ { v=$5; gsub(/\./, "", v); comp=v+0 }
465-
END { printf "%.2f", (act+wire+comp)*16384/1073741824 }
469+
END { printf "%.2f", (act+wire+comp)*page_size/1073741824 }
466470
')
467471
echo "ram_loaded=$RAM" >> $GITHUB_OUTPUT
468472
echo "RAM after load: ${RAM} GB"
@@ -485,13 +489,14 @@ jobs:
485489
-H "Content-Type: application/json" \
486490
-d '{"model":"test","messages":[{"role":"user","content":"What is 2+2? One word."}],"max_tokens":32,"stream":false}' \
487491
2>/dev/null || echo "{}")
488-
echo "inf_result=$RESULT" >> $GITHUB_OUTPUT
492+
echo "$RESULT" > /tmp/inf_result.json
489493
490-
RAM=$(vm_stat | awk '
494+
PAGE_SIZE=$(sysctl -n hw.pagesize)
495+
RAM=$(vm_stat | awk -v page_size="$PAGE_SIZE" '
491496
/Pages active:/ { v=$3; gsub(/\./, "", v); act=v+0 }
492497
/Pages wired down:/ { v=$4; gsub(/\./, "", v); wire=v+0 }
493498
/Pages occupied by compressor:/ { v=$5; gsub(/\./, "", v); comp=v+0 }
494-
END { printf "%.2f", (act+wire+comp)*16384/1073741824 }
499+
END { printf "%.2f", (act+wire+comp)*page_size/1073741824 }
495500
')
496501
echo "ram_peak=$RAM" >> $GITHUB_OUTPUT
497502
echo "RAM after inference: ${RAM} GB"
@@ -508,7 +513,7 @@ jobs:
508513
509514
- name: "[3/3] Validate inference response"
510515
run: |
511-
RESULT='${{ steps.ram_peak.outputs.inf_result }}'
516+
RESULT=$(cat /tmp/inf_result.json)
512517
if echo "$RESULT" | grep -q '"content"'; then
513518
TEXT=$(echo "$RESULT" | python3 -c \
514519
"import sys,json;d=json.load(sys.stdin);print(d['choices'][0]['message']['content'])" \

Sources/SwiftLM/Server.swift

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,8 @@ struct MLXServer: AsyncParsableCommand {
345345
draftFootprintBytes = 0
346346
}
347347

348+
var mainModelProfile: ModelProfile? = nil
349+
348350
if self.streamExperts, let modelDir = modelDirectory {
349351
setenv("EXPERIMENTAL_SSD_STREAM", modelDir.path, 1)
350352
// Activate the modern Swift ExpertStreamingConfig so Load.swift can:
@@ -381,7 +383,8 @@ struct MLXServer: AsyncParsableCommand {
381383
Memory.cacheLimit = computeSSDMemoryBudget(totalRAMBytes: system.totalRAMBytes, draftWeightBytes: draftFootprintBytes)
382384

383385
// Determine safe memoryLimit sentinel
384-
let mainFootprintBytes = ModelProfiler.profile(modelDirectory: modelDir, modelId: modelId)?.weightFileSizeBytes ?? 0
386+
mainModelProfile = ModelProfiler.profile(modelDirectory: modelDir, modelId: modelId)
387+
let mainFootprintBytes = mainModelProfile?.weightFileSizeBytes ?? 0
385388
let combinedFootprint = mainFootprintBytes + draftFootprintBytes
386389
let physicalRAM = Int(system.totalRAMBytes)
387390
let combinedExceedsRAM = combinedFootprint > Int(Double(physicalRAM) * 0.70)
@@ -417,8 +420,9 @@ struct MLXServer: AsyncParsableCommand {
417420
}
418421

419422
var partitionPlan: PartitionPlan?
420-
if let modelDir = modelDirectory,
421-
let profile = ModelProfiler.profile(modelDirectory: modelDir, modelId: modelId) {
423+
if let modelDir = modelDirectory {
424+
let profile = mainModelProfile ?? ModelProfiler.profile(modelDirectory: modelDir, modelId: modelId)
425+
if let profile = profile {
422426
let system = ModelProfiler.systemProfile()
423427
let contextSize = self.ctxSize ?? 4096
424428
let plan = ModelProfiler.plan(model: profile, system: system, contextSize: contextSize)
@@ -441,7 +445,6 @@ struct MLXServer: AsyncParsableCommand {
441445
// draftFootprintBytes pre-computed once above (Copilot review).
442446
let physicalBudget = computeSSDMemoryBudget(totalRAMBytes: system.totalRAMBytes, draftWeightBytes: draftFootprintBytes)
443447
Memory.cacheLimit = physicalBudget
444-
Memory.memoryLimit = 200 * 1024 * 1024 * 1024 // 200GB sentinel to bypass MLX eval_impl spin loop
445448
print("[SwiftLM] 💾 Memory strategy: SSD STREAMING (page-cache managed, \(physicalBudget / (1024*1024*1024))GB RAM budget, no swap)")
446449
} else {
447450
Memory.cacheLimit = plan.recommendedCacheLimit
@@ -453,7 +456,6 @@ struct MLXServer: AsyncParsableCommand {
453456
// draftFootprintBytes pre-computed once above (Copilot review).
454457
let physicalBudget = computeSSDMemoryBudget(totalRAMBytes: system.totalRAMBytes, draftWeightBytes: draftFootprintBytes)
455458
Memory.cacheLimit = physicalBudget
456-
Memory.memoryLimit = 200 * 1024 * 1024 * 1024 // 200GB sentinel to bypass MLX eval_impl spin loop
457459
print("[SwiftLM] 💾 Memory strategy: SSD STREAMING (page-cache managed, \(physicalBudget / (1024*1024*1024))GB RAM budget, no swap)")
458460
} else {
459461
Memory.cacheLimit = plan.recommendedCacheLimit
@@ -465,6 +467,7 @@ struct MLXServer: AsyncParsableCommand {
465467
print("[SwiftLM] \(plan.strategy.emoji) WARNING: Model is \(String(format: "%.1f", plan.overcommitRatio))× system RAM. Loading will be extremely slow.")
466468
for w in plan.warnings { print("[SwiftLM] \(w)") }
467469
}
470+
}
468471
} else if self.info {
469472
print("[SwiftLM] Model not yet downloaded. Run without --info to download first, or provide a local path.")
470473
return

run_benchmark.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,11 +1174,12 @@ if [ "$suite_opt" == "10" ]; then
11741174

11751175
# Measure RAM via vm_stat (page size read at runtime via `sysctl -n hw.pagesize`; 16384 bytes on Apple Silicon)
11761176
get_ram_gb_t10() {
1177-
vm_stat | awk '
1177+
PAGE_SIZE=$(sysctl -n hw.pagesize)
1178+
vm_stat | awk -v page_size="$PAGE_SIZE" '
11781179
/Pages active:/ { v=$3; gsub(/\./, "", v); act=v+0 }
11791180
/Pages wired down:/ { v=$4; gsub(/\./, "", v); wire=v+0 }
11801181
/Pages occupied by compressor:/ { v=$5; gsub(/\./, "", v); comp=v+0 }
1181-
END { printf "%.2f", (act+wire+comp)*16384/1073741824 }
1182+
END { printf "%.2f", (act+wire+comp)*page_size/1073741824 }
11821183
'
11831184
}
11841185

0 commit comments

Comments
 (0)