Skip to content

Commit 8385350

Browse files
fix: allow custom model selection in benchmark test 10
1 parent 7b0bfd4 commit 8385350

2 files changed

Lines changed: 35 additions & 29 deletions

File tree

run_benchmark.sh

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1161,14 +1161,22 @@ fi
11611161
# ✅ Peak RAM < 80% physical RAM (proves no swap explosion)
11621162
# ✅ /v1/chat/completions returns content (proves the combo is functional)
11631163
if [ "$suite_opt" == "10" ]; then
1164+
T10_PORT=15472
1165+
T10_MAIN="$MODEL"
1166+
1167+
echo ""
1168+
read -p " Enter Draft Model HuggingFace ID (default: mlx-community/Qwen3.5-0.8B-MLX-4bit): " custom_draft
1169+
if [ -z "$custom_draft" ]; then
1170+
T10_DRAFT="mlx-community/Qwen3.5-0.8B-MLX-4bit"
1171+
else
1172+
T10_DRAFT="$custom_draft"
1173+
fi
1174+
11641175
echo ""
11651176
echo "=> Test 10: Issue #72 SSD + Draft Model Memory Regression"
1166-
echo " Main: mlx-community/Qwen3.5-4B-MLX-4bit (SSD-streamed)"
1167-
echo " Draft: mlx-community/Qwen3.5-0.8B-MLX-4bit (in-RAM)"
1177+
echo " Main: $T10_MAIN (SSD-streamed)"
1178+
echo " Draft: $T10_DRAFT (in-RAM)"
11681179

1169-
T10_PORT=15472
1170-
T10_MAIN="mlx-community/Qwen3.5-4B-MLX-4bit"
1171-
T10_DRAFT="mlx-community/Qwen3.5-0.8B-MLX-4bit"
11721180
T10_LOG="./tmp/test10_issue72.log"
11731181
mkdir -p tmp
11741182

tests/SwiftLMTests/SSDPersistentBufferGuardTests.swift

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -41,24 +41,22 @@ final class SSDDraftStrategyTests: XCTestCase {
4141
"Auto-capped 1 draft token → 2-position verify fan-out (2× SSD I/O cost)")
4242
}
4343

44-
/// Net throughput is positive when: acceptance_rate × draft_tps > fan_out_penalty × base_tps
45-
/// At 50% acceptance and 2× fan-out this is just barely net-neutral.
46-
/// At 70% acceptance (typical for family models) it's clearly positive.
44+
/// With 1 draft token, the verify pass covers 2 positions, so SSD I/O fan-out is 2×.
45+
/// In this simplified model, break-even acceptance is therefore 1 / fan_out = 50%.
46+
/// At 70% acceptance (typical for same-family models), the capped strategy is on the
47+
/// positive side of that threshold.
4748
func testNetThroughput_CappedDraft_PositiveAt70PctAcceptance() {
48-
let baseTPS = 5.0 // tok/s for SSD streaming alone
49-
let draftTPS = 73.0 // tok/s for a 4B draft model in RAM
5049
let fanOutPenalty = 2.0 // 2× I/O at 1 draft token
51-
let acceptRate = 0.70 // typical for same-family models
50+
let acceptRate = 0.70 // typical for same-family models
5251

53-
// Net effective TPS with draft (simplified model):
54-
// Each round: draft generates 1 token fast, main verifies 2 positions.
55-
// If accepted: 1 extra token at draft speed per round.
56-
// Cost: main model verify at base_tps / fan_out_penalty.
57-
let effectiveVerifyTPS = baseTPS / fanOutPenalty
58-
let netTPS = effectiveVerifyTPS + acceptRate * (draftTPS / draftTPS)
52+
// Reframe the assertion around the auto-cap arithmetic directly:
53+
// break-even acceptance_rate = 1 / verify_positions = 1 / fanOutPenalty.
54+
let breakEvenAcceptanceRate = 1.0 / fanOutPenalty
5955

60-
XCTAssertGreaterThan(netTPS, effectiveVerifyTPS,
61-
"At 70% acceptance + 1 draft token, net TPS must exceed un-assisted verify TPS")
56+
XCTAssertEqual(breakEvenAcceptanceRate, 0.50, accuracy: 0.000_001,
57+
"At 1 draft token, 2 verify positions imply a 50% break-even acceptance threshold")
58+
XCTAssertGreaterThan(acceptRate, breakEvenAcceptanceRate,
59+
"At 70% acceptance + 1 draft token, acceptance is above the capped 2-position break-even threshold")
6260
}
6361

6462
/// Auto-cap logic: numDraftTokens > 1 when SSD + draft → should be capped to 1.
@@ -125,13 +123,13 @@ final class SSDDraftStrategyTests: XCTestCase {
125123
/// This is the exact reporter scenario: 35B main (20.4 GB) + 4B draft (3.0 GB).
126124
func testMemoryLimit_TightCap_Issue72ReporterScenario() {
127125
let physicalRAM = Int(16.0 * Double(gb))
128-
let mainBytes = Int(20.4 * 1e9)
129-
let draftBytes = Int(3.0 * 1e9)
126+
let mainBytes = Int(20.4 * Double(gb))
127+
let draftBytes = Int(3.0 * Double(gb))
130128
let combined = mainBytes + draftBytes
131-
let threshold = Int(Double(physicalRAM) * 0.70) // 11.2 GB
129+
let threshold = Int(Double(physicalRAM) * 0.70) // 11.2 GiB
132130

133131
XCTAssertGreaterThan(combined, threshold,
134-
"Reporter scenario: 23.4 GB combined must exceed 70% of 16 GB physical RAM")
132+
"Reporter scenario: 23.4 GiB combined must exceed 70% of 16 GiB physical RAM")
135133

136134
let tightCap = Int(Double(physicalRAM) * 1.1) // ~17.6 GB
137135
let sentinel = 200 * gb
@@ -140,21 +138,21 @@ final class SSDDraftStrategyTests: XCTestCase {
140138
let hasDraftBytes = draftBytes > 0
141139
let limit = (combined > threshold && hasDraftBytes) ? tightCap : sentinel
142140
XCTAssertEqual(limit, tightCap,
143-
"16 GB + combined 23.4 GB: tight cap (~17 GB) must be chosen over 200 GB sentinel")
141+
"16 GiB + combined 23.4 GiB: tight cap (~17.6 GiB) must be chosen over 200 GiB sentinel")
144142
XCTAssertLessThan(limit, 20 * gb,
145143
"Tight cap must be well below 20 GB to force MLX eviction over swap")
146144
}
147145

148146
/// On a 64 GB machine the 200 GB sentinel is preserved — benchmark hardware unaffected.
149147
func testMemoryLimit_Sentinel_PreservedOn64GB() {
150148
let physicalRAM = Int(64.0 * Double(gb))
151-
let mainBytes = Int(20.4 * 1e9)
152-
let draftBytes = Int(3.0 * 1e9)
149+
let mainBytes = Int(20.4 * Double(gb))
150+
let draftBytes = Int(3.0 * Double(gb))
153151
let combined = mainBytes + draftBytes
154-
let threshold = Int(Double(physicalRAM) * 0.70) // 44.8 GB
152+
let threshold = Int(Double(physicalRAM) * 0.70) // 44.8 GiB
155153

156154
XCTAssertLessThan(combined, threshold,
157-
"64 GB machine: 23.4 GB combined fits within 70% threshold — sentinel should apply")
155+
"64 GiB machine: 23.4 GiB combined fits within 70% threshold — sentinel should apply")
158156

159157
let tightCap = Int(Double(physicalRAM) * 1.1)
160158
let sentinel = 200 * gb
@@ -167,7 +165,7 @@ final class SSDDraftStrategyTests: XCTestCase {
167165
/// Solo SSD streaming (no draft): sentinel always used, warm path always active.
168166
func testMemoryLimit_Sentinel_SoloSSDStreaming() {
169167
let physicalRAM = Int(16.0 * Double(gb))
170-
let mainBytes = Int(20.4 * 1e9)
168+
let mainBytes = Int(20.4 * Double(gb))
171169
let draftBytes = 0 // no draft model
172170
let combined = mainBytes + draftBytes
173171
let threshold = Int(Double(physicalRAM) * 0.70)

0 commit comments

Comments
 (0)