@@ -41,24 +41,22 @@ final class SSDDraftStrategyTests: XCTestCase {
4141 " Auto-capped 1 draft token → 2-position verify fan-out (2× SSD I/O cost) " )
4242 }
4343
44- /// Net throughput is positive when: acceptance_rate × draft_tps > fan_out_penalty × base_tps
45- /// At 50% acceptance and 2× fan-out this is just barely net-neutral.
46- /// At 70% acceptance (typical for family models) it's clearly positive.
44+ /// With 1 draft token, the verify pass covers 2 positions, so SSD I/O fan-out is 2×.
45+ /// In this simplified model, break-even acceptance is therefore 1 / fan_out = 50%.
46+ /// At 70% acceptance (typical for same-family models), the capped strategy is on the
47+ /// positive side of that threshold.
4748 func testNetThroughput_CappedDraft_PositiveAt70PctAcceptance( ) {
48- let baseTPS = 5.0 // tok/s for SSD streaming alone
49- let draftTPS = 73.0 // tok/s for a 4B draft model in RAM
5049 let fanOutPenalty = 2.0 // 2× I/O at 1 draft token
51- let acceptRate = 0.70 // typical for same-family models
50+ let acceptRate = 0.70 // typical for same-family models
5251
53- // Net effective TPS with draft (simplified model):
54- // Each round: draft generates 1 token fast, main verifies 2 positions.
55- // If accepted: 1 extra token at draft speed per round.
56- // Cost: main model verify at base_tps / fan_out_penalty.
57- let effectiveVerifyTPS = baseTPS / fanOutPenalty
58- let netTPS = effectiveVerifyTPS + acceptRate * ( draftTPS / draftTPS)
52+ // Reframe the assertion around the auto-cap arithmetic directly:
53+ // break-even acceptance_rate = 1 / verify_positions = 1 / fanOutPenalty.
54+ let breakEvenAcceptanceRate = 1.0 / fanOutPenalty
5955
60- XCTAssertGreaterThan ( netTPS, effectiveVerifyTPS,
61- " At 70% acceptance + 1 draft token, net TPS must exceed un-assisted verify TPS " )
56+ XCTAssertEqual ( breakEvenAcceptanceRate, 0.50 , accuracy: 0.000_001 ,
57+ " At 1 draft token, 2 verify positions imply a 50% break-even acceptance threshold " )
58+ XCTAssertGreaterThan ( acceptRate, breakEvenAcceptanceRate,
59+ " At 70% acceptance + 1 draft token, acceptance is above the capped 2-position break-even threshold " )
6260 }
6361
6462 /// Auto-cap logic: numDraftTokens > 1 when SSD + draft → should be capped to 1.
@@ -125,13 +123,13 @@ final class SSDDraftStrategyTests: XCTestCase {
125123 /// This is the exact reporter scenario: 35B main (20.4 GB) + 4B draft (3.0 GB).
126124 func testMemoryLimit_TightCap_Issue72ReporterScenario( ) {
127125 let physicalRAM = Int ( 16.0 * Double( gb) )
128- let mainBytes = Int ( 20.4 * 1e9 )
129- let draftBytes = Int ( 3.0 * 1e9 )
126+ let mainBytes = Int ( 20.4 * Double ( gb ) )
127+ let draftBytes = Int ( 3.0 * Double ( gb ) )
130128 let combined = mainBytes + draftBytes
131- let threshold = Int ( Double ( physicalRAM) * 0.70 ) // 11.2 GB
129+ let threshold = Int ( Double ( physicalRAM) * 0.70 ) // 11.2 GiB
132130
133131 XCTAssertGreaterThan ( combined, threshold,
134- " Reporter scenario: 23.4 GB combined must exceed 70% of 16 GB physical RAM " )
132+ " Reporter scenario: 23.4 GiB combined must exceed 70% of 16 GiB physical RAM " )
135133
136134 let tightCap = Int ( Double ( physicalRAM) * 1.1 ) // ~17.6 GB
137135 let sentinel = 200 * gb
@@ -140,21 +138,21 @@ final class SSDDraftStrategyTests: XCTestCase {
140138 let hasDraftBytes = draftBytes > 0
141139 let limit = ( combined > threshold && hasDraftBytes) ? tightCap : sentinel
142140 XCTAssertEqual ( limit, tightCap,
143- " 16 GB + combined 23.4 GB : tight cap (~17 GB ) must be chosen over 200 GB sentinel " )
141+ " 16 GiB + combined 23.4 GiB : tight cap (~17.6 GiB ) must be chosen over 200 GiB sentinel " )
144142 XCTAssertLessThan ( limit, 20 * gb,
145143 " Tight cap must be well below 20 GB to force MLX eviction over swap " )
146144 }
147145
148146 /// On a 64 GB machine the 200 GB sentinel is preserved — benchmark hardware unaffected.
149147 func testMemoryLimit_Sentinel_PreservedOn64GB( ) {
150148 let physicalRAM = Int ( 64.0 * Double( gb) )
151- let mainBytes = Int ( 20.4 * 1e9 )
152- let draftBytes = Int ( 3.0 * 1e9 )
149+ let mainBytes = Int ( 20.4 * Double ( gb ) )
150+ let draftBytes = Int ( 3.0 * Double ( gb ) )
153151 let combined = mainBytes + draftBytes
154- let threshold = Int ( Double ( physicalRAM) * 0.70 ) // 44.8 GB
152+ let threshold = Int ( Double ( physicalRAM) * 0.70 ) // 44.8 GiB
155153
156154 XCTAssertLessThan ( combined, threshold,
157- " 64 GB machine: 23.4 GB combined fits within 70% threshold — sentinel should apply " )
155+ " 64 GiB machine: 23.4 GiB combined fits within 70% threshold — sentinel should apply " )
158156
159157 let tightCap = Int ( Double ( physicalRAM) * 1.1 )
160158 let sentinel = 200 * gb
@@ -167,7 +165,7 @@ final class SSDDraftStrategyTests: XCTestCase {
167165 /// Solo SSD streaming (no draft): sentinel always used, warm path always active.
168166 func testMemoryLimit_Sentinel_SoloSSDStreaming( ) {
169167 let physicalRAM = Int ( 16.0 * Double( gb) )
170- let mainBytes = Int ( 20.4 * 1e9 )
168+ let mainBytes = Int ( 20.4 * Double ( gb ) )
171169 let draftBytes = 0 // no draft model
172170 let combined = mainBytes + draftBytes
173171 let threshold = Int ( Double ( physicalRAM) * 0.70 )
0 commit comments