@@ -14,17 +14,10 @@ local WARMUP_CAP = 1000
1414local SETTLE_SECONDS = 2
1515local SAMPLE_INTERVAL = 1
1616
17- --[[
18- Iter count for CPU benches is auto-tuned: probe the operation's
19- per-call cost, then pick a count that runs for ~MEASURE_BUDGET
20- seconds. This makes timings comparable across tests of wildly
21- different cost (a 50ns bool encode vs a 50us 100x-entity encode)
22- without hand-tuned divisors at every call site.
23- ]]
24- local PROBE_ITERS = 256
25- local MEASURE_BUDGET = 0.05 -- seconds
26- local MIN_ITERS = 1000
27- local MAX_ITERS = 1000000
17+ -- Fixed iteration count shared by every CPU bench: large enough that cheap
18+ -- operations get a stable timing, small enough that expensive ones don't drag
19+ -- the run out. No per-test tuning or probe; the iters column always shows ITERS.
20+ local ITERS = 1000
2821
2922-- Column-aligned output formats so every row lines up regardless of
3023-- label or value content.
@@ -54,31 +47,6 @@ local function warmupFor(iterations: number): number
5447 return minN (WARMUP_CAP , iterations // WARMUP_FRACTION )
5548end
5649
57- --[[
58- Run `fn` PROBE_ITERS times, measure how long it took, and return the
59- iter count that fits MEASURE_BUDGET seconds (clamped to a sane range).
60- Avoids the random `// 2`, `// 10`, `// 50` divisors at call sites:
61- every test gets a similarly-sized budget so timings are comparable.
62- ]]
63- local function pickIterCount (fn : () -> ()): number
64- local t0 = clock ()
65- for _ = 1 , PROBE_ITERS do
66- fn ()
67- end
68- local elapsed = clock () - t0
69- if elapsed <= 0 then
70- return MAX_ITERS
71- end
72- local target = floor (PROBE_ITERS * MEASURE_BUDGET / elapsed )
73- if target < MIN_ITERS then
74- return MIN_ITERS
75- end
76- if target > MAX_ITERS then
77- return MAX_ITERS
78- end
79- return target
80- end
81-
8250local function percentile (sorted : { number }, p : number ): number
8351 local idx = floor (# sorted * p / 100 + 0.5 )
8452 return sorted [maxN (1 , minN (idx , # sorted ))]
@@ -153,18 +121,14 @@ function Harness.note(text: string): ()
153121 print (format ("%s%s" , INDENT , text ))
154122end
155123
156- --[[
157- Encode-only throughput. Iter count is auto-picked from a probe so
158- every test runs for roughly MEASURE_BUDGET seconds.
159- ]]
124+ -- Encode-only throughput. ITERS iterations every test, no auto-tuning.
160125function Harness .benchEncode (label : string , fn : () -> ()): ()
161- local iterations = pickIterCount (fn )
162- for _ = 1 , warmupFor (iterations ) do
126+ for _ = 1 , warmupFor (ITERS ) do
163127 fn ()
164128 end
165129
166130 local t0 = clock ()
167- for _ = 1 , iterations do
131+ for _ = 1 , ITERS do
168132 fn ()
169133 end
170134 local elapsed = clock () - t0
@@ -174,9 +138,9 @@ function Harness.benchEncode(label: string, fn: () -> ()): ()
174138 FMT_ROW_ENC ,
175139 INDENT ,
176140 label ,
177- fmtNum (iterations / elapsed ),
178- fmtTime (elapsed / iterations * 1e6 ),
179- fmtNum (iterations )
141+ fmtNum (ITERS / elapsed ),
142+ fmtTime (elapsed / ITERS * 1e6 ),
143+ fmtNum (ITERS )
180144 )
181145 )
182146end
@@ -253,11 +217,7 @@ function Harness.benchDelta(
253217 )
254218end
255219
256- --[[
257- Full encode + decode round-trip. Iter count is auto-picked from a
258- probe of the combined enc+dec cycle so every test runs for roughly
259- MEASURE_BUDGET seconds.
260- ]]
220+ -- Full encode + decode round-trip. ITERS iterations every test.
261221function Harness .benchRoundTrip (label : string , codec : any , value : any ): ()
262222 local ch = Channel .create ()
263223 codec .write (ch , value )
@@ -267,32 +227,27 @@ function Harness.benchRoundTrip(label: string, codec: any, value: any): ()
267227 buffer.copy (buf , 0 , ch .buff , 0 , written )
268228 local refs = if ch .refCount > 0 then table.clone (ch .refs ) else nil
269229
270- local function cycle (): ()
230+ for _ = 1 , warmupFor ( ITERS ) do
271231 ch .cursor = 0
272232 codec .write (ch , value )
273233 codec .read (buf , 0 , refs )
274234 end
275235
276- local iterations = pickIterCount (cycle )
277- for _ = 1 , warmupFor (iterations ) do
278- cycle ()
279- end
280-
281236 local t0 = clock ()
282- for _ = 1 , iterations do
237+ for _ = 1 , ITERS do
283238 ch .cursor = 0
284239 codec .write (ch , value )
285240 end
286241 local encodeTime = clock () - t0
287242
288243 local t1 = clock ()
289- for _ = 1 , iterations do
244+ for _ = 1 , ITERS do
290245 codec .read (buf , 0 , refs )
291246 end
292247 local decodeTime = clock () - t1
293248
294- local encUs = encodeTime / iterations * 1e6
295- local decUs = decodeTime / iterations * 1e6
249+ local encUs = encodeTime / ITERS * 1e6
250+ local decUs = decodeTime / ITERS * 1e6
296251
297252 print (
298253 format (
@@ -302,7 +257,7 @@ function Harness.benchRoundTrip(label: string, codec: any, value: any): ()
302257 fmtTime (encUs ),
303258 fmtTime (decUs ),
304259 fmtTime (encUs + decUs ),
305- fmtNum (iterations / (encodeTime + decodeTime ))
260+ fmtNum (ITERS / (encodeTime + decodeTime ))
306261 )
307262 )
308263end
0 commit comments