66
77#include < chrono>
88#include < cmath>
9+ #include < algorithm>
910#include < cstdint>
1011#include < cstdlib>
1112#include < ctime>
1213#include < fstream>
14+ #include < functional>
1315#include < iomanip>
1416#include < iostream>
1517#include < string>
@@ -22,6 +24,12 @@ struct Options {
2224 std::uint64_t warmups = 5 ;
2325 std::uint64_t values = 20 ;
2426 std::uint64_t runs = 20 ;
27+ double min_time_sec = 0.0 ;
28+ // Safety cap for the calibration doubling loop. Set high enough that even
29+ // sub-nanosecond ops can reach typical --min-time targets (e.g. 100ms).
30+ // A warning is printed if calibration hits this cap before reaching min-time.
31+ std::uint64_t max_loops = 100000000 ;
32+ std::uint64_t calibrate_rounds = 3 ;
2533 std::string output_path;
2634 std::string benchmark_name;
2735};
@@ -46,6 +54,18 @@ inline Options parse_args(int argc, char** argv) {
4654 options.warmups = std::strtoull (argv[++i], nullptr , 10 );
4755 continue ;
4856 }
57+ if (arg == " --min-time" && i + 1 < argc) {
58+ options.min_time_sec = std::strtod (argv[++i], nullptr );
59+ continue ;
60+ }
61+ if (arg == " --max-loops" && i + 1 < argc) {
62+ options.max_loops = std::strtoull (argv[++i], nullptr , 10 );
63+ continue ;
64+ }
65+ if (arg == " --calibrate-rounds" && i + 1 < argc) {
66+ options.calibrate_rounds = std::strtoull (argv[++i], nullptr , 10 );
67+ continue ;
68+ }
4969 if (arg == " --values" && i + 1 < argc) {
5070 options.values = std::strtoull (argv[++i], nullptr , 10 );
5171 continue ;
@@ -68,6 +88,9 @@ inline Options parse_args(int argc, char** argv) {
6888 << " --warmups N Warmup values per run (default: 5)\n "
6989 << " --values N Timed values per run (default: 20)\n "
7090 << " --runs N Number of runs (default: 20)\n "
91+ << " --min-time S Calibrate loops to reach S seconds per value\n "
92+ << " --max-loops N Safety cap for calibration loop count (default: 100000000)\n "
93+ << " --calibrate-rounds N Calibration passes (default: 3)\n "
7194 << " -o, --output F Write pyperf-compatible JSON to file\n "
7295 << " --name S Benchmark name (overrides default)\n " ;
7396 std::exit (0 );
@@ -93,6 +116,82 @@ inline std::string iso_now() {
93116 return std::string (buf);
94117}
95118
119+ // Calibrate loop count to hit a minimum wall time per value.
120+ // Returns the chosen loop count. If `capped_out` is non-null, it is set to
121+ // true when calibration reached `max_loops` before hitting `min_time_sec`
122+ // (meaning --min-time was NOT actually satisfied by the calibration).
123+ template <typename Fn>
124+ std::uint64_t calibrate_loops (
125+ const Options& options,
126+ Fn&& fn,
127+ const std::function<void ()>& post_calibrate = {},
128+ bool * capped_out = nullptr ,
129+ double * last_elapsed_out = nullptr
130+ ) {
131+ if (options.min_time_sec <= 0.0 ) {
132+ if (capped_out) *capped_out = false ;
133+ if (last_elapsed_out) *last_elapsed_out = 0.0 ;
134+ return options.loops ;
135+ }
136+
137+ // Allow callers (e.g. the explicit-loop overload) to request a minimum
138+ // starting loop count via options.loops.
139+ const std::uint64_t start_loops = std::max<std::uint64_t >(1 , options.loops );
140+ const std::uint64_t max_loops = std::max<std::uint64_t >(start_loops, options.max_loops );
141+ const std::uint64_t rounds = std::max<std::uint64_t >(1 , options.calibrate_rounds );
142+
143+ // Track the round that produced the best (largest) loop count so the
144+ // returned loop count, capped flag, and last-elapsed time all describe
145+ // the same round.
146+ std::uint64_t best_loops = 0 ;
147+ bool best_capped = false ;
148+ double best_elapsed = 0.0 ;
149+
150+ for (std::uint64_t round = 0 ; round < rounds; ++round) {
151+ std::uint64_t loops = start_loops;
152+ bool round_capped = false ;
153+ double elapsed = 0.0 ;
154+
155+ while (true ) {
156+ const auto t0 = std::chrono::steady_clock::now ();
157+ for (std::uint64_t i = 0 ; i < loops; ++i) {
158+ fn ();
159+ }
160+ const auto t1 = std::chrono::steady_clock::now ();
161+ elapsed = std::chrono::duration<double >(t1 - t0).count ();
162+
163+ // Drain any state left behind by this probe (e.g. queued async
164+ // work on a persistent stream) before the next probe, the next
165+ // round, or the first measured warmup/value runs.
166+ if (post_calibrate) {
167+ post_calibrate ();
168+ }
169+ if (elapsed >= options.min_time_sec ) {
170+ break ;
171+ }
172+ if (loops >= max_loops) {
173+ round_capped = true ;
174+ break ;
175+ }
176+ if (loops > max_loops / 2 ) {
177+ loops = max_loops;
178+ } else {
179+ loops *= 2 ;
180+ }
181+ }
182+
183+ if (loops >= best_loops) {
184+ best_loops = loops;
185+ best_capped = round_capped;
186+ best_elapsed = elapsed;
187+ }
188+ }
189+
190+ if (capped_out) *capped_out = best_capped;
191+ if (last_elapsed_out) *last_elapsed_out = best_elapsed;
192+ return best_loops;
193+ }
194+
96195// Run a benchmark function. The function signature is: void fn() — one call = one operation.
97196// The harness calls fn() in a tight loop `loops` times per value.
98197template <typename Fn>
@@ -235,22 +334,57 @@ class BenchmarkSuite {
235334public:
236335 explicit BenchmarkSuite (Options options) : options_(std::move(options)) {}
237336
337+ // Post-calibration hook. If set, invoked between calibration probes so
338+ // async benchmarks can drain state left behind by each probe before the
339+ // next one runs. The final probe leaves the benchmark in a drained state
340+ // before the first measured warmup/value. Can be overridden per-call via
341+ // the `post_calibrate` parameter on `run()`.
342+ void set_post_calibrate (std::function<void ()> hook) {
343+ post_calibrate_ = std::move (hook);
344+ }
345+
238346 // Run a benchmark and record it. The name is used as the benchmark ID.
347+ // If --min-time is set, loop count is auto-calibrated. `post_calibrate`,
348+ // if provided, runs between calibration probes to reset async state.
239349 template <typename Fn>
240- void run (const std::string& name, Fn&& fn) {
241- auto results = run_benchmark (options_, std::forward<Fn>(fn));
350+ void run (
351+ const std::string& name,
352+ Fn&& fn,
353+ std::function<void ()> post_calibrate = {}
354+ ) {
355+ std::uint64_t loops = options_.loops ;
356+ Options custom = options_;
357+ if (options_.min_time_sec > 0.0 ) {
358+ loops = calibrate_and_warn (name, options_, fn, select_post_calibrate (post_calibrate));
359+ custom.loops = loops;
360+ }
361+ auto results = run_benchmark (custom, std::forward<Fn>(fn));
242362 print_summary (name, results);
243- entries_.push_back ({name, options_. loops , std::move (results)});
363+ entries_.push_back ({name, loops, std::move (results)});
244364 }
245365
246- // Run a benchmark with a custom loop count (for slow operations like compilation).
366+ // Run a benchmark with a custom loop count (used as a floor for fast ops
367+ // or a fixed count for slow ops like compilation). When --min-time is set,
368+ // calibration still runs but starts from `loops_override` as the minimum.
247369 template <typename Fn>
248- void run (const std::string& name, std::uint64_t loops_override, Fn&& fn) {
370+ void run (
371+ const std::string& name,
372+ std::uint64_t loops_override,
373+ Fn&& fn,
374+ std::function<void ()> post_calibrate = {}
375+ ) {
376+ std::uint64_t loops = loops_override;
249377 Options custom = options_;
250378 custom.loops = loops_override;
379+ if (options_.min_time_sec > 0.0 ) {
380+ Options calib_opts = options_;
381+ calib_opts.loops = loops_override; // floor
382+ loops = calibrate_and_warn (name, calib_opts, fn, select_post_calibrate (post_calibrate));
383+ custom.loops = loops;
384+ }
251385 auto results = run_benchmark (custom, std::forward<Fn>(fn));
252386 print_summary (name, results);
253- entries_.push_back ({name, loops_override , std::move (results)});
387+ entries_.push_back ({name, loops , std::move (results)});
254388 }
255389
256390 // Write all collected benchmarks to the output file (if -o was given).
@@ -263,6 +397,36 @@ class BenchmarkSuite {
263397private:
264398 Options options_;
265399 std::vector<BenchmarkEntry> entries_;
400+ std::function<void ()> post_calibrate_;
401+
402+ std::function<void ()> select_post_calibrate (const std::function<void ()>& per_call) const {
403+ if (per_call) {
404+ return per_call;
405+ }
406+ return post_calibrate_;
407+ }
408+
409+ template <typename Fn>
410+ std::uint64_t calibrate_and_warn (
411+ const std::string& name,
412+ const Options& calib_opts,
413+ Fn&& fn,
414+ const std::function<void ()>& post_calibrate
415+ ) const {
416+ bool capped = false ;
417+ double last_elapsed = 0.0 ;
418+ std::uint64_t loops = calibrate_loops (
419+ calib_opts, std::forward<Fn>(fn), post_calibrate, &capped, &last_elapsed
420+ );
421+ if (capped) {
422+ std::cerr << " WARNING: " << name
423+ << " : calibration hit --max-loops (" << calib_opts.max_loops
424+ << " ) before reaching --min-time (" << calib_opts.min_time_sec
425+ << " s). Last sample: " << last_elapsed
426+ << " s. Raise --max-loops to satisfy --min-time for this benchmark.\n " ;
427+ }
428+ return loops;
429+ }
266430
267431 static void write_multi_pyperf_json (
268432 const std::string& output_path,
0 commit comments