Skip to content

Commit b2a4543

Browse files
[V4] Benchmark suite 1/N (#105)
* strassen * skynet * scan * quicksort * primes * nqueens * matmul * mandel * knapsack * integrate * heat * cmake * warnings * format * include fixed * doc benchmarks * move * sub folder * order tweaks * some stubs * order * nameing * icons * icon * common benchmark utils * tidy up * simplify * refactor * tmp * notes * full refactor * clean ups * tidy * dry * comments
1 parent 3d896e9 commit b2a4543

63 files changed

Lines changed: 2418 additions & 301 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

benchmark/lib/CMakeLists.txt

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,21 @@ target_sources(benchmark_common
99
FILE_SET HEADERS
1010
BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}
1111
FILES
12+
bench.hpp
1213
fib.hpp
1314
fold.hpp
14-
uts.hpp
15+
heat.hpp
16+
integrate.hpp
17+
knapsack.hpp
1518
macros.hpp
19+
mandelbrot.hpp
20+
matmul.hpp
21+
nqueens.hpp
22+
primes.hpp
23+
quicksort.hpp
24+
scan.hpp
25+
skynet.hpp
26+
uts.hpp
1627
)
1728

1829
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../external/uts external/uts)

benchmark/lib/bench.hpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#pragma once
2+
3+
#include <benchmark/benchmark.h>
4+
5+
#ifdef LF_BENCH_NO_IMPORT_STD
6+
#include <cstdint>
7+
#include <format>
8+
#include <functional>
9+
#else
10+
import std;
11+
#endif
12+
13+
namespace lf_bench {
14+
15+
// Sentinel thread count meaning "nothing to report": `report_threads` returns
// without touching the benchmark state when it is given this value.
inline constexpr std::int64_t no_threads = 0;
16+
17+
// Returns the reciprocal of `n` as a double.
// NOTE(review): presumably supplied as a custom complexity function where `n`
// is the thread count set by `report_threads` - confirm at the registration site.
inline auto inverse_complexity(benchmark::IterationCount n) -> double {
  const auto count = static_cast<double>(n);
  return 1.0 / count;
}
18+
19+
// Publishes the thread count of a run on `state`: stores it in the "p" counter
// and uses it as the complexity N. Does nothing when `threads` is `no_threads`.
inline void report_threads(benchmark::State &state, std::int64_t threads) {
  if (threads != no_threads) {
    state.counters["p"] = static_cast<double>(threads);
    state.SetComplexityN(static_cast<benchmark::IterationCount>(threads));
  }
}
27+
28+
// Core benchmark driver.
//
// Reports `threads` via `report_threads`, then on every benchmark iteration
// invokes `fn()` and validates the value with `check(result, expected)`. The
// first failed check marks the run as errored and stops iterating.
//
// The error message is built with `std::format` from both `result` and
// `expected`, so `Expected` and `std::invoke_result_t<Fn>` must be formattable.
template <typename Expected, typename Check, typename Fn>
void bench(benchmark::State &state, std::int64_t threads, const Expected &expected, Check check, Fn fn) {
  report_threads(state, threads);

  for (auto _ : state) {
    auto result = std::invoke(fn);

    if (std::invoke(check, result, expected)) {
      // Keep the computed value observable to the optimiser.
      benchmark::DoNotOptimize(result);
      continue;
    }

    // A ranged-for benchmark loop must be left explicitly after an error.
    state.SkipWithError(std::format("incorrect result: {} != {}", result, expected));
    break;
  }
}
46+
47+
template <typename Expected, typename Fn>
48+
void bench(benchmark::State &state, std::int64_t threads, const Expected &expected, Fn fn) {
49+
bench(state, threads, expected, std::equal_to<>{}, fn);
50+
}
51+
52+
template <typename Expected, typename Check, typename Fn>
53+
void bench(benchmark::State &state, const Expected &expected, Check check, Fn fn) {
54+
bench(state, no_threads, expected, check, fn);
55+
}
56+
57+
template <typename Expected, typename Fn>
58+
void bench(benchmark::State &state, const Expected &expected, Fn fn) {
59+
bench(state, no_threads, expected, fn);
60+
}
61+
62+
} // namespace lf_bench

benchmark/lib/fib.hpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
#pragma once
22

3+
#include <benchmark/benchmark.h>
4+
5+
#include "bench.hpp"
6+
37
#ifdef LF_BENCH_NO_IMPORT_STD
48
#include <cstdint>
9+
#include <functional>
510
#else
611
import std;
712
#endif
@@ -29,3 +34,20 @@ constexpr auto fib_ref(std::int64_t n) -> std::int64_t {
2934

3035
return curr;
3136
}
37+
38+
template <typename Fn>
39+
void run_fib(benchmark::State &state, std::int64_t threads, Fn fn) {
40+
std::int64_t n = state.range(0);
41+
std::int64_t expect = fib_ref(n);
42+
43+
state.counters["n"] = static_cast<double>(n);
44+
45+
lf_bench::bench(state, threads, expect, [n, fn]() -> std::int64_t {
46+
return std::invoke(fn, n);
47+
});
48+
}
49+
50+
template <typename Fn>
51+
void run_fib(benchmark::State &state, Fn fn) {
52+
run_fib(state, lf_bench::no_threads, fn);
53+
}

benchmark/lib/fold.hpp

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,11 @@
88
#include <concepts>
99
#include <cstddef>
1010
#include <cstdint>
11-
#include <format>
1211
#include <functional>
1312
#include <new>
1413
#include <ranges>
1514
#include <span>
1615
#include <type_traits>
17-
#include <utility>
1816
#include <vector>
1917
#else
2018
import std;
@@ -63,29 +61,30 @@ auto fold_result_is_correct(fold_accum_t<T> result, fold_accum_t<T> expect) -> b
6361
}
6462

6563
template <fold_data_mode Data, typename T, typename Fn>
66-
void run_fold_input(benchmark::State &state, Fn &&fn) {
67-
64+
void run_fold_input(benchmark::State &state, std::int64_t threads, Fn fn) {
6865
auto n = static_cast<std::size_t>(state.range(0));
6966
auto expect = expected_fold_result<T>(n);
7067

71-
auto bench = [&](auto range) -> void {
72-
for (auto _ : state) {
73-
if (auto result = std::invoke(fn, range); !fold_result_is_correct<T>(result, expect)) {
74-
state.SkipWithError(std::format("incorrect result: {} != {}", result, expect));
75-
break;
76-
}
77-
}
68+
auto run = [&](auto const &range) -> void {
69+
lf_bench::bench(state, threads, expect, fold_result_is_correct<T>, [&]() -> fold_accum_t<T> {
70+
return std::invoke(fn, range);
71+
});
7872
};
7973

8074
if constexpr (Data == fold_data_mode::memory) {
81-
bench(make_fold_range<T>(n) | std::ranges::to<std::vector<T>>());
75+
run(make_fold_range<T>(n) | std::ranges::to<std::vector<T>>());
8276
} else {
83-
bench(make_fold_range<T>(n));
77+
run(make_fold_range<T>(n));
8478
}
8579

8680
state.SetItemsProcessed(state.iterations() * static_cast<std::int64_t>(n));
8781
}
8882

83+
template <fold_data_mode Data, typename T, typename Fn>
84+
void run_fold_input(benchmark::State &state, Fn fn) {
85+
run_fold_input<Data, T>(state, lf_bench::no_threads, fn);
86+
}
87+
8988
// Use aliases for shorter names.
9089
inline constexpr auto memory = fold_data_mode::memory;
9190
inline constexpr auto lazy = fold_data_mode::lazy;
@@ -98,13 +97,13 @@ inline constexpr auto async_proj = fold_projection_mode::async;
9897
using int32 = std::int32_t;
9998
using float32 = float;
10099

101-
#define LF_FOLD_BENCH_SIZES_SMALL(bench_fn, category, name, ...) \
100+
#define LF_FOLD_BENCH_SIZES_SMALL(bench_fn, category, name, ...) \
102101
BENCH_ONE(bench_fn, category, name, test, fold __VA_OPT__(, ) __VA_ARGS__) \
103102
BENCH_ONE(bench_fn, category, name, base, fold_1024 __VA_OPT__(, ) __VA_ARGS__) \
104-
BENCH_ONE(bench_fn, category, name, base, fold_1024_sq __VA_OPT__(, ) __VA_ARGS__) \
103+
BENCH_ONE(bench_fn, category, name, base, fold_1024_sq __VA_OPT__(, ) __VA_ARGS__)
105104

106105
#define LF_FOLD_BENCH_SIZES(bench_fn, category, name, ...) \
107-
LF_FOLD_BENCH_SIZES_SMALL(bench_fn, category, name __VA_OPT__(, ) __VA_ARGS__) \
106+
LF_FOLD_BENCH_SIZES_SMALL(bench_fn, category, name __VA_OPT__(, ) __VA_ARGS__) \
108107
BENCH_ONE(bench_fn, category, name, base, fold_1024_cu __VA_OPT__(, ) __VA_ARGS__)
109108

110109
#define LF_FOLD_BENCH_SIZES_MT(bench_fn, category, name, ...) \

benchmark/lib/heat.hpp

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#pragma once
2+
3+
#include "bench.hpp"
4+
5+
#ifdef LF_BENCH_NO_IMPORT_STD
6+
#include <cmath>
7+
#include <cstddef>
8+
#include <functional>
9+
#include <utility>
10+
#include <vector>
11+
#else
12+
import std;
13+
#endif
14+
15+
// NOTE(review): presumably the grid edge lengths `n` selected for the "test"
// and "base" benchmark variants - not referenced in this header, confirm
// against the benchmark registration.
inline constexpr std::size_t heat_test = 64;
16+
inline constexpr std::size_t heat_base = 1024;
17+
18+
// Iteration count that `run_heat` passes to the benchmarked function and to
// `heat_reference`; also published as the "iters" counter.
inline constexpr std::size_t heat_iters = 16;
19+
20+
// Builds an `n` x `n` row-major grid holding the fixed analytic profile
// exp(-8 * (dx^2 + dy^2)), where dx and dy are the cell coordinates scaled to
// [-0.5, 0.5] across the grid.
//
// Returns an empty vector for `n == 0`. For `n == 1` the single cell is placed
// at coordinate -0.5 on both axes instead of evaluating 0/0 (which would fill
// the grid with NaN). Results for `n >= 2` are unchanged.
inline auto heat_make_grid(std::size_t n) -> std::vector<double> {
  std::vector<double> g(n * n);

  // Denominator of the coordinate scaling; clamped so n == 1 does not divide by zero.
  const double span = (n > 1) ? static_cast<double>(n - 1) : 1.0;

  for (std::size_t y = 0; y < n; ++y) {
    // dy depends only on the row, so compute it once per row.
    const double dy = static_cast<double>(y) / span - 0.5;
    for (std::size_t x = 0; x < n; ++x) {
      const double dx = static_cast<double>(x) / span - 0.5;
      g[y * n + x] = std::exp(-8.0 * (dx * dx + dy * dy));
    }
  }
  return g;
}
32+
33+
// Returns true when `actual` and `expected` have the same number of cells and
// every pair of corresponding cells differs by at most 1e-12.
//
// Grids of different sizes never match (previously a longer `actual` indexed
// past the end of `expected`, and a shorter one was accepted on a prefix).
// A NaN in either grid is treated as a mismatch.
inline auto heat_matches(std::vector<double> const &actual, std::vector<double> const &expected) -> bool {
  if (actual.size() != expected.size()) {
    return false;
  }

  for (std::size_t i = 0; i < actual.size(); ++i) {
    // Written as a negated `<=` so that a NaN difference fails the check;
    // `diff > tol` is false for NaN and would let it through.
    if (!(std::abs(actual[i] - expected[i]) <= 1e-12)) {
      return false;
    }
  }
  return true;
}
41+
42+
// Performs one Jacobi sweep on an `n` x `n` row-major grid.
//
// Every interior cell of `dst` becomes the average of its four direct
// neighbours in `src`; the first/last row and first/last column are copied
// from `src` unchanged. `src` and `dst` must each hold n * n values.
// A zero-sized grid (`n == 0`) is a no-op.
inline void heat_jacobi_step(double const *src, double *dst, std::size_t n) {
  // Bounds are written as `k + 1 < n` rather than `k < n - 1`: with an
  // unsigned `n == 0` the subtraction would wrap and the loops would run far
  // out of bounds.
  for (std::size_t y = 1; y + 1 < n; ++y) {
    for (std::size_t x = 1; x + 1 < n; ++x) {
      const std::size_t i = y * n + x;
      dst[i] = 0.25 * (src[i - 1] + src[i + 1] + src[i - n] + src[i + n]);
    }
  }

  // Top and bottom rows keep their values.
  for (std::size_t x = 0; x < n; ++x) {
    dst[x] = src[x];
    dst[(n - 1) * n + x] = src[(n - 1) * n + x];
  }

  // Left and right columns keep their values.
  for (std::size_t y = 0; y < n; ++y) {
    dst[y * n] = src[y * n];
    dst[y * n + (n - 1)] = src[y * n + (n - 1)];
  }
}
58+
59+
inline auto
60+
heat_reference(std::vector<double> initial, std::size_t n, std::size_t iters) -> std::vector<double> {
61+
std::vector<double> scratch(initial.size());
62+
double *src = initial.data();
63+
double *dst = scratch.data();
64+
65+
for (std::size_t t = 0; t < iters; ++t) {
66+
heat_jacobi_step(src, dst, n);
67+
std::swap(src, dst);
68+
}
69+
70+
if (src == initial.data()) {
71+
return initial;
72+
}
73+
return scratch;
74+
}
75+
76+
template <typename Fn>
77+
void run_heat(benchmark::State &state, Fn fn) {
78+
auto n = static_cast<std::size_t>(state.range(0));
79+
state.counters["n"] = static_cast<double>(n);
80+
state.counters["iters"] = static_cast<double>(heat_iters);
81+
82+
std::vector<double> initial = heat_make_grid(n);
83+
std::vector<double> a(initial.size());
84+
std::vector<double> b(initial.size());
85+
std::vector<double> reference = heat_reference(initial, n, heat_iters);
86+
87+
lf_bench::bench(state, true, [&]() -> bool {
88+
a = initial;
89+
std::invoke(fn, a.data(), b.data(), n, heat_iters);
90+
benchmark::DoNotOptimize(a.data());
91+
return heat_matches((heat_iters % 2 == 0) ? a : b, reference);
92+
});
93+
}

benchmark/lib/integrate.hpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#pragma once
2+
3+
#include "bench.hpp"
4+
5+
#ifdef LF_BENCH_NO_IMPORT_STD
6+
#include <cmath>
7+
#include <cstdint>
8+
#include <functional>
9+
#else
10+
import std;
11+
#endif
12+
13+
// NOTE(review): presumably the `n` values (upper integration bounds, see
// `run_integrate`) selected for the "test" and "base" benchmark variants -
// not referenced in this header, confirm against the benchmark registration.
inline constexpr std::int64_t integrate_test = 100;
14+
inline constexpr std::int64_t integrate_base = 10'000;
15+
16+
// NOTE(review): not referenced in this header; presumably the convergence
// tolerance used by the integrator implementations under test - confirm.
inline constexpr double integrate_epsilon = 1.0e-9;
17+
18+
// Integrand of the integrate benchmark: f(x) = (x^2 + 1) * x.
inline constexpr auto integrate_fn(double x) -> double {
  const double quadratic = x * x + 1.0;
  return quadratic * x;
}
19+
20+
// Closed-form value of the integral of (x^2 + 1) * x over [a, b], evaluated
// as F(b) - F(a) with the antiderivative F(x) = x^2 * (x^2 + 2) / 4.
inline constexpr auto integrate_exact(double a, double b) -> double {
  auto antiderivative = [](double t) -> double {
    return 0.25 * t * t * (t * t + 2);
  };

  const double at_upper = antiderivative(b);
  const double at_lower = antiderivative(a);
  return at_upper - at_lower;
}
26+
27+
// Relative-error check: true when `result` lies within 0.1% of `expect`
// (i.e. |result - expect| <= 1e-3 * |expect|).
inline auto integrate_is_close(double result, double expect) -> bool {
  const double error = std::abs(result - expect);
  const double allowed = 1e-3 * std::abs(expect);
  return error <= allowed;
}
30+
31+
template <typename Fn>
32+
void run_integrate(benchmark::State &state, Fn fn) {
33+
std::int64_t n = state.range(0);
34+
double upper = static_cast<double>(n);
35+
double expect = integrate_exact(0, upper);
36+
37+
state.counters["n"] = static_cast<double>(n);
38+
39+
lf_bench::bench(state, expect, integrate_is_close, [upper, fn]() -> double {
40+
return std::invoke(fn, upper);
41+
});
42+
}

0 commit comments

Comments
 (0)