Skip to content

Commit fb67d3c

Browse files
committed
refactor(arch): architecture deepening - eliminate duplication and shallow modules
Extract shared implementations and remove shallow abstractions:

- Extract SPSCQueue and MPMCQueue to lock_free_queue.hpp (240 lines)
  - Eliminate ~90 lines of duplication in test file
  - Add 4 new MPMCQueue unit tests
- Extract Buffer class to buffer.hpp (101 lines)
  - Eliminate ~45 lines of duplication in benchmark
- Simplify benchmark_utils.hpp
  - Remove DoNotOptimize/ClobberMemory wrappers (use native Google Benchmark)
  - Remove BenchmarkResult/Suite and export_to_json (native JSON support)
  - Reduce ~150 lines of shallow code
- Refactor CompilerOptions.cmake
  - Add _hpc_add_config_options helper
  - Reduce ~30 lines of repetition
- Update docs config with mermaid and llms plugins

Stats: +300 lines (shared headers), -285 lines (duplication), net -15 lines

All 65 tests pass.
1 parent 5eaa504 commit fb67d3c

23 files changed

Lines changed: 3138 additions & 1823 deletions

File tree

benchmarks/common/benchmark_utils.hpp

Lines changed: 5 additions & 186 deletions
Original file line numberDiff line numberDiff line change
@@ -3,205 +3,21 @@
33
* @file benchmark_utils.hpp
44
* @brief Common utilities for benchmarking
55
*
6-
* Provides helpers for preventing compiler optimizations and
7-
* exporting benchmark results.
6+
* Provides helpers for formatting benchmark results and manual timing.
87
*
98
* Validates:
109
* - Requirement 1.1: Google Benchmark Integration
1110
* - Requirement 1.2: Parameterized Benchmarks
12-
* - Requirement 1.4: JSON Output Format
13-
* - Requirement 1.5: Comparison Charts
1411
*/
1512

1613
#include <benchmark/benchmark.h>
1714

1815
#include <chrono>
19-
#include <fstream>
20-
#include <iomanip>
21-
#include <map>
22-
#include <sstream>
23-
#include <stdexcept>
16+
#include <cstdio>
2417
#include <string>
25-
#include <vector>
2618

2719
namespace hpc::bench {
2820

29-
/**
30-
* @brief Prevent compiler from optimizing away a value
31-
*
32-
* This is a wrapper around Google Benchmark's DoNotOptimize.
33-
*/
34-
template <typename T>
35-
inline void DoNotOptimize(T&& value) {
36-
benchmark::DoNotOptimize(std::forward<T>(value));
37-
}
38-
39-
/**
40-
* @brief Force memory barrier
41-
*
42-
* Prevents compiler from reordering memory operations.
43-
*/
44-
inline void ClobberMemory() {
45-
benchmark::ClobberMemory();
46-
}
47-
48-
/**
49-
* @brief Benchmark result structure
50-
*/
51-
struct BenchmarkResult {
52-
std::string name;
53-
std::string module;
54-
int64_t iterations;
55-
double real_time_ns;
56-
double cpu_time_ns;
57-
double bytes_per_second;
58-
double items_per_second;
59-
std::map<std::string, double> counters;
60-
std::string timestamp;
61-
62-
BenchmarkResult() = default;
63-
64-
BenchmarkResult(const std::string& n, int64_t iter, double real_ns, double cpu_ns)
65-
: name(n),
66-
iterations(iter),
67-
real_time_ns(real_ns),
68-
cpu_time_ns(cpu_ns),
69-
bytes_per_second(0),
70-
items_per_second(0) {
71-
auto now = std::chrono::system_clock::now();
72-
auto time_t = std::chrono::system_clock::to_time_t(now);
73-
std::stringstream ss;
74-
ss << std::put_time(std::localtime(&time_t), "%Y-%m-%dT%H:%M:%S");
75-
timestamp = ss.str();
76-
}
77-
};
78-
79-
/**
80-
* @brief Benchmark suite containing multiple results
81-
*/
82-
struct BenchmarkSuite {
83-
std::string version;
84-
std::string compiler;
85-
std::string cpu_info;
86-
std::vector<BenchmarkResult> results;
87-
88-
BenchmarkSuite() : version("1.0.0") {}
89-
};
90-
91-
/**
92-
* @brief Validate benchmark result
93-
*/
94-
inline bool validate_result(const BenchmarkResult& result) {
95-
if (result.name.empty())
96-
return false;
97-
if (result.iterations <= 0)
98-
return false;
99-
if (result.real_time_ns <= 0)
100-
return false;
101-
if (result.cpu_time_ns <= 0)
102-
return false;
103-
return true;
104-
}
105-
106-
/**
107-
* @brief Export benchmark results to JSON
108-
*
109-
* Note: Google Benchmark already supports JSON output via --benchmark_out=file.json
110-
* This is a simplified custom exporter for demonstration.
111-
*/
112-
inline void export_to_json(const std::string& filename,
113-
const std::vector<BenchmarkResult>& results) {
114-
std::ofstream file(filename);
115-
if (!file.is_open()) {
116-
throw std::runtime_error("Cannot open file: " + filename);
117-
}
118-
119-
file << "{\n";
120-
if (results.empty()) {
121-
file << " \"benchmarks\": []\n";
122-
file << "}\n";
123-
return;
124-
}
125-
126-
file << " \"benchmarks\": [\n";
127-
128-
for (size_t i = 0; i < results.size(); ++i) {
129-
const auto& r = results[i];
130-
file << " {\n";
131-
file << " \"name\": \"" << r.name << "\",\n";
132-
if (!r.module.empty()) {
133-
file << " \"module\": \"" << r.module << "\",\n";
134-
}
135-
file << " \"iterations\": " << r.iterations << ",\n";
136-
file << " \"real_time\": " << std::fixed << std::setprecision(2) << r.real_time_ns
137-
<< ",\n";
138-
file << " \"cpu_time\": " << std::fixed << std::setprecision(2) << r.cpu_time_ns
139-
<< ",\n";
140-
file << " \"bytes_per_second\": " << r.bytes_per_second << ",\n";
141-
file << " \"items_per_second\": " << r.items_per_second;
142-
143-
if (!r.counters.empty()) {
144-
file << ",\n \"counters\": {\n";
145-
size_t counter_idx = 0;
146-
for (const auto& [key, value] : r.counters) {
147-
file << " \"" << key << "\": " << value;
148-
if (++counter_idx < r.counters.size())
149-
file << ",";
150-
file << "\n";
151-
}
152-
file << " }";
153-
}
154-
155-
if (!r.timestamp.empty()) {
156-
file << ",\n \"timestamp\": \"" << r.timestamp << "\"";
157-
}
158-
159-
file << "\n }";
160-
if (i < results.size() - 1)
161-
file << ",";
162-
file << "\n";
163-
}
164-
165-
file << " ]\n";
166-
file << "}\n";
167-
}
168-
169-
/**
170-
* @brief Export benchmark suite to JSON
171-
*/
172-
inline void export_suite_to_json(const std::string& filename, const BenchmarkSuite& suite) {
173-
std::ofstream file(filename);
174-
if (!file.is_open()) {
175-
throw std::runtime_error("Cannot open file: " + filename);
176-
}
177-
178-
file << "{\n";
179-
file << " \"version\": \"" << suite.version << "\",\n";
180-
if (!suite.compiler.empty()) {
181-
file << " \"compiler\": \"" << suite.compiler << "\",\n";
182-
}
183-
if (!suite.cpu_info.empty()) {
184-
file << " \"cpu_info\": \"" << suite.cpu_info << "\",\n";
185-
}
186-
file << " \"benchmarks\": [\n";
187-
188-
for (size_t i = 0; i < suite.results.size(); ++i) {
189-
const auto& r = suite.results[i];
190-
file << " {\n";
191-
file << " \"name\": \"" << r.name << "\",\n";
192-
file << " \"iterations\": " << r.iterations << ",\n";
193-
file << " \"real_time\": " << r.real_time_ns << ",\n";
194-
file << " \"cpu_time\": " << r.cpu_time_ns << "\n";
195-
file << " }";
196-
if (i < suite.results.size() - 1)
197-
file << ",";
198-
file << "\n";
199-
}
200-
201-
file << " ]\n";
202-
file << "}\n";
203-
}
204-
20521
/**
20622
* @brief Calculate speedup between two times
20723
*/
@@ -253,6 +69,9 @@ inline std::string format_time(double nanoseconds) {
25369

25470
/**
25571
* @brief Simple timer for manual benchmarking
72+
*
73+
* Note: For most benchmarks, prefer Google Benchmark's built-in timing
74+
* via the benchmark::State parameter.
25675
*/
25776
class Timer {
25877
public:

cmake/CompilerOptions.cmake

Lines changed: 43 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -15,90 +15,74 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
1515
set(HPC_IS_GCC TRUE)
1616
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
1717
set(HPC_IS_CLANG TRUE)
18-
# Detect Apple Clang (has different warning behavior)
1918
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
2019
set(HPC_IS_APPLE_CLANG TRUE)
2120
endif()
2221
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
2322
set(HPC_IS_MSVC TRUE)
2423
endif()
2524

26-
# Detect ARM architecture (Apple Silicon, Raspberry Pi, etc.)
2725
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM")
2826
set(HPC_IS_ARM TRUE)
2927
endif()
3028

29+
#------------------------------------------------------------------------------
30+
# Helper: Add compile options for specific configurations
31+
#------------------------------------------------------------------------------
32+
function(_hpc_add_config_options target)
33+
cmake_parse_arguments(ARG "" "RELEASE;RELWITHDEBINFO;DEBUG" "" ${ARGN})
34+
35+
if(ARG_RELEASE)
36+
target_compile_options(${target} PRIVATE "$<$<CONFIG:Release>:${ARG_RELEASE}>")
37+
endif()
38+
if(ARG_RELWITHDEBINFO)
39+
target_compile_options(${target} PRIVATE "$<$<CONFIG:RelWithDebInfo>:${ARG_RELWITHDEBINFO}>")
40+
endif()
41+
if(ARG_DEBUG)
42+
target_compile_options(${target} PRIVATE "$<$<CONFIG:Debug>:${ARG_DEBUG}>")
43+
endif()
44+
endfunction()
45+
3146
#------------------------------------------------------------------------------
3247
# hpc_set_compiler_options(target)
3348
# Sets warning levels and optimization flags for a target
3449
#------------------------------------------------------------------------------
3550
function(hpc_set_compiler_options target)
36-
# Parse arguments
3751
cmake_parse_arguments(ARG "DISABLE_WARNINGS;ENABLE_FAST_MATH" "" "" ${ARGN})
3852

39-
# Warning flags
4053
if(NOT ARG_DISABLE_WARNINGS)
4154
if(HPC_IS_GCC OR HPC_IS_CLANG)
4255
target_compile_options(${target} PRIVATE
43-
-Wall
44-
-Wextra
45-
-Wpedantic
46-
-Wconversion
47-
-Wshadow
48-
-Wunused
49-
-Wnon-virtual-dtor
50-
-Wold-style-cast
51-
-Wcast-align
52-
-Woverloaded-virtual
53-
-Wformat=2
56+
-Wall -Wextra -Wpedantic -Wconversion -Wshadow -Wunused
57+
-Wnon-virtual-dtor -Wold-style-cast -Wcast-align
58+
-Woverloaded-virtual -Wformat=2
5459
)
55-
# Apple Clang has stricter sign-conversion warnings that trigger
56-
# in third-party libraries like RapidCheck. Suppress them.
5760
if(HPC_IS_APPLE_CLANG)
58-
target_compile_options(${target} PRIVATE
59-
-Wno-sign-conversion
60-
)
61+
target_compile_options(${target} PRIVATE -Wno-sign-conversion)
6162
endif()
6263
elseif(HPC_IS_MSVC)
63-
target_compile_options(${target} PRIVATE
64-
/W4
65-
/permissive-
66-
)
64+
target_compile_options(${target} PRIVATE /W4 /permissive-)
6765
endif()
6866
endif()
6967

70-
# Release optimization flags
7168
if(HPC_IS_ARM)
72-
# ARM uses -mcpu=native instead of -march=native
73-
target_compile_options(${target} PRIVATE
74-
$<$<CONFIG:Release>:-O3>
75-
$<$<CONFIG:Release>:-mcpu=native>
76-
$<$<CONFIG:RelWithDebInfo>:-O2>
77-
$<$<CONFIG:RelWithDebInfo>:-g>
69+
_hpc_add_config_options(${target}
70+
RELEASE "-O3;-mcpu=native"
71+
RELWITHDEBINFO "-O2;-g"
7872
)
7973
elseif(HPC_IS_GCC OR HPC_IS_CLANG)
80-
target_compile_options(${target} PRIVATE
81-
$<$<CONFIG:Release>:-O3>
82-
$<$<CONFIG:Release>:-march=native>
83-
$<$<CONFIG:Release>:-mtune=native>
84-
$<$<CONFIG:RelWithDebInfo>:-O2>
85-
$<$<CONFIG:RelWithDebInfo>:-g>
86-
$<$<CONFIG:RelWithDebInfo>:-march=native>
74+
_hpc_add_config_options(${target}
75+
RELEASE "-O3;-march=native;-mtune=native"
76+
RELWITHDEBINFO "-O2;-g;-march=native"
8777
)
8878

89-
# Fast math (use with caution - breaks IEEE compliance)
9079
if(ARG_ENABLE_FAST_MATH)
91-
target_compile_options(${target} PRIVATE
92-
$<$<CONFIG:Release>:-ffast-math>
93-
)
80+
_hpc_add_config_options(${target} RELEASE "-ffast-math")
9481
endif()
9582

96-
# Enable vectorization reports (opt-in to avoid noisy builds)
9783
if(HPC_VECTORIZE_REPORT)
9884
if(HPC_IS_GCC)
99-
target_compile_options(${target} PRIVATE
100-
$<$<CONFIG:Release>:-fopt-info-vec-optimized>
101-
)
85+
_hpc_add_config_options(${target} RELEASE "-fopt-info-vec-optimized")
10286
elseif(HPC_IS_CLANG)
10387
target_compile_options(${target} PRIVATE
10488
$<$<CONFIG:Release>:-Rpass=loop-vectorize>
@@ -108,17 +92,13 @@ function(hpc_set_compiler_options target)
10892
endif()
10993

11094
elseif(HPC_IS_MSVC)
111-
target_compile_options(${target} PRIVATE
112-
$<$<CONFIG:Release>:/O2>
113-
$<$<CONFIG:Release>:/arch:AVX2>
114-
$<$<CONFIG:RelWithDebInfo>:/O2>
115-
$<$<CONFIG:RelWithDebInfo>:/Zi>
95+
_hpc_add_config_options(${target}
96+
RELEASE "/O2;/arch:AVX2"
97+
RELWITHDEBINFO "/O2;/Zi"
11698
)
11799

118100
if(ARG_ENABLE_FAST_MATH)
119-
target_compile_options(${target} PRIVATE
120-
$<$<CONFIG:Release>:/fp:fast>
121-
)
101+
_hpc_add_config_options(${target} RELEASE "/fp:fast")
122102
endif()
123103
endif()
124104
endfunction()
@@ -131,25 +111,25 @@ function(hpc_enable_simd target)
131111
cmake_parse_arguments(ARG "SSE;AVX;AVX2;AVX512" "" "" ${ARGN})
132112

133113
if(HPC_IS_ARM)
134-
# ARM uses NEON instead of x86 SIMD
135114
if(HPC_IS_CLANG OR HPC_IS_GCC)
136-
# NEON is enabled by default on AArch64, but we can add march flag
137-
target_compile_options(${target} PRIVATE
138-
$<$<CONFIG:Release>:-mcpu=native>
139-
)
115+
_hpc_add_config_options(${target} RELEASE "-mcpu=native")
140116
endif()
141117
elseif(HPC_IS_GCC OR HPC_IS_CLANG)
118+
set(simd_flags "")
142119
if(ARG_SSE)
143-
target_compile_options(${target} PRIVATE -msse4.2)
120+
list(APPEND simd_flags -msse4.2)
144121
endif()
145122
if(ARG_AVX)
146-
target_compile_options(${target} PRIVATE -mavx)
123+
list(APPEND simd_flags -mavx)
147124
endif()
148125
if(ARG_AVX2)
149-
target_compile_options(${target} PRIVATE -mavx2 -mfma)
126+
list(APPEND simd_flags -mavx2 -mfma)
150127
endif()
151128
if(ARG_AVX512)
152-
target_compile_options(${target} PRIVATE -mavx512f -mavx512dq)
129+
list(APPEND simd_flags -mavx512f -mavx512dq)
130+
endif()
131+
if(simd_flags)
132+
target_compile_options(${target} PRIVATE ${simd_flags})
153133
endif()
154134
elseif(HPC_IS_MSVC)
155135
if(ARG_AVX)

0 commit comments

Comments
 (0)