Skip to content

Commit 5c0d202

Browse files
committed
refactor: CMake build system, SIMD/concurrency examples, property-based tests, benchmark improvements
1 parent af81a5f commit 5c0d202

15 files changed

Lines changed: 36 additions & 70 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ include(ExampleTemplate)
2727
option(HPC_BUILD_TESTS "Build tests" ON)
2828
option(HPC_BUILD_BENCHMARKS "Build benchmarks" ON)
2929
option(HPC_ENABLE_OPENMP "Enable OpenMP support" ON)
30+
option(HPC_VECTORIZE_REPORT "Enable compiler vectorization reports (noisy)" OFF)
3031

3132
# Find OpenMP if enabled
3233
if(HPC_ENABLE_OPENMP)

build_errors.txt

Whitespace-only changes.

cmake/CompilerOptions.cmake

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -67,16 +67,18 @@ function(hpc_set_compiler_options target)
6767
)
6868
endif()
6969

70-
# Enable vectorization reports
71-
if(HPC_IS_GCC)
72-
target_compile_options(${target} PRIVATE
73-
$<$<CONFIG:Release>:-fopt-info-vec-optimized>
74-
)
75-
elseif(HPC_IS_CLANG)
76-
target_compile_options(${target} PRIVATE
77-
$<$<CONFIG:Release>:-Rpass=loop-vectorize>
78-
$<$<CONFIG:Release>:-Rpass-missed=loop-vectorize>
79-
)
70+
# Enable vectorization reports (opt-in to avoid noisy builds)
71+
if(HPC_VECTORIZE_REPORT)
72+
if(HPC_IS_GCC)
73+
target_compile_options(${target} PRIVATE
74+
$<$<CONFIG:Release>:-fopt-info-vec-optimized>
75+
)
76+
elseif(HPC_IS_CLANG)
77+
target_compile_options(${target} PRIVATE
78+
$<$<CONFIG:Release>:-Rpass=loop-vectorize>
79+
$<$<CONFIG:Release>:-Rpass-missed=loop-vectorize>
80+
)
81+
endif()
8082
endif()
8183

8284
elseif(HPC_IS_MSVC)

cmake/ExampleTemplate.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ function(hpc_add_example)
6969
add_executable(${bench_name} ${ARG_BENCHMARK_SOURCES})
7070

7171
hpc_set_compiler_options(${bench_name})
72+
hpc_enable_sanitizers(${bench_name})
7273

7374
target_link_libraries(${bench_name} PRIVATE
7475
benchmark::benchmark

examples/02-memory-cache/src/prefetch.cpp

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* - When prefetching helps (and when it doesn't)
1313
*/
1414

15+
#include "memory_utils.hpp"
1516
#include <chrono>
1617
#include <cstdint>
1718
#include <iostream>
@@ -20,32 +21,6 @@
2021

2122
namespace hpc::memory {
2223

23-
//------------------------------------------------------------------------------
24-
// Prefetch intrinsics
25-
//------------------------------------------------------------------------------
26-
27-
/**
28-
* @brief Prefetch for read with high temporal locality
29-
*/
30-
template<typename T>
31-
inline void prefetch_read(const T* ptr) {
32-
#if defined(__GNUC__) || defined(__clang__)
33-
__builtin_prefetch(ptr, 0, 3); // Read, high temporal locality
34-
#elif defined(_MSC_VER)
35-
_mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0);
36-
#endif
37-
}
38-
39-
/**
40-
* @brief Prefetch for write
41-
*/
42-
template<typename T>
43-
inline void prefetch_write(T* ptr) {
44-
#if defined(__GNUC__) || defined(__clang__)
45-
__builtin_prefetch(ptr, 1, 3); // Write, high temporal locality
46-
#endif
47-
}
48-
4924
//------------------------------------------------------------------------------
5025
// Array traversal implementations
5126
//------------------------------------------------------------------------------

examples/03-modern-cpp/src/compile_time.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <array>
1616
#include <chrono>
1717
#include <cmath>
18+
#include <cstdint>
1819
#include <iostream>
1920

2021
namespace hpc::compile_time {
@@ -104,9 +105,9 @@ double fast_sin(double angle) {
104105
constexpr double TWO_PI = 2.0 * PI;
105106
constexpr size_t TABLE_SIZE = SIN_TABLE.size();
106107

107-
// Normalize angle to [0, 2*PI)
108-
while (angle < 0) angle += TWO_PI;
109-
while (angle >= TWO_PI) angle -= TWO_PI;
108+
// Normalize angle to [0, 2*PI) in O(1) using fmod
109+
angle = std::fmod(angle, TWO_PI);
110+
if (angle < 0) angle += TWO_PI;
110111

111112
// Convert to table index
112113
size_t index = static_cast<size_t>((angle / TWO_PI) * TABLE_SIZE) % TABLE_SIZE;

examples/03-modern-cpp/src/ranges_vs_loops.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,12 +151,14 @@ int64_t sum_algorithm(const std::vector<int>& input) {
151151
}
152152

153153
/**
154-
* @brief Sum using ranges fold
154+
* @brief Sum using ranges
155+
*
156+
* C++23 introduces std::ranges::fold_left for this purpose.
157+
* In C++20 we iterate over a ranges::subrange to stay within the ranges API.
155158
*/
156159
int64_t sum_ranges(const std::vector<int>& input) {
157-
// C++23 has std::ranges::fold_left, for C++20 we use a workaround
158160
int64_t sum = 0;
159-
for (int x : input) {
161+
for (int x : std::ranges::subrange(input)) {
160162
sum += x;
161163
}
162164
return sum;

examples/04-simd-vectorization/bench/simd_bench.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
*/
1010

1111
#include <benchmark/benchmark.h>
12-
#include "../include/simd_utils.hpp"
13-
#include "../include/simd_wrapper.hpp"
12+
#include "simd_utils.hpp"
13+
#include "simd_wrapper.hpp"
1414
#include <vector>
1515
#include <random>
1616
#include <cmath>

examples/04-simd-vectorization/include/simd_utils.hpp

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ inline size_t get_simd_alignment() {
6969

7070
/**
7171
* @brief Aligned memory allocator for SIMD operations
72+
*
73+
* Note: This is intentionally separate from memory_utils.hpp's AlignedAllocator
74+
* to keep the SIMD module self-contained. This allocator uses runtime SIMD
75+
* detection for alignment, whereas AlignedAllocator uses a compile-time parameter.
7276
*/
7377
template<typename T>
7478
class aligned_allocator {
@@ -208,24 +212,4 @@ inline size_t simd_vector_width(SIMDLevel level) {
208212
}
209213
}
210214

211-
/**
212-
* @brief Add two arrays using SIMD wrapper
213-
*/
214-
void add_arrays_wrapped(const float* a, const float* b, float* c, size_t n);
215-
216-
/**
217-
* @brief Compute dot product using SIMD wrapper
218-
*/
219-
float dot_product_wrapped(const float* a, const float* b, size_t n);
220-
221-
/**
222-
* @brief Scale array using SIMD wrapper
223-
*/
224-
void scale_array_wrapped(float* arr, float scalar, size_t n);
225-
226-
/**
227-
* @brief Clamp array values using SIMD wrapper
228-
*/
229-
void clamp_array_wrapped(float* arr, float min_val, float max_val, size_t n);
230-
231215
} // namespace hpc::simd

examples/05-concurrency/src/atomic_ordering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* 6. memory_order_seq_cst - Sequential consistency (default, strongest)
1212
*/
1313

14-
#include "../include/concurrency_utils.hpp"
14+
#include "concurrency_utils.hpp"
1515
#include <iostream>
1616
#include <cassert>
1717
#include <array>

0 commit comments

Comments
 (0)