refactor: CMake build system, SIMD/concurrency examples, property-based tests, benchmark improvements

LessUp · LessUp · commit 5c0d202c556a · 2026-03-09T14:08:19.000+08:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -27,6 +27,7 @@ include(ExampleTemplate)
 option(HPC_BUILD_TESTS "Build tests" ON)
 option(HPC_BUILD_BENCHMARKS "Build benchmarks" ON)
 option(HPC_ENABLE_OPENMP "Enable OpenMP support" ON)
+option(HPC_VECTORIZE_REPORT "Enable compiler vectorization reports (noisy)" OFF)
 
 # Find OpenMP if enabled
 if(HPC_ENABLE_OPENMP)
diff --git a/build_errors.txt b/build_errors.txt
diff --git a/cmake/CompilerOptions.cmake b/cmake/CompilerOptions.cmake
@@ -67,16 +67,18 @@ function(hpc_set_compiler_options target)
             )
         endif()
         
-        # Enable vectorization reports
-        if(HPC_IS_GCC)
-            target_compile_options(${target} PRIVATE
-                $<$<CONFIG:Release>:-fopt-info-vec-optimized>
-            )
-        elseif(HPC_IS_CLANG)
-            target_compile_options(${target} PRIVATE
-                $<$<CONFIG:Release>:-Rpass=loop-vectorize>
-                $<$<CONFIG:Release>:-Rpass-missed=loop-vectorize>
-            )
+        # Enable vectorization reports (opt-in to avoid noisy builds)
+        if(HPC_VECTORIZE_REPORT)
+            if(HPC_IS_GCC)
+                target_compile_options(${target} PRIVATE
+                    $<$<CONFIG:Release>:-fopt-info-vec-optimized>
+                )
+            elseif(HPC_IS_CLANG)
+                target_compile_options(${target} PRIVATE
+                    $<$<CONFIG:Release>:-Rpass=loop-vectorize>
+                    $<$<CONFIG:Release>:-Rpass-missed=loop-vectorize>
+                )
+            endif()
         endif()
         
     elseif(HPC_IS_MSVC)
diff --git a/cmake/ExampleTemplate.cmake b/cmake/ExampleTemplate.cmake
@@ -69,6 +69,7 @@ function(hpc_add_example)
         add_executable(${bench_name} ${ARG_BENCHMARK_SOURCES})
         
         hpc_set_compiler_options(${bench_name})
+        hpc_enable_sanitizers(${bench_name})
         
         target_link_libraries(${bench_name} PRIVATE
             benchmark::benchmark
diff --git a/examples/02-memory-cache/src/prefetch.cpp b/examples/02-memory-cache/src/prefetch.cpp
@@ -12,6 +12,7 @@
  * - When prefetching helps (and when it doesn't)
  */
 
+#include "memory_utils.hpp"
 #include <chrono>
 #include <cstdint>
 #include <iostream>
@@ -20,32 +21,6 @@
 
 namespace hpc::memory {
 
-//------------------------------------------------------------------------------
-// Prefetch intrinsics
-//------------------------------------------------------------------------------
-
-/**
- * @brief Prefetch for read with high temporal locality
- */
-template<typename T>
-inline void prefetch_read(const T* ptr) {
-#if defined(__GNUC__) || defined(__clang__)
-    __builtin_prefetch(ptr, 0, 3);  // Read, high temporal locality
-#elif defined(_MSC_VER)
-    _mm_prefetch(reinterpret_cast<const char*>(ptr), _MM_HINT_T0);
-#endif
-}
-
-/**
- * @brief Prefetch for write
- */
-template<typename T>
-inline void prefetch_write(T* ptr) {
-#if defined(__GNUC__) || defined(__clang__)
-    __builtin_prefetch(ptr, 1, 3);  // Write, high temporal locality
-#endif
-}
-
 //------------------------------------------------------------------------------
 // Array traversal implementations
 //------------------------------------------------------------------------------
diff --git a/examples/03-modern-cpp/src/compile_time.cpp b/examples/03-modern-cpp/src/compile_time.cpp
@@ -15,6 +15,7 @@
 #include <array>
 #include <chrono>
 #include <cmath>
+#include <cstdint>
 #include <iostream>
 
 namespace hpc::compile_time {
@@ -104,9 +105,9 @@ double fast_sin(double angle) {
     constexpr double TWO_PI = 2.0 * PI;
     constexpr size_t TABLE_SIZE = SIN_TABLE.size();
     
-    // Normalize angle to [0, 2*PI)
-    while (angle < 0) angle += TWO_PI;
-    while (angle >= TWO_PI) angle -= TWO_PI;
+    // Normalize angle to [0, 2*PI) in O(1): fmod keeps the sign of angle, so negatives are shifted up below
+    angle = std::fmod(angle, TWO_PI);
+    if (angle < 0) angle += TWO_PI;
     
     // Convert to table index
     size_t index = static_cast<size_t>((angle / TWO_PI) * TABLE_SIZE) % TABLE_SIZE;
diff --git a/examples/03-modern-cpp/src/ranges_vs_loops.cpp b/examples/03-modern-cpp/src/ranges_vs_loops.cpp
@@ -151,12 +151,14 @@ int64_t sum_algorithm(const std::vector<int>& input) {
 }
 
 /**
- * @brief Sum using ranges fold
+ * @brief Sum using ranges
+ *
+ * C++23 introduces std::ranges::fold_left for this purpose.
+ * In C++20 we iterate over a std::ranges::subrange of the input to stay within the ranges API.
  */
 int64_t sum_ranges(const std::vector<int>& input) {
-    // C++23 has std::ranges::fold_left, for C++20 we use a workaround
     int64_t sum = 0;
-    for (int x : input) {
+    for (int x : std::ranges::subrange(input)) {
         sum += x;
     }
     return sum;
diff --git a/examples/04-simd-vectorization/bench/simd_bench.cpp b/examples/04-simd-vectorization/bench/simd_bench.cpp
@@ -9,8 +9,8 @@
  */
 
 #include <benchmark/benchmark.h>
-#include "../include/simd_utils.hpp"
-#include "../include/simd_wrapper.hpp"
+#include "simd_utils.hpp"
+#include "simd_wrapper.hpp"
 #include <vector>
 #include <random>
 #include <cmath>
diff --git a/examples/04-simd-vectorization/include/simd_utils.hpp b/examples/04-simd-vectorization/include/simd_utils.hpp
@@ -69,6 +69,10 @@ inline size_t get_simd_alignment() {
 
 /**
  * @brief Aligned memory allocator for SIMD operations
+ *
+ * Note: This is intentionally separate from memory_utils.hpp's AlignedAllocator
+ * to keep the SIMD module self-contained. This allocator uses runtime SIMD
+ * detection for alignment, whereas AlignedAllocator uses a compile-time parameter.
  */
 template<typename T>
 class aligned_allocator {
@@ -208,24 +212,4 @@ inline size_t simd_vector_width(SIMDLevel level) {
     }
 }
 
-/**
- * @brief Add two arrays using SIMD wrapper
- */
-void add_arrays_wrapped(const float* a, const float* b, float* c, size_t n);
-
-/**
- * @brief Compute dot product using SIMD wrapper
- */
-float dot_product_wrapped(const float* a, const float* b, size_t n);
-
-/**
- * @brief Scale array using SIMD wrapper
- */
-void scale_array_wrapped(float* arr, float scalar, size_t n);
-
-/**
- * @brief Clamp array values using SIMD wrapper
- */
-void clamp_array_wrapped(float* arr, float min_val, float max_val, size_t n);
-
 } // namespace hpc::simd
diff --git a/examples/05-concurrency/src/atomic_ordering.cpp b/examples/05-concurrency/src/atomic_ordering.cpp
@@ -11,7 +11,7 @@
  * 6. memory_order_seq_cst - Sequential consistency (default, strongest)
  */
 
-#include "../include/concurrency_utils.hpp"
+#include "concurrency_utils.hpp"
 #include <iostream>
 #include <cassert>
 #include <array>
diff --git a/examples/05-concurrency/src/lock_free_queue.cpp b/examples/05-concurrency/src/lock_free_queue.cpp
@@ -8,7 +8,7 @@
  * 3. Cache-friendly queue design
  */
 
-#include "../include/concurrency_utils.hpp"
+#include "concurrency_utils.hpp"
 #include <iostream>
 #include <vector>
 #include <optional>
diff --git a/examples/05-concurrency/src/openmp_basics.cpp b/examples/05-concurrency/src/openmp_basics.cpp
@@ -11,7 +11,7 @@
  * Compile with: -fopenmp (GCC/Clang) or /openmp (MSVC)
  */
 
-#include "../include/concurrency_utils.hpp"
+#include "concurrency_utils.hpp"
 #include <iostream>
 #include <vector>
 #include <numeric>
diff --git a/tests/property/benchmark_properties.cpp b/tests/property/benchmark_properties.cpp
@@ -15,7 +15,7 @@
 #include <filesystem>
 #include <regex>
 
-#include "../../benchmarks/common/benchmark_utils.hpp"
+#include "benchmark_utils.hpp"
 
 namespace {
 
diff --git a/tests/property/concurrency_properties.cpp b/tests/property/concurrency_properties.cpp
@@ -16,7 +16,7 @@
 #include <algorithm>
 #include <numeric>
 
-#include "../../examples/05-concurrency/include/concurrency_utils.hpp"
+#include "concurrency_utils.hpp"
 
 namespace {
 
diff --git a/tests/property/simd_properties.cpp b/tests/property/simd_properties.cpp
@@ -20,7 +20,7 @@
 #include <vector>
 
 // Include SIMD wrapper
-#include "../../examples/04-simd-vectorization/include/simd_wrapper.hpp"
+#include "simd_wrapper.hpp"
 
 namespace {