Update LexCHA.cpp: include <cstdint> for uint8_t and trim comments

Yusheng-Hu · web-flow · commit ee04e123efaf · 2026-06-10T14:10:11.000+08:00
diff --git a/FullPermutation/LexCHA.cpp b/FullPermutation/LexCHA.cpp
@@ -2,9 +2,7 @@
  * Official Implementation of LexCHA Indexing Algorithms
  * Author: Yusheng Hu
  * Research: A Divide-and-Conquer Engine for Lexicographical Permutations
- * Repository: https://github.com/Yusheng-Hu/Position-Pure-Algorithm
- * * Note: Manual permutation logic is used in precomputation to prevent 
- * GCC -Wstringop-overflow warnings during aggressive optimization.
+ * Note: Added <cstdint> to resolve compilation errors regarding uint8_t.
  */
 
 #include <iostream>
@@ -13,6 +11,7 @@
 #include <immintrin.h>
 #include <cstring>
 #include <algorithm>
+#include <cstdint> // Required for uint8_t
 
 // ── Architecture Configuration ───────────────────────────────────────
 constexpr int TAIL_DEPTH = 5;
@@ -22,7 +21,7 @@ constexpr int XMM_LANES = 16;
 // Align LUT to 16-byte boundary for SIMD compatibility
 alignas(16) uint8_t flat_lut_N5[FLAT_STEPS][XMM_LANES];
 
-// ── 1. Precompute: Manual permutation to avoid iterator-based warnings ──
+// ── 1. Precompute: Manual permutation ────────────────────────────────
 void next_perm_manual(uint8_t* p, int n) {
     int i = n - 1;
     while (i > 0 && p[i - 1] >= p[i]) i--;
@@ -46,20 +45,18 @@ void precompute_only_flat_lut_N5() {
         
         next_perm_manual(P, TAIL_DEPTH);
         
-        // Ensure all lanes are initialized to prevent garbage data
         std::memset(flat_lut_N5[step], 0, XMM_LANES);
         for (int i = 0; i < TAIL_DEPTH; ++i) {
             flat_lut_N5[step][i] = M[P[i]];
         }
     }
 }
 
-// ── 2. Accelerated engine: SIMD blind ops + boundary skip ──
+// ── 2. Accelerated engine ──────────────────────────────────────────
 unsigned long long benchmark_accelerated(int N) {
     std::vector<int> D(N);
     for(int i = 0; i < N; ++i) D[i] = i;
     
-    // Aligned buffer to ensure safe memory access for SIMD instructions
     alignas(16) uint8_t buffer[32] = {0}; 
     std::memcpy(buffer, &D[N - TAIL_DEPTH], TAIL_DEPTH * sizeof(int));
     __m128i p_reg = _mm_load_si128((__m128i*)buffer);
@@ -69,18 +66,15 @@ unsigned long long benchmark_accelerated(int N) {
     for(int i = 1; i <= N; ++i) max_perms *= i;
 
     while (total_count < max_perms) {
-        // [SIMD path]: Execute 119 rapid state transitions
         for (int step = 0; step < FLAT_STEPS; ++step) {
             __m128i mask = _mm_load_si128((__m128i*)flat_lut_N5[step]);
             p_reg = _mm_shuffle_epi8(p_reg, mask);
         }
         total_count += FLAT_STEPS;
 
-        // [Sync]: Write back to memory
         _mm_store_si128((__m128i*)buffer, p_reg);
         std::memcpy(&D[N - TAIL_DEPTH], buffer, TAIL_DEPTH * sizeof(int));
         
-        // Handle block boundary with standard library permutation
         if (std::next_permutation(D.begin(), D.end())) {
             total_count++;
             std::memcpy(buffer, &D[N - TAIL_DEPTH], TAIL_DEPTH * sizeof(int));