Skip to content

Commit ee04e12

Browse files
authored
Update LexCHA.cpp
1 parent b745b74 commit ee04e12

1 file changed

Lines changed: 4 additions & 10 deletions

File tree

FullPermutation/LexCHA.cpp

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,7 @@
22
* Official Implementation of LexCHA Indexing Algorithms
33
* Author: Yusheng Hu
44
* Research: A Divide-and-Conquer Engine for Lexicographical Permutations
5-
* Repository: https://github.com/Yusheng-Hu/Position-Pure-Algorithm
6-
* * Note: Manual permutation logic is used in precomputation to prevent
7-
* GCC -Wstringop-overflow warnings during aggressive optimization.
5+
* Note: Added <cstdint> to resolve compilation errors regarding uint8_t.
86
*/
97

108
#include <iostream>
@@ -13,6 +11,7 @@
1311
#include <immintrin.h>
1412
#include <cstring>
1513
#include <algorithm>
14+
#include <cstdint> // Required for uint8_t
1615

1716
// ── Architecture Configuration ───────────────────────────────────────
1817
constexpr int TAIL_DEPTH = 5;
@@ -22,7 +21,7 @@ constexpr int XMM_LANES = 16;
2221
// Align LUT to 16-byte boundary for SIMD compatibility
2322
alignas(16) uint8_t flat_lut_N5[FLAT_STEPS][XMM_LANES];
2423

25-
// ── 1. Precompute: Manual permutation to avoid iterator-based warnings ──
24+
// ── 1. Precompute: Manual permutation ────────────────────────────────
2625
void next_perm_manual(uint8_t* p, int n) {
2726
int i = n - 1;
2827
while (i > 0 && p[i - 1] >= p[i]) i--;
@@ -46,20 +45,18 @@ void precompute_only_flat_lut_N5() {
4645

4746
next_perm_manual(P, TAIL_DEPTH);
4847

49-
// Ensure all lanes are initialized to prevent garbage data
5048
std::memset(flat_lut_N5[step], 0, XMM_LANES);
5149
for (int i = 0; i < TAIL_DEPTH; ++i) {
5250
flat_lut_N5[step][i] = M[P[i]];
5351
}
5452
}
5553
}
5654

57-
// ── 2. Accelerated engine: SIMD blind ops + boundary skip ──
55+
// ── 2. Accelerated engine ──────────────────────────────────────────
5856
unsigned long long benchmark_accelerated(int N) {
5957
std::vector<int> D(N);
6058
for(int i = 0; i < N; ++i) D[i] = i;
6159

62-
// Aligned buffer to ensure safe memory access for SIMD instructions
6360
alignas(16) uint8_t buffer[32] = {0};
6461
std::memcpy(buffer, &D[N - TAIL_DEPTH], TAIL_DEPTH * sizeof(int));
6562
__m128i p_reg = _mm_load_si128((__m128i*)buffer);
@@ -69,18 +66,15 @@ unsigned long long benchmark_accelerated(int N) {
6966
for(int i = 1; i <= N; ++i) max_perms *= i;
7067

7168
while (total_count < max_perms) {
72-
// [SIMD path]: Execute 119 rapid state transitions
7369
for (int step = 0; step < FLAT_STEPS; ++step) {
7470
__m128i mask = _mm_load_si128((__m128i*)flat_lut_N5[step]);
7571
p_reg = _mm_shuffle_epi8(p_reg, mask);
7672
}
7773
total_count += FLAT_STEPS;
7874

79-
// [Sync]: Write back to memory
8075
_mm_store_si128((__m128i*)buffer, p_reg);
8176
std::memcpy(&D[N - TAIL_DEPTH], buffer, TAIL_DEPTH * sizeof(int));
8277

83-
// Handle block boundary with standard library permutation
8478
if (std::next_permutation(D.begin(), D.end())) {
8579
total_count++;
8680
std::memcpy(buffer, &D[N - TAIL_DEPTH], TAIL_DEPTH * sizeof(int));

0 commit comments

Comments
 (0)