|
1 | 1 | /* Blake2s.c -- BLAKE2sp Hash |
2 | | -2024-01-29 : Igor Pavlov : Public domain |
| 2 | +2024-05-18 : Igor Pavlov : Public domain |
3 | 3 | 2015-2019 : Samuel Neves : original code : CC0 1.0 Universal (CC0 1.0). */ |
4 | 4 |
|
5 | 5 | #include "Precomp.h" |
|
12 | 12 | #include "Compiler.h" |
13 | 13 | #include "CpuArch.h" |
14 | 14 |
|
| 15 | +/*
| 16 | + if defined(__AVX512F__) && defined(__AVX512VL__)
| 17 | + {
| 18 | + Z7_BLAKE2S_USE_AVX512_ALWAYS is defined automatically (unless it is already defined),
| 19 | + and the compiler is allowed to use AVX512 instructions in any code of this file.
| 20 | + }
| 21 | + else if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS)
| 22 | + { AVX512 is enabled only for the SSE* and AVX* code branches (via function target attributes on GCC/Clang). }
| 23 | +*/
| 24 | +// #define Z7_BLAKE2S_USE_AVX512_ALWAYS // for debug |
| 25 | + |
15 | 26 | #if defined(__SSE2__) |
16 | 27 | #define Z7_BLAKE2S_USE_VECTORS |
17 | 28 | #elif defined(MY_CPU_X86_OR_AMD64) |
|
59 | 70 | #endif // SSSE3 |
60 | 71 |
|
61 | 72 | #if defined(__GNUC__) || defined(__clang__) |
| 73 | +#if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) && !(defined(__AVX512F__) && defined(__AVX512VL__)) |
| 74 | + #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("avx512vl,avx512f"))) |
| 75 | +#else |
62 | 76 | #if defined(Z7_BLAKE2S_USE_SSE41) |
63 | 77 | #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse4.1"))) |
64 | 78 | #elif defined(Z7_BLAKE2S_USE_SSSE3) |
|
67 | 81 | #define BLAKE2S_ATTRIB_128BIT __attribute__((__target__("sse2"))) |
68 | 82 | #endif |
69 | 83 | #endif |
| 84 | +#endif |
70 | 85 |
|
71 | 86 |
|
72 | 87 | #if defined(__AVX2__) |
|
77 | 92 | || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) |
78 | 93 | #define Z7_BLAKE2S_USE_AVX2 |
79 | 94 | #ifdef Z7_BLAKE2S_USE_AVX2 |
| 95 | +#if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) && !(defined(__AVX512F__) && defined(__AVX512VL__)) |
| 96 | + #define BLAKE2S_ATTRIB_AVX2 __attribute__((__target__("avx512vl,avx512f"))) |
| 97 | +#else |
80 | 98 | #define BLAKE2S_ATTRIB_AVX2 __attribute__((__target__("avx2"))) |
| 99 | +#endif |
81 | 100 | #endif |
82 | 101 | #elif defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \ |
83 | 102 | || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400) |
|
107 | 126 |
|
108 | 127 | #if defined(__AVX512F__) && defined(__AVX512VL__) |
109 | 128 | // && defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL > 1930) |
| 129 | + #ifndef Z7_BLAKE2S_USE_AVX512_ALWAYS |
110 | 130 | #define Z7_BLAKE2S_USE_AVX512_ALWAYS |
| 131 | + #endif |
111 | 132 | // #pragma message ("=== Blake2s AVX512") |
112 | 133 | #endif |
113 | 134 |
|
@@ -1164,7 +1185,9 @@ Blake2sp_Final_V128_Fast(UInt32 *states) |
1164 | 1185 | #if 1 && defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) |
1165 | 1186 | #define MM256_ROR_EPI32 _mm256_ror_epi32 |
1166 | 1187 | #define Z7_MM256_ROR_EPI32_IS_SUPPORTED |
| 1188 | +#ifdef Z7_BLAKE2S_USE_AVX2_WAY2 |
1167 | 1189 | #define LOAD_ROTATE_CONSTS_256 |
| 1190 | +#endif |
1168 | 1191 | #else |
1169 | 1192 | #ifdef Z7_BLAKE2S_USE_AVX2_WAY_SLOW |
1170 | 1193 | #ifdef Z7_BLAKE2S_USE_AVX2_WAY2 |
@@ -2549,9 +2572,11 @@ void z7_Black2sp_Prepare(void) |
2549 | 2572 |
|
2550 | 2573 | #if defined(MY_CPU_X86_OR_AMD64) |
2551 | 2574 | #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) |
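| 2575 | + // optional runtime CPU check: compiled in only when the compiler does not already target AVX512F + AVX512VL (change 0 to 1 to force it)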
| 2576 | + #if 0 || !(defined(__AVX512F__) && defined(__AVX512VL__)) |
2552 | 2577 | if (CPU_IsSupported_AVX512F_AVX512VL()) |
2553 | | - #endif |
2554 | | - #if defined(Z7_BLAKE2S_USE_SSE41) |
| 2578 | + #endif |
| 2579 | + #elif defined(Z7_BLAKE2S_USE_SSE41) |
2555 | 2580 | if (CPU_IsSupported_SSE41()) |
2556 | 2581 | #elif defined(Z7_BLAKE2S_USE_SSSE3) |
2557 | 2582 | if (CPU_IsSupported_SSSE3()) |
@@ -2584,12 +2609,14 @@ void z7_Black2sp_Prepare(void) |
2584 | 2609 |
|
2585 | 2610 | #ifdef Z7_BLAKE2S_USE_AVX2 |
2586 | 2611 | #if defined(MY_CPU_X86_OR_AMD64) |
2587 | | - if ( |
2588 | | - #if 0 && defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) |
2589 | | - CPU_IsSupported_AVX512F_AVX512VL() && |
| 2612 | + |
| 2613 | + #if defined(Z7_BLAKE2S_USE_AVX512_ALWAYS) |
| 2614 | + #if 0 |
| 2615 | + if (CPU_IsSupported_AVX512F_AVX512VL()) |
| 2616 | + #endif |
| 2617 | + #else |
| 2618 | + if (CPU_IsSupported_AVX2()) |
2590 | 2619 | #endif |
2591 | | - CPU_IsSupported_AVX2() |
2592 | | - ) |
2593 | 2620 | #endif |
2594 | 2621 | { |
2595 | 2622 | // #pragma message ("=== Blake2s AVX2") |
|
0 commit comments