Skip to content

Commit b85d169

Browse files
Abseil Team authored and copybara-github committed
Optimize crc32 on AMD Milan+
We have AVX-encoded vector PCLMULQDQ on Milan, so use it to make crc32c computations ~10% faster. We need to use inline asm, since building this twice with different compiler flags for dynamic dispatch performed worse due to missing inlining. BM_Calculate/0 1.136n ± 0% 1.136n ± 1% ~ (p=0.968 n=6) BM_Calculate/1 1.420n ± 0% 1.421n ± 1% ~ (p=0.870 n=6) BM_Calculate/100 9.089n ± 0% 9.660n ± 1% +6.29% (p=0.002 n=6) BM_Calculate/2048 75.30n ± 1% 67.67n ± 1% -10.13% (p=0.002 n=6) BM_Calculate/10000 313.1n ± 0% 286.1n ± 0% -8.63% (p=0.002 n=6) BM_Calculate/500000 14.91µ ± 4% 13.49µ ± 1% -9.48% (p=0.002 n=6) BM_Extend/0 1.136n ± 1% 1.136n ± 1% ~ (p=0.636 n=6) BM_Extend/1 1.420n ± 0% 1.420n ± 1% ~ (p=0.636 n=6) BM_Extend/100 9.247n ± 2% 9.800n ± 2% +5.99% (p=0.002 n=6) BM_Extend/2048 75.73n ± 1% 67.37n ± 1% -11.04% (p=0.002 n=6) BM_Extend/10000 313.2n ± 1% 286.2n ± 0% -8.62% (p=0.002 n=6) BM_Extend/500000 14.87µ ± 1% 13.57µ ± 1% -8.74% (p=0.002 n=6) BM_Extend/100000000 3.185m ± 2% 2.816m ± 3% -11.60% (p=0.002 n=6) BM_ExtendCacheMiss/10 26.07m ± 1% 26.06m ± 1% ~ (p=1.000 n=6) BM_ExtendCacheMiss/100 13.86m ± 4% 14.36m ± 2% +3.61% (p=0.026 n=6) BM_ExtendCacheMiss/1000 27.02m ± 4% 27.28m ± 4% ~ (p=0.699 n=6) BM_ExtendCacheMiss/100000 5.114m ± 5% 4.600m ± 8% -10.07% (p=0.002 n=6) BM_ExtendByZeroes/1 1.420n ± 0% 1.420n ± 0% ~ (p=0.670 n=12) BM_ExtendByZeroes/10 1.704n ± 1% 1.704n ± 0% ~ (p=1.000 n=6) BM_ExtendByZeroes/100 3.128n ± 0% 3.128n ± 0% ~ (p=1.000 n=6) BM_ExtendByZeroes/1000 6.758n ± 0% 6.638n ± 1% -1.78% (p=0.002 n=6) BM_ExtendByZeroes/10000 6.619n ± 1% 6.503n ± 0% -1.75% (p=0.002 n=6) BM_ExtendByZeroes/100000 8.537n ± 1% 8.479n ± 0% -0.67% (p=0.019 n=6) BM_ExtendByZeroes/1000000 9.766n ± 1% 9.692n ± 1% -0.75% (p=0.002 n=6) PiperOrigin-RevId: 900897540 Change-Id: I57d8df2bf10690afc07009d61f8c4ea61e88ce50
1 parent 5f9d5bf commit b85d169

2 files changed

Lines changed: 80 additions & 182 deletions

File tree

absl/crc/internal/crc32_x86_arm_combined_simd.h

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
#ifndef ABSL_CRC_INTERNAL_CRC32_X86_ARM_COMBINED_SIMD_H_
1616
#define ABSL_CRC_INTERNAL_CRC32_X86_ARM_COMBINED_SIMD_H_
1717

18-
#include <array>
1918
#include <cstdint>
2019

2120
#include "absl/base/config.h"
@@ -66,13 +65,6 @@ using V128 = uint64x2_t;
6665
using V128 = __m128i;
6766
#endif
6867

69-
#if defined(__AVX__)
70-
using V256 = __m256i;
71-
#else
72-
// Placeholder for V256 when AVX is not available.
73-
using V256 = std::array<uint64_t, 4>;
74-
#endif
75-
7668
// Starting with the initial value in |crc|, accumulates a CRC32 value for
7769
// unsigned integers of different sizes.
7870
uint32_t CRC32_u8(uint32_t crc, uint8_t v);
@@ -127,17 +119,6 @@ int64_t V128_Low64(const V128 l);
127119
// Add packed 64-bit integers in |l| and |r|.
128120
V128 V128_Add64(const V128 l, const V128 r);
129121

130-
#if defined(__AVX__)
131-
inline V256 V256_LoadU(const V256* src);
132-
inline V256 V256_Broadcast128(const V128* src);
133-
#else
134-
template <typename T = V256>
135-
T V256_LoadU(const T* src);
136-
137-
template <typename T = V256>
138-
T V256_Broadcast128(const V128* src);
139-
#endif
140-
141122
#endif
142123

143124
#if defined(ABSL_CRC_INTERNAL_HAVE_X86_SIMD)
@@ -290,26 +271,6 @@ inline V128 V128_Add64(const V128 l, const V128 r) { return vaddq_u64(l, r); }
290271

291272
#endif
292273

293-
#if defined(__AVX__)
294-
inline V256 V256_LoadU(const V256* src) { return _mm256_loadu_si256(src); }
295-
296-
inline V256 V256_Broadcast128(const V128* src) {
297-
return _mm256_castps_si256(
298-
_mm256_broadcast_ps(reinterpret_cast<const __m128*>(src)));
299-
}
300-
#elif defined(ABSL_CRC_INTERNAL_HAVE_X86_SIMD) || \
301-
defined(ABSL_CRC_INTERNAL_HAVE_ARM_SIMD)
302-
template <typename T>
303-
inline T V256_LoadU(const T* src) {
304-
return T{};
305-
}
306-
307-
template <typename T>
308-
inline T V256_Broadcast128(const V128* src) {
309-
return T{};
310-
}
311-
#endif
312-
313274
} // namespace crc_internal
314275
ABSL_NAMESPACE_END
315276
} // namespace absl

0 commit comments

Comments
 (0)