Skip to content

Commit 0f48a94

Browse files
committed
EIGEN FIX: use helper functions for intrinsics when gcc < 10.2.0
1 parent adcbb23 commit 0f48a94

File tree

3 files changed

+101
-11
lines changed

3 files changed

+101
-11
lines changed

lib/eigen_5.0.1/Eigen/src/Core/arch/AVX/TypeCasting.h

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,40 @@ struct type_casting_traits<int64_t, double> : vectorized_type_casting_traits<int
5656
#endif
5757
#endif
5858

59+
// Assembles a 256-bit float vector from two 128-bit halves: `lo` becomes the
// lower 128 bits and `hi` the upper 128 bits (same argument order as
// _mm256_set_m128).
//
// On non-clang GNU compilers with EIGEN_COMP_GNUC < 800 the vector is built
// with a cast followed by a 128-bit lane insert; every other compiler calls
// _mm256_set_m128 directly.
EIGEN_STRONG_INLINE __m256 eigen_mm256_set_m128(__m128 hi, __m128 lo) {
#if !EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_GNUC >= 800
  return _mm256_set_m128(hi, lo);
#else
  // Widen `lo` into the low lane, then drop `hi` into lane 1.
  return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 1);
#endif
}
67+
68+
// Assembles a 256-bit double vector from two 128-bit halves: `lo` becomes the
// lower 128 bits and `hi` the upper 128 bits (same argument order as
// _mm256_set_m128d).
//
// On non-clang GNU compilers with EIGEN_COMP_GNUC < 800 the vector is built
// with a cast followed by a 128-bit lane insert; every other compiler calls
// _mm256_set_m128d directly.
EIGEN_STRONG_INLINE __m256d eigen_mm256_set_m128d(__m128d hi, __m128d lo) {
#if !EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_GNUC >= 800
  return _mm256_set_m128d(hi, lo);
#else
  // Widen `lo` into the low lane, then drop `hi` into lane 1.
  return _mm256_insertf128_pd(_mm256_castpd128_pd256(lo), hi, 1);
#endif
}
76+
77+
// Assembles a 256-bit integer vector from two 128-bit halves: `lo` becomes the
// lower 128 bits and `hi` the upper 128 bits (same argument order as
// _mm256_set_m128i).
//
// On non-clang GNU compilers with EIGEN_COMP_GNUC < 800 the vector is built
// with a cast followed by a 128-bit lane insert; every other compiler calls
// _mm256_set_m128i directly.
EIGEN_STRONG_INLINE __m256i eigen_mm256_set_m128i(__m128i hi, __m128i lo) {
#if EIGEN_COMP_GNUC && !EIGEN_COMP_CLANG && EIGEN_COMP_GNUC < 800
  // _mm256_castsi128_si256 is a plain AVX intrinsic, so it is usable on both
  // the AVX2 and the AVX-only paths.
  const __m256i widened = _mm256_castsi128_si256(lo);
#if defined(EIGEN_VECTORIZE_AVX2)
  return _mm256_inserti128_si256(widened, hi, 1);
#else
  // AVX-only build: _mm256_insertf128_si256 performs the same lane insert on
  // integer vectors without requiring AVX2, which avoids spilling both halves
  // to a temporary stack buffer and reloading them.
  return _mm256_insertf128_si256(widened, hi, 1);
#endif
#else
  return _mm256_set_m128i(hi, lo);
#endif
}
92+
5993
template <>
6094
EIGEN_STRONG_INLINE Packet16b pcast<Packet8f, Packet16b>(const Packet8f& a, const Packet8f& b) {
6195
__m256 nonzero_a = _mm256_cmp_ps(a, pzero(a), _CMP_NEQ_UQ);
@@ -109,7 +143,7 @@ EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
109143

110144
template <>
111145
EIGEN_STRONG_INLINE Packet8i pcast<Packet4d, Packet8i>(const Packet4d& a, const Packet4d& b) {
112-
return _mm256_set_m128i(_mm256_cvttpd_epi32(b), _mm256_cvttpd_epi32(a));
146+
return eigen_mm256_set_m128i(_mm256_cvttpd_epi32(b), _mm256_cvttpd_epi32(a));
113147
}
114148

115149
template <>
@@ -124,7 +158,7 @@ EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
124158

125159
template <>
126160
EIGEN_STRONG_INLINE Packet8f pcast<Packet4d, Packet8f>(const Packet4d& a, const Packet4d& b) {
127-
return _mm256_set_m128(_mm256_cvtpd_ps(b), _mm256_cvtpd_ps(a));
161+
return eigen_mm256_set_m128(_mm256_cvtpd_ps(b), _mm256_cvtpd_ps(a));
128162
}
129163

130164
template <>
@@ -249,7 +283,7 @@ EIGEN_STRONG_INLINE Packet4d pcast<Packet4l, Packet4d>(const Packet4l& a) {
249283

250284
template <>
251285
EIGEN_STRONG_INLINE Packet4d pcast<Packet2l, Packet4d>(const Packet2l& a, const Packet2l& b) {
252-
return _mm256_set_m128d((pcast<Packet2l, Packet2d>(b)), (pcast<Packet2l, Packet2d>(a)));
286+
return eigen_mm256_set_m128d((pcast<Packet2l, Packet2d>(b)), (pcast<Packet2l, Packet2d>(a)));
253287
}
254288

255289
template <>

lib/eigen_5.0.1/Eigen/src/Core/arch/AVX512/PacketMath.h

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,54 @@ typedef eigen_packet_wrapper<__m512i, 6> Packet32s;
4444
typedef eigen_packet_wrapper<__m256i, 6> Packet16s;
4545
typedef eigen_packet_wrapper<__m128i, 6> Packet8s;
4646

47+
// Loads 512 bits from `from` with an unaligned-load intrinsic and returns them
// as an __m512i.
//
// On non-clang GNU compilers with EIGEN_COMP_GNUC < 800 the load goes through
// _mm512_loadu_si512; every other compiler calls _mm512_loadu_epi32.
// NOTE(review): the commit title says "gcc < 10.2.0" while this guard tests
// EIGEN_COMP_GNUC < 800 — confirm the intended cutoff.
EIGEN_STRONG_INLINE __m512i eigen_mm512_loadu_epi32(const int* from) {
#if !EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_GNUC >= 800
  return _mm512_loadu_epi32(from);
#else
  const void* src = reinterpret_cast<const void*>(from);
  return _mm512_loadu_si512(src);
#endif
}
54+
55+
// Loads 512 bits from `from` with an unaligned-load intrinsic and returns them
// as an __m512i.
//
// On non-clang GNU compilers with EIGEN_COMP_GNUC < 800 the load goes through
// _mm512_loadu_si512; every other compiler calls _mm512_loadu_epi64.
// NOTE(review): the commit title says "gcc < 10.2.0" while this guard tests
// EIGEN_COMP_GNUC < 800 — confirm the intended cutoff.
EIGEN_STRONG_INLINE __m512i eigen_mm512_loadu_epi64(const int64_t* from) {
#if !EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_GNUC >= 800
  return _mm512_loadu_epi64(from);
#else
  const void* src = reinterpret_cast<const void*>(from);
  return _mm512_loadu_si512(src);
#endif
}
62+
63+
// Writes the 512-bit integer vector `from` to the address `to` with an
// unaligned-store intrinsic.
//
// On non-clang GNU compilers with EIGEN_COMP_GNUC < 800 the store goes through
// _mm512_storeu_si512; every other compiler calls _mm512_storeu_epi32.
// NOTE(review): the commit title says "gcc < 10.2.0" while this guard tests
// EIGEN_COMP_GNUC < 800 — confirm the intended cutoff.
EIGEN_STRONG_INLINE void eigen_mm512_storeu_epi32(void* to, const __m512i& from) {
#if !EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_GNUC >= 800
  _mm512_storeu_epi32(to, from);
#else
  _mm512_storeu_si512(to, from);
#endif
}
70+
71+
// Writes the 512-bit integer vector `from` to the address `to` with an
// unaligned-store intrinsic.
//
// On non-clang GNU compilers with EIGEN_COMP_GNUC < 800 the store goes through
// _mm512_storeu_si512; every other compiler calls _mm512_storeu_epi64.
// NOTE(review): the commit title says "gcc < 10.2.0" while this guard tests
// EIGEN_COMP_GNUC < 800 — confirm the intended cutoff.
EIGEN_STRONG_INLINE void eigen_mm512_storeu_epi64(void* to, const __m512i& from) {
#if !EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_GNUC >= 800
  _mm512_storeu_epi64(to, from);
#else
  _mm512_storeu_si512(to, from);
#endif
}
78+
79+
// Writes the 256-bit integer vector `from` to the address `to` with an
// unaligned-store intrinsic.
//
// On non-clang GNU compilers with EIGEN_COMP_GNUC < 800 the store goes through
// _mm256_storeu_si256; every other compiler calls _mm256_storeu_epi32.
// NOTE(review): the commit title says "gcc < 10.2.0" while this guard tests
// EIGEN_COMP_GNUC < 800 — confirm the intended cutoff.
EIGEN_STRONG_INLINE void eigen_mm256_storeu_epi32(void* to, const __m256i& from) {
#if !EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_GNUC >= 800
  _mm256_storeu_epi32(to, from);
#else
  __m256i* dst = reinterpret_cast<__m256i*>(to);
  _mm256_storeu_si256(dst, from);
#endif
}
86+
87+
// Writes the 128-bit integer vector `from` to the address `to` with an
// unaligned-store intrinsic.
//
// On non-clang GNU compilers with EIGEN_COMP_GNUC < 800 the store goes through
// _mm_storeu_si128; every other compiler calls _mm_storeu_epi32.
// NOTE(review): the commit title says "gcc < 10.2.0" while this guard tests
// EIGEN_COMP_GNUC < 800 — confirm the intended cutoff.
EIGEN_STRONG_INLINE void eigen_mm_storeu_epi32(void* to, const __m128i& from) {
#if !EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_GNUC >= 800
  _mm_storeu_epi32(to, from);
#else
  __m128i* dst = reinterpret_cast<__m128i*>(to);
  _mm_storeu_si128(dst, from);
#endif
}
94+
4795
template <>
4896
struct is_arithmetic<__m512> {
4997
enum { value = true };
@@ -1033,11 +1081,11 @@ EIGEN_STRONG_INLINE Packet8d ploadu<Packet8d>(const double* from) {
10331081
}
10341082
template <>
10351083
EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) {
1036-
EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_epi32(from);
1084+
EIGEN_DEBUG_UNALIGNED_LOAD return eigen_mm512_loadu_epi32(from);
10371085
}
10381086
template <>
10391087
EIGEN_STRONG_INLINE Packet8l ploadu<Packet8l>(const int64_t* from) {
1040-
EIGEN_DEBUG_UNALIGNED_LOAD return _mm512_loadu_epi64(from);
1088+
EIGEN_DEBUG_UNALIGNED_LOAD return eigen_mm512_loadu_epi64(from);
10411089
}
10421090

10431091
template <>
@@ -1158,11 +1206,11 @@ EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet8d& from) {
11581206
}
11591207
template <>
11601208
EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet16i& from) {
1161-
EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_epi32(to, from);
1209+
EIGEN_DEBUG_UNALIGNED_STORE eigen_mm512_storeu_epi32(to, from);
11621210
}
11631211
template <>
11641212
EIGEN_STRONG_INLINE void pstoreu<int64_t>(int64_t* to, const Packet8l& from) {
1165-
EIGEN_DEBUG_UNALIGNED_STORE _mm512_storeu_epi64(to, from);
1213+
EIGEN_DEBUG_UNALIGNED_STORE eigen_mm512_storeu_epi64(to, from);
11661214
}
11671215
template <>
11681216
EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet16f& from, uint16_t umask) {
@@ -2997,19 +3045,19 @@ EIGEN_STRONG_INLINE void pstore<numext::int16_t, Packet8s>(numext::int16_t* out,
29973045
template <>
29983046
EIGEN_STRONG_INLINE void pstoreu<numext::int16_t, Packet32s>(numext::int16_t* out, const Packet32s& x) {
29993047
EIGEN_DEBUG_UNALIGNED_STORE
3000-
_mm512_storeu_epi32(out, x);
3048+
eigen_mm512_storeu_epi32(out, x);
30013049
}
30023050

30033051
template <>
30043052
EIGEN_STRONG_INLINE void pstoreu<numext::int16_t, Packet16s>(numext::int16_t* out, const Packet16s& x) {
30053053
EIGEN_DEBUG_UNALIGNED_STORE
3006-
_mm256_storeu_epi32(out, x);
3054+
eigen_mm256_storeu_epi32(out, x);
30073055
}
30083056

30093057
template <>
30103058
EIGEN_STRONG_INLINE void pstoreu<numext::int16_t, Packet8s>(numext::int16_t* out, const Packet8s& x) {
30113059
EIGEN_DEBUG_UNALIGNED_STORE
3012-
_mm_storeu_epi32(out, x);
3060+
eigen_mm_storeu_epi32(out, x);
30133061
}
30143062

30153063
template <>

lib/eigen_5.0.1/Eigen/src/Core/arch/AVX512/TypeCasting.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,17 @@ struct type_casting_traits<bfloat16, float> : vectorized_type_casting_traits<bfl
5252
template <>
5353
struct type_casting_traits<float, bfloat16> : vectorized_type_casting_traits<float, bfloat16> {};
5454

55+
// Compares the float lanes of `a` and `b` for inequality and returns the
// result as a 16-bit lane mask.
//
// On non-clang GNU compilers with EIGEN_COMP_GNUC < 800 the comparison is
// spelled as _mm512_cmp_ps_mask with the _CMP_NEQ_UQ predicate; every other
// compiler calls _mm512_cmpneq_ps_mask.
EIGEN_STRONG_INLINE __mmask16 eigen_mm512_cmpneq_ps_mask(__m512 a, __m512 b) {
#if !EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_GNUC >= 800
  return _mm512_cmpneq_ps_mask(a, b);
#else
  return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ);
#endif
}
62+
5563
template <>
5664
EIGEN_STRONG_INLINE Packet16b pcast<Packet16f, Packet16b>(const Packet16f& a) {
57-
__mmask16 mask = _mm512_cmpneq_ps_mask(a, pzero(a));
65+
__mmask16 mask = eigen_mm512_cmpneq_ps_mask(a, pzero(a));
5866
return _mm512_maskz_cvtepi32_epi8(mask, _mm512_set1_epi32(1));
5967
}
6068

0 commit comments

Comments
 (0)