@@ -37,21 +37,6 @@ namespace xsimd
3737
3838 namespace detail
3939 {
// Tag selector: for an sse2 architecture argument, returns a value-initialized
// x86_barrier_tag. The argument itself is unnamed and unused; only its type matters.
// NOTE(review): presumably this tag picks the x86_barrier_tag overload of
// reassociation_barrier — confirm against the dispatching code, which is not visible here.
// NOTE(review): the '-' markers show this definition being removed from this file by the
// change; it has presumably moved elsewhere — confirm.
40- XSIMD_INLINE x86_barrier_tag barrier_tag (sse2 const &) noexcept
41- {
42- return {};
43- }
44-
// x86 flavour of the reassociation barrier, selected by the x86_barrier_tag argument.
// x is passed by reference and handed to the compiler as a read-write operand of an
// empty asm statement, so the optimizer must assume its value may have changed.
// The call sites visible in this change use the barrier to stop -ffast-math from
// reassociating floating-point operations across it.
// NOTE(review): the '-' markers show this definition being removed from this file by the
// change; it has presumably moved elsewhere — confirm.
45- template <class T >
46- XSIMD_INLINE void reassociation_barrier (T& x, x86_barrier_tag) noexcept
47- {
48- #if XSIMD_WITH_GNU_INLINE_ASM && XSIMD_TARGET_X86
// Empty asm template: the only effect is the "+" (read-write) operand on x.
// Per GCC's x86 machine constraints, 'x' should denote an SSE register — confirm
// that every T instantiated here can live in one.
// NOTE(review): the spaces inside the string literals (" " and " +x") look like a
// formatting artifact of this listing; GCC documents that whitespace is not allowed
// at the first position of a constraint — verify against the upstream source.
49- __asm__ volatile (" " : " +x" (x));
50- #else
// Fallback when GNU inline asm is unavailable or the target is not x86: defer to the
// memory_barrier_tag overload, which is defined outside this view.
51- detail::reassociation_barrier (x, memory_barrier_tag {});
52- #endif
53- }
54-
5540 constexpr uint32_t shuffle (uint32_t w, uint32_t x, uint32_t y, uint32_t z)
5641 {
5742 return (z << 6 ) | (y << 4 ) | (x << 2 ) | w;
@@ -731,6 +716,8 @@ namespace xsimd
731716 __m128i mask = _mm_setr_epi16 (0xFFFF , 0xFFFF , 0x0000 , 0x0000 , 0xFFFF , 0xFFFF , 0x0000 , 0x0000 );
732717 __m128i xL = _mm_or_si128 (_mm_and_si128 (mask, x), _mm_andnot_si128 (mask, _mm_castpd_si128 (_mm_set1_pd (0x0010000000000000 )))); // 2^52
733718 __m128d f = _mm_sub_pd (_mm_castsi128_pd (xH), _mm_set1_pd (19342813118337666422669312 .)); // 2^84 + 2^52
719+ // Prevent -ffast-math from reassociating (xH-C)+xL into xH+(xL-C).
720+ detail::reassociation_barrier (f, sse2 {});
734721 return _mm_add_pd (f, _mm_castsi128_pd (xL));
735722 }
736723
@@ -745,6 +732,8 @@ namespace xsimd
745732 __m128i mask = _mm_setr_epi16 (0xFFFF , 0xFFFF , 0xFFFF , 0x0000 , 0xFFFF , 0xFFFF , 0xFFFF , 0x0000 );
746733 __m128i xL = _mm_or_si128 (_mm_and_si128 (mask, x), _mm_andnot_si128 (mask, _mm_castpd_si128 (_mm_set1_pd (0x0010000000000000 )))); // 2^52
747734 __m128d f = _mm_sub_pd (_mm_castsi128_pd (xH), _mm_set1_pd (442726361368656609280 .)); // 3*2^67 + 2^52
735+ // Prevent -ffast-math from reassociating (xH-C)+xL into xH+(xL-C).
736+ detail::reassociation_barrier (f, sse2 {});
748737 return _mm_add_pd (f, _mm_castsi128_pd (xL));
749738 }
750739
0 commit comments