@@ -937,7 +937,11 @@ namespace xsimd
937937 template <class A , class T >
938938 XSIMD_INLINE batch<T, A> exp10 (batch<T, A> const & self, requires_arch<common>) noexcept
939939 {
940- return detail::exp<detail::exp10_tag>(self);
940+ using batch_type = batch<T, A>;
941+ batch_type out = detail::exp<detail::exp10_tag>(self);
942+ // Barrier on the result before it is returned: keeps -ffast-math from folding the whole exp10 batch path for literal inputs.
943+ detail::reassociation_barrier (out, A {});
944+ return out;
941945 }
942946
943947 // exp2
@@ -1494,6 +1498,8 @@ namespace xsimd
14941498 batch_type R = t2 + t1;
14951499 batch_type hfsq = batch_type (0.5 ) * f * f;
14961500 batch_type dk = to_float (k);
1501+ // Keep the compensated k -> float conversion intact before scaling by split log(2).
1502+ detail::reassociation_barrier (dk, A {});
14971503 batch_type r = fma (dk, constants::log_2hi<batch_type>(), fma (s, (hfsq + R), dk * constants::log_2lo<batch_type>()) - hfsq + f);
14981504#ifdef __FAST_MATH__
14991505 return r;
@@ -1525,6 +1531,8 @@ namespace xsimd
15251531 hx += 0x3ff00000 - 0x3fe6a09e ;
15261532 k += (hx >> 20 ) - 0x3ff ;
15271533 batch_type dk = to_float (k);
1534+ // Keep the compensated k -> double conversion intact before scaling by split log(2).
1535+ detail::reassociation_barrier (dk, A {});
15281536 hx = (hx & i_type (0x000fffff )) + 0x3fe6a09e ;
15291537 x = ::xsimd::bitwise_cast<double >(hx << 32 | (i_type (0xffffffff ) & ::xsimd::bitwise_cast<int_type>(x)));
15301538
@@ -1705,6 +1713,8 @@ namespace xsimd
17051713 batch_type t2 = z * detail::horner<batch_type, 0x3f2aaaaa , 0x3e91e9ee >(w);
17061714 batch_type R = t2 + t1;
17071715 batch_type dk = to_float (k);
1716+ // Prevent fast-math from distributing later multiplies through the compensated exponent conversion.
1717+ detail::reassociation_barrier (dk, A {});
17081718 batch_type hfsq = batch_type (0.5 ) * f * f;
17091719 batch_type hibits = f - hfsq;
17101720 hibits &= ::xsimd::bitwise_cast<float >(i_type (0xfffff000 ));
@@ -1752,10 +1762,12 @@ namespace xsimd
17521762#endif
17531763 hx += 0x3ff00000 - 0x3fe6a09e ;
17541764 k += (hx >> 20 ) - 0x3ff ;
1765+ batch_type dk = to_float (k);
1766+ // Prevent fast-math from distributing later multiplies through the compensated exponent conversion.
1767+ detail::reassociation_barrier (dk, A {});
17551768 hx = (hx & i_type (0x000fffff )) + 0x3fe6a09e ;
17561769 x = ::xsimd::bitwise_cast<double >(hx << 32 | (i_type (0xffffffff ) & ::xsimd::bitwise_cast<int_type>(x)));
17571770 batch_type f = --x;
1758- batch_type dk = to_float (k);
17591771 batch_type s = f / (batch_type (2 .) + f);
17601772 batch_type z = s * s;
17611773 batch_type w = z * z;
@@ -1818,6 +1830,8 @@ namespace xsimd
18181830 batch_type R = t2 + t1;
18191831 batch_type hfsq = batch_type (0.5 ) * f * f;
18201832 batch_type dk = to_float (k);
1833+ // Prevent fast-math from distributing later multiplies through the compensated exponent conversion.
1834+ detail::reassociation_barrier (dk, A {});
18211835 /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
18221836 batch_type c = select (batch_bool_cast<float >(k >= i_type (2 )), batch_type (1 .) - (uf - self), self - (uf - batch_type (1 .))) / uf;
18231837 batch_type r = fma (dk, constants::log_2hi<batch_type>(), fma (s, (hfsq + R), dk * constants::log_2lo<batch_type>() + c) - hfsq + f);
@@ -1853,6 +1867,8 @@ namespace xsimd
18531867 batch_type t2 = z * detail::horner<batch_type, 0x3fe5555555555593ll , 0x3fd2492494229359ll , 0x3fc7466496cb03dell , 0x3fc2f112df3e5244ll >(w);
18541868 batch_type R = t2 + t1;
18551869 batch_type dk = to_float (k);
1870+ // Prevent fast-math from distributing later multiplies through the compensated exponent conversion.
1871+ detail::reassociation_barrier (dk, A {});
18561872 batch_type r = fma (dk, constants::log_2hi<batch_type>(), fma (s, hfsq + R, dk * constants::log_2lo<batch_type>() + c) - hfsq + f);
18571873#ifdef __FAST_MATH__
18581874 return r;
@@ -1900,17 +1916,10 @@ namespace xsimd
19001916 batch_type s = bitofsign (self);
19011917 batch_type v = self ^ s;
19021918 batch_type t2n = constants::twotonmb<batch_type>();
1903- // Under fast-math, reordering is possible and the compiler optimizes d
1904- // to v. That's not what we want, so prevent compiler optimization here.
1905- // FIXME: it may be better to emit a memory barrier here (?).
1906- #ifdef __FAST_MATH__
19071919 batch_type d0 = v + t2n;
1908- asm volatile (" " ::" r" (&d0) : " memory" );
1920+ // Prevent fast-math from collapsing (v + 2^n) - 2^n back to v.
1921+ detail::reassociation_barrier (d0.data , A {});
19091922 batch_type d = d0 - t2n;
1910- #else
1911- batch_type d0 = v + t2n;
1912- batch_type d = d0 - t2n;
1913- #endif
19141923 return s ^ select (v < t2n, d, v);
19151924 }
19161925 }
0 commit comments