@@ -878,6 +878,7 @@ namespace xsimd
878878 {
879879 batch_type k = nearbyint (a);
880880 x = (a - k) * constants::log_2<batch_type>();
881+ detail::reassociation_barrier (x, " keep reduced exponent ordered before finalize" );
881882 return k;
882883 }
883884
@@ -937,7 +938,10 @@ namespace xsimd
937938 template <class A , class T >
938939 XSIMD_INLINE batch<T, A> exp10 (batch<T, A> const & self, requires_arch<common>) noexcept
939940 {
940- return detail::exp<detail::exp10_tag>(self);
941+ using batch_type = batch<T, A>;
942+ batch_type out = detail::exp<detail::exp10_tag>(self);
943+ detail::reassociation_barrier (out, " prevent folding exp10 for literal inputs" );
944+ return out;
941945 }
942946
943947 // exp2
@@ -1494,6 +1498,7 @@ namespace xsimd
14941498 batch_type R = t2 + t1;
14951499 batch_type hfsq = batch_type (0.5 ) * f * f;
14961500 batch_type dk = to_float (k);
1501+ detail::reassociation_barrier (dk, " keep compensated k conversion before split log(2) scaling" );
14971502 batch_type r = fma (dk, constants::log_2hi<batch_type>(), fma (s, (hfsq + R), dk * constants::log_2lo<batch_type>()) - hfsq + f);
14981503#ifdef __FAST_MATH__
14991504 return r;
@@ -1525,6 +1530,7 @@ namespace xsimd
15251530 hx += 0x3ff00000 - 0x3fe6a09e ;
15261531 k += (hx >> 20 ) - 0x3ff ;
15271532 batch_type dk = to_float (k);
1533+ detail::reassociation_barrier (dk, " keep compensated k conversion before split log(2) scaling" );
15281534 hx = (hx & i_type (0x000fffff )) + 0x3fe6a09e ;
15291535 x = ::xsimd::bitwise_cast<double >(hx << 32 | (i_type (0xffffffff ) & ::xsimd::bitwise_cast<int_type>(x)));
15301536
@@ -1705,6 +1711,7 @@ namespace xsimd
17051711 batch_type t2 = z * detail::horner<batch_type, 0x3f2aaaaa , 0x3e91e9ee >(w);
17061712 batch_type R = t2 + t1;
17071713 batch_type dk = to_float (k);
1714+ detail::reassociation_barrier (dk, " prevent distributing multiplies through compensated exponent conversion" );
17081715 batch_type hfsq = batch_type (0.5 ) * f * f;
17091716 batch_type hibits = f - hfsq;
17101717 hibits &= ::xsimd::bitwise_cast<float >(i_type (0xfffff000 ));
@@ -1752,10 +1759,11 @@ namespace xsimd
17521759#endif
17531760 hx += 0x3ff00000 - 0x3fe6a09e ;
17541761 k += (hx >> 20 ) - 0x3ff ;
1762+ batch_type dk = to_float (k);
1763+ detail::reassociation_barrier (dk, " prevent distributing multiplies through compensated exponent conversion" );
17551764 hx = (hx & i_type (0x000fffff )) + 0x3fe6a09e ;
17561765 x = ::xsimd::bitwise_cast<double >(hx << 32 | (i_type (0xffffffff ) & ::xsimd::bitwise_cast<int_type>(x)));
17571766 batch_type f = --x;
1758- batch_type dk = to_float (k);
17591767 batch_type s = f / (batch_type (2 .) + f);
17601768 batch_type z = s * s;
17611769 batch_type w = z * z;
@@ -1818,6 +1826,7 @@ namespace xsimd
18181826 batch_type R = t2 + t1;
18191827 batch_type hfsq = batch_type (0.5 ) * f * f;
18201828 batch_type dk = to_float (k);
1829+ detail::reassociation_barrier (dk, " prevent distributing multiplies through compensated exponent conversion" );
18211830 /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
18221831 batch_type c = select (batch_bool_cast<float >(k >= i_type (2 )), batch_type (1 .) - (uf - self), self - (uf - batch_type (1 .))) / uf;
18231832 batch_type r = fma (dk, constants::log_2hi<batch_type>(), fma (s, (hfsq + R), dk * constants::log_2lo<batch_type>() + c) - hfsq + f);
@@ -1853,6 +1862,7 @@ namespace xsimd
18531862 batch_type t2 = z * detail::horner<batch_type, 0x3fe5555555555593ll , 0x3fd2492494229359ll , 0x3fc7466496cb03dell , 0x3fc2f112df3e5244ll >(w);
18541863 batch_type R = t2 + t1;
18551864 batch_type dk = to_float (k);
1865+ detail::reassociation_barrier (dk, " prevent distributing multiplies through compensated exponent conversion" );
18561866 batch_type r = fma (dk, constants::log_2hi<batch_type>(), fma (s, hfsq + R, dk * constants::log_2lo<batch_type>() + c) - hfsq + f);
18571867#ifdef __FAST_MATH__
18581868 return r;
@@ -1900,17 +1910,9 @@ namespace xsimd
19001910 batch_type s = bitofsign (self);
19011911 batch_type v = self ^ s;
19021912 batch_type t2n = constants::twotonmb<batch_type>();
1903- // Under fast-math, reordering is possible and the compiler optimizes d
1904- // to v. That's not what we want, so prevent compiler optimization here.
1905- // FIXME: it may be better to emit a memory barrier here (?).
1906- #ifdef __FAST_MATH__
19071913 batch_type d0 = v + t2n;
1908- asm volatile ( " " :: " r " (&d0) : " memory " );
1914+ detail::reassociation_barrier (d0, " prevent collapsing (v + 2^n) - 2^n back to v " );
19091915 batch_type d = d0 - t2n;
1910- #else
1911- batch_type d0 = v + t2n;
1912- batch_type d = d0 - t2n;
1913- #endif
19141916 return s ^ select (v < t2n, d, v);
19151917 }
19161918 }
@@ -2192,12 +2194,16 @@ namespace xsimd
21922194 template <class A >
21932195 XSIMD_INLINE batch<float , A> remainder (batch<float , A> const & self, batch<float , A> const & other, requires_arch<common>) noexcept
21942196 {
2195- return fnma (nearbyint (self / other), other, self);
2197+ batch<float , A> q = nearbyint (self / other);
2198+ detail::reassociation_barrier (q, " prevent pulling multiply back through rounded quotient" );
2199+ return fnma (q, other, self);
21962200 }
21972201 template <class A >
21982202 XSIMD_INLINE batch<double , A> remainder (batch<double , A> const & self, batch<double , A> const & other, requires_arch<common>) noexcept
21992203 {
2200- return fnma (nearbyint (self / other), other, self);
2204+ batch<double , A> q = nearbyint (self / other);
2205+ detail::reassociation_barrier (q, " prevent pulling multiply back through rounded quotient" );
2206+ return fnma (q, other, self);
22012207 }
22022208 template <class A , class T , class = std::enable_if_t <std::is_integral<T>::value>>
22032209 XSIMD_INLINE batch<T, A> remainder (batch<T, A> const & self, batch<T, A> const & other, requires_arch<common>) noexcept
0 commit comments