@@ -743,7 +743,9 @@ namespace xsimd
743743 static XSIMD_INLINE batch_type reduce (const batch_type& a, batch_type& x) noexcept
744744 {
745745 batch_type k = nearbyint (constants::invlog_2<batch_type>() * a);
746+ detail::reassociation_barrier (k, " compensated exp range reduction" );
746747 x = fnma (k, constants::log_2hi<batch_type>(), a);
748+ detail::reassociation_barrier (x, " compensated exp range reduction" );
747749 x = fnma (k, constants::log_2lo<batch_type>(), x);
748750 return k;
749751 }
@@ -769,7 +771,9 @@ namespace xsimd
769771 static XSIMD_INLINE batch_type reduce (const batch_type& a, batch_type& x) noexcept
770772 {
771773 batch_type k = nearbyint (constants::invlog10_2<batch_type>() * a);
774+ detail::reassociation_barrier (k, " compensated exp10 range reduction" );
772775 x = fnma (k, constants::log10_2hi<batch_type>(), a);
776+ detail::reassociation_barrier (x, " compensated exp10 range reduction" );
773777 x -= k * constants::log10_2lo<batch_type>();
774778 return k;
775779 }
@@ -794,6 +798,7 @@ namespace xsimd
794798 static XSIMD_INLINE batch_type reduce (const batch_type& a, batch_type& x) noexcept
795799 {
796800 batch_type k = nearbyint (a);
801+ detail::reassociation_barrier (k, " compensated exp2 range reduction" );
797802 x = (a - k);
798803 return k;
799804 }
@@ -819,7 +824,9 @@ namespace xsimd
819824 static XSIMD_INLINE batch_type reduce (const batch_type& a, batch_type& hi, batch_type& lo, batch_type& x) noexcept
820825 {
821826 batch_type k = nearbyint (constants::invlog_2<batch_type>() * a);
827+ detail::reassociation_barrier (k, " compensated exp range reduction" );
822828 hi = fnma (k, constants::log_2hi<batch_type>(), a);
829+ detail::reassociation_barrier (hi, " compensated exp range reduction" );
823830 lo = k * constants::log_2lo<batch_type>();
824831 x = hi - lo;
825832 return k;
@@ -846,7 +853,9 @@ namespace xsimd
846853 static XSIMD_INLINE batch_type reduce (const batch_type& a, batch_type&, batch_type&, batch_type& x) noexcept
847854 {
848855 batch_type k = nearbyint (constants::invlog10_2<batch_type>() * a);
856+ detail::reassociation_barrier (k, " compensated exp10 range reduction" );
849857 x = fnma (k, constants::log10_2hi<batch_type>(), a);
858+ detail::reassociation_barrier (x, " compensated exp10 range reduction" );
850859 x = fnma (k, constants::log10_2lo<batch_type>(), x);
851860 return k;
852861 }
@@ -878,6 +887,7 @@ namespace xsimd
878887 {
879888 batch_type k = nearbyint (a);
880889 x = (a - k) * constants::log_2<batch_type>();
890+ detail::reassociation_barrier (x, " keep reduced exponent ordered before finalize" );
881891 return k;
882892 }
883893
@@ -937,7 +947,10 @@ namespace xsimd
937947 template <class A , class T >
938948 XSIMD_INLINE batch<T, A> exp10 (batch<T, A> const & self, requires_arch<common>) noexcept
939949 {
940- return detail::exp<detail::exp10_tag>(self);
950+ using batch_type = batch<T, A>;
951+ batch_type out = detail::exp<detail::exp10_tag>(self);
952+ detail::reassociation_barrier (out, " prevent folding exp10 for literal inputs" );
953+ return out;
941954 }
942955
943956 // exp2
@@ -1494,6 +1507,7 @@ namespace xsimd
14941507 batch_type R = t2 + t1;
14951508 batch_type hfsq = batch_type (0.5 ) * f * f;
14961509 batch_type dk = to_float (k);
1510+ detail::reassociation_barrier (dk, " keep compensated k conversion before split log(2) scaling" );
14971511 batch_type r = fma (dk, constants::log_2hi<batch_type>(), fma (s, (hfsq + R), dk * constants::log_2lo<batch_type>()) - hfsq + f);
14981512#ifdef __FAST_MATH__
14991513 return r;
@@ -1525,6 +1539,7 @@ namespace xsimd
15251539 hx += 0x3ff00000 - 0x3fe6a09e ;
15261540 k += (hx >> 20 ) - 0x3ff ;
15271541 batch_type dk = to_float (k);
1542+ detail::reassociation_barrier (dk, " keep compensated k conversion before split log(2) scaling" );
15281543 hx = (hx & i_type (0x000fffff )) + 0x3fe6a09e ;
15291544 x = ::xsimd::bitwise_cast<double >(hx << 32 | (i_type (0xffffffff ) & ::xsimd::bitwise_cast<int_type>(x)));
15301545
@@ -1584,6 +1599,7 @@ namespace xsimd
15841599 batch_type R = t1 + t2;
15851600 batch_type hfsq = batch_type (0.5 ) * f * f;
15861601 batch_type dk = to_float (k);
1602+ detail::reassociation_barrier (dk, " prevent distributing multiplies through compensated exponent conversion" );
15871603 batch_type r = fma (fms (s, hfsq + R, hfsq) + f, constants::invlog_2<batch_type>(), dk);
15881604#ifdef __FAST_MATH__
15891605 return r;
@@ -1629,7 +1645,9 @@ namespace xsimd
16291645 batch_type val_hi = hi * constants::invlog_2hi<batch_type>();
16301646 batch_type val_lo = fma (lo + hi, constants::invlog_2lo<batch_type>(), lo * constants::invlog_2hi<batch_type>());
16311647 batch_type dk = to_float (k);
1648+ detail::reassociation_barrier (dk, " Kahan compensated log2 summation" );
16321649 batch_type w1 = dk + val_hi;
1650+ detail::reassociation_barrier (w1, " Kahan compensated log2 summation" );
16331651 val_lo += (dk - w1) + val_hi;
16341652 val_hi = w1;
16351653 batch_type r = val_lo + val_hi;
@@ -1705,6 +1723,7 @@ namespace xsimd
17051723 batch_type t2 = z * detail::horner<batch_type, 0x3f2aaaaa , 0x3e91e9ee >(w);
17061724 batch_type R = t2 + t1;
17071725 batch_type dk = to_float (k);
1726+ detail::reassociation_barrier (dk, " prevent distributing multiplies through compensated exponent conversion" );
17081727 batch_type hfsq = batch_type (0.5 ) * f * f;
17091728 batch_type hibits = f - hfsq;
17101729 hibits &= ::xsimd::bitwise_cast<float >(i_type (0xfffff000 ));
@@ -1752,10 +1771,11 @@ namespace xsimd
17521771#endif
17531772 hx += 0x3ff00000 - 0x3fe6a09e ;
17541773 k += (hx >> 20 ) - 0x3ff ;
1774+ batch_type dk = to_float (k);
1775+ detail::reassociation_barrier (dk, " prevent distributing multiplies through compensated exponent conversion" );
17551776 hx = (hx & i_type (0x000fffff )) + 0x3fe6a09e ;
17561777 x = ::xsimd::bitwise_cast<double >(hx << 32 | (i_type (0xffffffff ) & ::xsimd::bitwise_cast<int_type>(x)));
17571778 batch_type f = --x;
1758- batch_type dk = to_float (k);
17591779 batch_type s = f / (batch_type (2 .) + f);
17601780 batch_type z = s * s;
17611781 batch_type w = z * z;
@@ -1818,6 +1838,7 @@ namespace xsimd
18181838 batch_type R = t2 + t1;
18191839 batch_type hfsq = batch_type (0.5 ) * f * f;
18201840 batch_type dk = to_float (k);
1841+ detail::reassociation_barrier (dk, " prevent distributing multiplies through compensated exponent conversion" );
18211842 /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
18221843 batch_type c = select (batch_bool_cast<float >(k >= i_type (2 )), batch_type (1 .) - (uf - self), self - (uf - batch_type (1 .))) / uf;
18231844 batch_type r = fma (dk, constants::log_2hi<batch_type>(), fma (s, (hfsq + R), dk * constants::log_2lo<batch_type>() + c) - hfsq + f);
@@ -1853,6 +1874,7 @@ namespace xsimd
18531874 batch_type t2 = z * detail::horner<batch_type, 0x3fe5555555555593ll , 0x3fd2492494229359ll , 0x3fc7466496cb03dell , 0x3fc2f112df3e5244ll >(w);
18541875 batch_type R = t2 + t1;
18551876 batch_type dk = to_float (k);
1877+ detail::reassociation_barrier (dk, " prevent distributing multiplies through compensated exponent conversion" );
18561878 batch_type r = fma (dk, constants::log_2hi<batch_type>(), fma (s, hfsq + R, dk * constants::log_2lo<batch_type>() + c) - hfsq + f);
18571879#ifdef __FAST_MATH__
18581880 return r;
@@ -1900,17 +1922,9 @@ namespace xsimd
19001922 batch_type s = bitofsign (self);
19011923 batch_type v = self ^ s;
19021924 batch_type t2n = constants::twotonmb<batch_type>();
1903- // Under fast-math, reordering is possible and the compiler optimizes d
1904- // to v. That's not what we want, so prevent compiler optimization here.
1905- // FIXME: it may be better to emit a memory barrier here (?).
1906- #ifdef __FAST_MATH__
19071925 batch_type d0 = v + t2n;
1908- asm volatile ( " " :: " r " (&d0) : " memory " );
1926+ detail::reassociation_barrier (d0, " prevent collapsing (v + 2^n) - 2^n back to v " );
19091927 batch_type d = d0 - t2n;
1910- #else
1911- batch_type d0 = v + t2n;
1912- batch_type d = d0 - t2n;
1913- #endif
19141928 return s ^ select (v < t2n, d, v);
19151929 }
19161930 }
@@ -2192,12 +2206,16 @@ namespace xsimd
21922206 template <class A >
21932207 XSIMD_INLINE batch<float , A> remainder (batch<float , A> const & self, batch<float , A> const & other, requires_arch<common>) noexcept
21942208 {
2195- return fnma (nearbyint (self / other), other, self);
2209+ batch<float , A> q = nearbyint (self / other);
2210+ detail::reassociation_barrier (q, " prevent pulling multiply back through rounded quotient" );
2211+ return fnma (q, other, self);
21962212 }
21972213 template <class A >
21982214 XSIMD_INLINE batch<double , A> remainder (batch<double , A> const & self, batch<double , A> const & other, requires_arch<common>) noexcept
21992215 {
2200- return fnma (nearbyint (self / other), other, self);
2216+ batch<double , A> q = nearbyint (self / other);
2217+ detail::reassociation_barrier (q, " prevent pulling multiply back through rounded quotient" );
2218+ return fnma (q, other, self);
22012219 }
22022220 template <class A , class T , class = std::enable_if_t <std::is_integral<T>::value>>
22032221 XSIMD_INLINE batch<T, A> remainder (batch<T, A> const & self, batch<T, A> const & other, requires_arch<common>) noexcept
0 commit comments