diff --git a/.github/workflows/sanitizer.yml b/.github/workflows/sanitizer.yml index 852019729..7eceeadc9 100644 --- a/.github/workflows/sanitizer.yml +++ b/.github/workflows/sanitizer.yml @@ -9,15 +9,25 @@ defaults: jobs: build: runs-on: ubuntu-latest - name: 'sanitizer - ${{ matrix.sanitizer }}' + name: 'sanitizer - ${{ matrix.flags }}' strategy: matrix: - sanitizer: - - address - - undefined + flags: + - sanitize=address + - sanitize=undefined + - fast-math -fsanitize=undefined + llvm-version: [20] + env: + CC: clang-${{ matrix.llvm-version }} + CXX: clang++-${{ matrix.llvm-version }} steps: - name: Checkout xsimd uses: actions/checkout@v3 + - name: Setup compiler + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh ${{ matrix.llvm-version }} - name: Configure build run: | mkdir _build @@ -26,9 +36,8 @@ jobs: -DBUILD_BENCHMARK=ON \ -DBUILD_EXAMPLES=ON \ -DDOWNLOAD_DOCTEST=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CXX_COMPILER=clang++ \ - -DCMAKE_CXX_FLAGS='-fsanitize=${{ matrix.sanitizer }}' \ + -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_CXX_FLAGS='-f${{ matrix.flags }} -O0 -g -fno-inline' \ -G Ninja - name: Build run: ninja -C _build diff --git a/include/xsimd/arch/common/xsimd_common_complex.hpp b/include/xsimd/arch/common/xsimd_common_complex.hpp index 1283aecca..874825182 100644 --- a/include/xsimd/arch/common/xsimd_common_complex.hpp +++ b/include/xsimd/arch/common/xsimd_common_complex.hpp @@ -78,11 +78,15 @@ namespace xsimd using batch_type = complex_batch_type_t>; using real_batch = typename batch_type::real_batch; using real_value_type = typename real_batch::value_type; +#ifdef __FAST_MATH__ + return { self }; +#else auto cond = xsimd::isinf(real(self)) || xsimd::isinf(imag(self)); return select(cond, batch_type(constants::infinity(), copysign(real_batch(real_value_type(0)), imag(self))), batch_type(self)); +#endif } template diff --git a/include/xsimd/arch/common/xsimd_common_logical.hpp b/include/xsimd/arch/common/xsimd_common_logical.hpp index 7640c2841..6fe242c68 100644 --- a/include/xsimd/arch/common/xsimd_common_logical.hpp +++ b/include/xsimd/arch/common/xsimd_common_logical.hpp @@ -124,12 +124,22 @@ namespace xsimd template XSIMD_INLINE batch_bool isinf(batch const& self, requires_arch) noexcept { +#ifdef __FAST_MATH__ + (void)self; + return { false }; +#else return abs(self) == std::numeric_limits::infinity(); +#endif } template XSIMD_INLINE batch_bool isinf(batch const& self, requires_arch) noexcept { +#ifdef __FAST_MATH__ + (void)self; + return { false }; +#else return abs(self) == std::numeric_limits::infinity(); +#endif } // isfinite diff --git a/include/xsimd/arch/common/xsimd_common_math.hpp b/include/xsimd/arch/common/xsimd_common_math.hpp index 8fb23fd8a..dc9a8b322 100644 --- a/include/xsimd/arch/common/xsimd_common_math.hpp +++ b/include/xsimd/arch/common/xsimd_common_math.hpp @@ -896,7 +896,9 @@ namespace xsimd batch_type k = reducer_t::reduce(self, x); x = reducer_t::approx(x); x = select(self <= reducer_t::minlog(), batch_type(0.), ldexp(x, to_int(k))); +#ifndef __FAST_MATH__ x = select(self >= reducer_t::maxlog(), constants::infinity(), x); +#endif return x; } @@ -910,7 +912,9 @@ namespace xsimd batch_type c = reducer_t::approx(x); c = reducer_t::finalize(x, c, hi, lo); c = select(self <= reducer_t::minlog(), batch_type(0.), ldexp(c, to_int(k))); +#ifndef __FAST_MATH__ c = select(self >= reducer_t::maxlog(), constants::infinity(), c); +#endif return c; } } @@ -1014,11 +1018,11 @@ namespace xsimd XSIMD_INLINE batch expm1(batch const& self, requires_arch) noexcept { using batch_type = batch; - return select(self < constants::logeps(), - batch_type(-1.), - select(self > constants::maxlog(), - constants::infinity(), - detail::expm1(self))); + auto x = detail::expm1(self); +#ifndef __FAST_MATH__ + x = select(self > constants::maxlog(), constants::infinity(), x); +#endif + return select(self < constants::logeps(), batch_type(-1.), x); } template @@ -1245,12 +1249,20 @@ namespace xsimd batch_type r1 = other(q); if (any(ltza)) { +#ifdef __FAST_MATH__ + r = negative(q, r1); +#else r = select(inf_result, constants::infinity(), negative(q, r1)); +#endif if (all(ltza)) return r; } batch_type r2 = select(ltza, r, r1); +#ifdef __FAST_MATH__ + return r2; +#else return select(a == constants::minusinfinity(), constants::nan(), select(inf_result, constants::infinity(), r2)); +#endif } private: @@ -1371,7 +1383,11 @@ namespace xsimd } batch_type r1 = other(a); batch_type r2 = select(test, r, r1); +#ifdef __FAST_MATH__ + return r2; +#else return select(a == constants::minusinfinity(), constants::nan(), select(inf_result, constants::infinity(), r2)); +#endif } private: @@ -1479,12 +1495,12 @@ namespace xsimd batch_type hfsq = batch_type(0.5) * f * f; batch_type dk = to_float(k); batch_type r = fma(dk, constants::log_2hi(), fma(s, (hfsq + R), dk * constants::log_2lo()) - hfsq + f); -#ifndef XSIMD_NO_INFINITIES - batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); +#ifdef __FAST_MATH__ + return r; #else - batch_type zz = select(isnez, r, constants::minusinfinity()); -#endif + batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); return select(!(self >= batch_type(0.)), constants::nan(), zz); +#endif } template @@ -1522,12 +1538,12 @@ namespace xsimd batch_type t2 = z * detail::horner(w); batch_type R = t2 + t1; batch_type r = fma(dk, constants::log_2hi(), fma(s, (hfsq + R), dk * constants::log_2lo()) - hfsq + f); -#ifndef XSIMD_NO_INFINITIES - batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); +#ifdef __FAST_MATH__ + return r; #else - batch_type zz = select(isnez, r, constants::minusinfinity()); -#endif + batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); return select(!(self >= batch_type(0.)), constants::nan(), zz); +#endif } template @@ -1569,12 +1585,12 @@ namespace xsimd batch_type hfsq = batch_type(0.5) * f * f; batch_type dk = to_float(k); batch_type r = fma(fms(s, hfsq + R, hfsq) + f, constants::invlog_2(), dk); -#ifndef XSIMD_NO_INFINITIES - batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); +#ifdef __FAST_MATH__ + return r; #else - batch_type zz = select(isnez, r, constants::minusinfinity()); -#endif + batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); return select(!(self >= batch_type(0.)), constants::nan(), zz); +#endif } template @@ -1617,12 +1633,12 @@ namespace xsimd val_lo += (dk - w1) + val_hi; val_hi = w1; batch_type r = val_lo + val_hi; -#ifndef XSIMD_NO_INFINITIES - batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); +#ifdef __FAST_MATH__ + return r; #else - batch_type zz = select(isnez, r, constants::minusinfinity()); -#endif + batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); return select(!(self >= batch_type(0.)), constants::nan(), zz); +#endif } namespace detail @@ -1757,12 +1773,12 @@ namespace xsimd val_lo += (y - w1) + val_hi; val_hi = w1; batch_type r = val_lo + val_hi; -#ifndef XSIMD_NO_INFINITIES - batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); +#ifdef __FAST_MATH__ + return r; #else - batch_type zz = select(isnez, r, constants::minusinfinity()); -#endif + batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); return select(!(self >= batch_type(0.)), constants::nan(), zz); +#endif } template @@ -1805,12 +1821,12 @@ namespace xsimd /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ batch_type c = select(batch_bool_cast(k >= i_type(2)), batch_type(1.) - (uf - self), self - (uf - batch_type(1.))) / uf; batch_type r = fma(dk, constants::log_2hi(), fma(s, (hfsq + R), dk * constants::log_2lo() + c) - hfsq + f); -#ifndef XSIMD_NO_INFINITIES - batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); +#ifdef __FAST_MATH__ + return r; #else - batch_type zz = select(isnez, r, constants::minusinfinity()); -#endif + batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); return select(!(uf >= batch_type(0.)), constants::nan(), zz); +#endif } template @@ -1838,12 +1854,12 @@ namespace xsimd batch_type R = t2 + t1; batch_type dk = to_float(k); batch_type r = fma(dk, constants::log_2hi(), fma(s, hfsq + R, dk * constants::log_2lo() + c) - hfsq + f); -#ifndef XSIMD_NO_INFINITIES - batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); +#ifdef __FAST_MATH__ + return r; #else - batch_type zz = select(isnez, r, constants::minusinfinity()); -#endif + batch_type zz = select(isnez, select(self == constants::infinity(), constants::infinity(), r), constants::minusinfinity()); return select(!(uf >= batch_type(0.)), constants::nan(), zz); +#endif } template @@ -1980,13 +1996,21 @@ namespace xsimd static XSIMD_INLINE batch_type next(const batch_type& b) noexcept { batch_type n = ::xsimd::bitwise_cast(::xsimd::bitwise_cast(b) + int_type(1)); +#ifdef __FAST_MATH__ + return n; +#else return select(b == constants::infinity(), b, n); +#endif } static XSIMD_INLINE batch_type prev(const batch_type& b) noexcept { batch_type p = ::xsimd::bitwise_cast(::xsimd::bitwise_cast(b) - int_type(1)); +#ifdef __FAST_MATH__ + return p; +#else return select(b == constants::minusinfinity(), b, p); +#endif } }; } @@ -2355,10 +2379,12 @@ namespace xsimd y *= v; y = select(test, y, y * v); y *= constants::sqrt_2pi() * w; -#ifndef XSIMD_NO_INFINITIES +#ifdef __FAST_MATH__ + return y; +#else y = select(isinf(x), x, y); -#endif return select(x > stirlinglargelim, constants::infinity(), y); +#endif } /* origin: boost/simd/arch/common/detail/common/gamma_kernel.hpp */ @@ -2501,7 +2527,11 @@ namespace xsimd } batch_type r1 = detail::tgamma_other(self, test); batch_type r2 = select(test, r, r1); +#ifdef __FAST_MATH__ + return r2; +#else return select(self == batch_type(0.), copysign(constants::infinity(), self), select(nan_result, constants::nan(), r2)); +#endif } } diff --git a/include/xsimd/arch/common/xsimd_common_trigo.hpp b/include/xsimd/arch/common/xsimd_common_trigo.hpp index 235bbc14b..e15ccdc21 100644 --- a/include/xsimd/arch/common/xsimd_common_trigo.hpp +++ b/include/xsimd/arch/common/xsimd_common_trigo.hpp @@ -310,10 +310,13 @@ namespace xsimd num = x2 + num * num; real_batch den = y - one; den = x2 + den * den; - batch_type res = select((x == real_batch(0.)) && (y == real_batch(1.)), - batch_type(real_batch(0.), constants::infinity()), - batch_type(w, 0.25 * log(num / den))); - return res; +#ifdef __FAST_MATH__ + return batch_type(w, 0.25 * log(num / den)); +#else + return select((x == real_batch(0.)) && (y == real_batch(1.)), + batch_type(real_batch(0.), constants::infinity()), + batch_type(w, 0.25 * log(num / den))); +#endif } // atanh @@ -583,12 +586,14 @@ namespace xsimd for (std::size_t i = 0; i < size; ++i) { double arg = args[i]; +#ifndef __FAST_MATH__ if (arg == std::numeric_limits::infinity()) { tmp[i] = 0.; txr[i] = std::numeric_limits::quiet_NaN(); } else +#endif { double y[2]; std::int32_t n = ::xsimd::detail::__ieee754_rem_pio2(arg, y); @@ -841,11 +846,15 @@ namespace xsimd using batch_type = batch, A>; using real_batch = typename batch_type::real_batch; real_batch d = cos(2 * z.real()) + cosh(2 * z.imag()); - batch_type winf(constants::infinity(), constants::infinity()); real_batch wreal = sin(2 * z.real()) / d; real_batch wimag = sinh(2 * z.imag()); +#ifdef __FAST_MATH__ + return batch_type(wreal, real_batch(1.)), batch_type(wreal, wimag / d); +#else + batch_type winf(constants::infinity(), constants::infinity()); batch_type wres = select(isinf(wimag), batch_type(wreal, real_batch(1.)), batch_type(wreal, wimag / d)); return select(d == real_batch(0.), winf, wres); +#endif } // tanh diff --git a/include/xsimd/arch/xsimd_constants.hpp b/include/xsimd/arch/xsimd_constants.hpp index 51411d287..916cdf70d 100644 --- a/include/xsimd/arch/xsimd_constants.hpp +++ b/include/xsimd/arch/xsimd_constants.hpp @@ -61,7 +61,10 @@ namespace xsimd #pragma GCC push_options #pragma GCC optimize("signed-zeros") #endif +#ifndef __FAST_MATH__ XSIMD_DEFINE_CONSTANT(infinity, (std::numeric_limits::infinity()), (std::numeric_limits::infinity())) + XSIMD_DEFINE_CONSTANT(minusinfinity, (-infinity()), (-infinity())) +#endif XSIMD_DEFINE_CONSTANT(invlog_2, 1.442695040888963407359924681001892137426645954152986f, 1.442695040888963407359924681001892137426645954152986) XSIMD_DEFINE_CONSTANT_HEX(invlog_2hi, 0x3fb8b000, 0x3ff7154765200000) XSIMD_DEFINE_CONSTANT_HEX(invlog_2lo, 0xb9389ad4, 0x3de705fc2eefa200) @@ -83,7 +86,6 @@ namespace xsimd XSIMD_DEFINE_CONSTANT(minlog, -88.3762626647949f, -708.3964185322641) XSIMD_DEFINE_CONSTANT(minlog2, -127.0f, -1023.) XSIMD_DEFINE_CONSTANT(minlog10, -37.89999771118164f, -308.2547155599167) - XSIMD_DEFINE_CONSTANT(minusinfinity, (-infinity()), (-infinity())) XSIMD_DEFINE_CONSTANT_HEX(nan, 0xffffffff, 0xffffffffffffffff) XSIMD_DEFINE_CONSTANT_HEX(oneosqrteps, 0x453504f3, 0x4190000000000000) XSIMD_DEFINE_CONSTANT_HEX(oneotwoeps, 0x4a800000, 0x4320000000000000) diff --git a/include/xsimd/arch/xsimd_scalar.hpp b/include/xsimd/arch/xsimd_scalar.hpp index 82f284958..7bf443173 100644 --- a/include/xsimd/arch/xsimd_scalar.hpp +++ b/include/xsimd/arch/xsimd_scalar.hpp @@ -510,7 +510,11 @@ namespace xsimd template ::value>::type> XSIMD_INLINE bool is_flint(const T& x) noexcept { +#ifdef __FAST_MATH__ + return (x - std::trunc(x)) == T(0); +#else return std::isnan(x - x) ? false : (x - std::trunc(x)) == T(0); +#endif } template ::value>::type> diff --git a/include/xsimd/types/xsimd_api.hpp b/include/xsimd/types/xsimd_api.hpp index 7cecb6ca1..54ac836d2 100644 --- a/include/xsimd/types/xsimd_api.hpp +++ b/include/xsimd/types/xsimd_api.hpp @@ -1114,6 +1114,7 @@ namespace xsimd return kernel::incr_if(x, mask, A {}); } +#ifndef __FAST_MATH__ /** * @ingroup batch_constant * @@ -1128,6 +1129,7 @@ namespace xsimd detail::static_check_supported_config(); return B(std::numeric_limits::infinity()); } +#endif /** * @ingroup batch_data_transfer diff --git a/test/test_basic_math.cpp b/test/test_basic_math.cpp index 5f34db720..e29fdb055 100644 --- a/test/test_basic_math.cpp +++ b/test/test_basic_math.cpp @@ -14,6 +14,7 @@ #include "test_utils.hpp" +#ifndef __FAST_MATH__ namespace detail { template ::value> @@ -49,6 +50,7 @@ namespace detail } }; } +#endif template struct basic_math_test @@ -120,6 +122,7 @@ struct basic_math_test CHECK_BATCH_EQ(res, expected); } +#ifndef __FAST_MATH__ void test_isfinite() { detail::infinity_tester::test_isfinite(); @@ -129,6 +132,7 @@ struct basic_math_test { detail::infinity_tester::test_isinf(); } +#endif void test_nextafter() { @@ -170,8 +174,10 @@ TEST_CASE_TEMPLATE("[basic math tests]", B, BATCH_MATH_TYPES) SUBCASE("remainder") { Test.test_remainder(); } SUBCASE("fdim") { Test.test_fdim(); } SUBCASE("clip") { Test.test_clip(); } +#ifndef __FAST_MATH__ SUBCASE("isfinite") { Test.test_isfinite(); } SUBCASE("isinf") { Test.test_isinf(); } +#endif SUBCASE("nextafter") { Test.test_nextafter(); } } #endif diff --git a/test/test_batch_complex.cpp b/test/test_batch_complex.cpp index fb9c6a2e1..e06b31807 100644 --- a/test/test_batch_complex.cpp +++ b/test/test_batch_complex.cpp @@ -628,6 +628,7 @@ struct batch_complex_test } } +#ifndef __FAST_MATH__ void test_isnan() const { { @@ -639,6 +640,7 @@ struct batch_complex_test CHECK_BATCH_EQ(res, expected); } } +#endif private: batch_type batch_lhs() const @@ -689,6 +691,8 @@ TEST_CASE_TEMPLATE("[xsimd complex batches]", B, BATCH_COMPLEX_TYPES) SUBCASE("boolean_conversion") { Test.test_boolean_conversion(); } +#ifndef __FAST_MATH__ SUBCASE("isnan") { Test.test_isnan(); } +#endif } #endif diff --git a/test/test_power.cpp b/test/test_power.cpp index 4a81b0aa9..a2a425c8c 100644 --- a/test/test_power.cpp +++ b/test/test_power.cpp @@ -13,6 +13,7 @@ #ifndef XSIMD_NO_SUPPORTED_ARCHITECTURE #include "test_utils.hpp" +#include template struct power_test @@ -103,6 +104,7 @@ struct power_test CHECK_EQ(diff, 0); #endif } +#ifndef __FAST_MATH__ // pow 0^-x { std::transform(zero_input.cbegin(), zero_input.cend(), rhs_input.cbegin(), expected.begin(), @@ -120,17 +122,18 @@ struct power_test INFO("pow(0, -x)"); CHECK_EQ(diff, 0); } +#endif // ipow { long k = 0; std::transform(lhs_input.cbegin(), lhs_input.cend(), expected.begin(), [&k, this](const value_type& l) - { auto arg = k / size - nb_input / size / 2; ++k; return std::pow(l, arg); }); + { auto arg = k / size / 8000 - nb_input / size / 8000 / 2; ++k; return std::pow(l, arg); }); batch_type lhs_in, out; for (size_t i = 0; i < nb_input; i += size) { detail::load_batch(lhs_in, lhs_input, i); - out = pow(lhs_in, i / size - nb_input / size / 2); + out = pow(lhs_in, i / size / 8000 - nb_input / size / 8000 / 2); detail::store_batch(out, res, i); } size_t diff = detail::get_nb_diff(res, expected); diff --git a/test/test_utils.hpp b/test/test_utils.hpp index 80914f331..f295a8be6 100644 --- a/test/test_utils.hpp +++ b/test/test_utils.hpp @@ -181,6 +181,7 @@ namespace xsimd namespace detail { +#ifndef __FAST_MATH__ namespace utils { // define some overloads here as integer versions do not exist for msvc @@ -208,6 +209,7 @@ namespace detail return false; } } +#endif inline unsigned char uabs(unsigned char val) { @@ -289,7 +291,7 @@ namespace detail { return lhs == rhs; } - +#ifndef __FAST_MATH__ if (utils::isnan(lhs)) { return utils::isnan(rhs); @@ -299,6 +301,7 @@ namespace detail { return utils::isinf(rhs) && (lhs * rhs > 0) /* same sign */; } +#endif T relative_precision = precision_t::max * std::numeric_limits::epsilon(); T absolute_zero_prox = precision_t::max * std::numeric_limits::epsilon(); diff --git a/test/test_xsimd_api.cpp b/test/test_xsimd_api.cpp index ef41c709b..bea9f1b86 100644 --- a/test/test_xsimd_api.cpp +++ b/test/test_xsimd_api.cpp @@ -488,8 +488,12 @@ struct xsimd_api_float_types_functions } void test_atanh() { - value_type val(1); - CHECK_EQ(extract(xsimd::atanh(T(val))), std::atanh(val)); + value_type val0(0); + CHECK_EQ(extract(xsimd::atanh(T(val0))), std::atanh(val0)); +#ifndef __FAST_MATH__ + value_type val1(1); + CHECK_EQ(extract(xsimd::atanh(T(val1))), std::atanh(val1)); +#endif } void test_cbrt() { @@ -1044,6 +1048,7 @@ struct xsimd_api_complex_types_functions CHECK_EQ(extract(xsimd::proj(T(val))), std::proj(val)); } +#ifndef __FAST_MATH__ void test_isinf() { value_type val(4); @@ -1061,6 +1066,7 @@ struct xsimd_api_complex_types_functions value_type val(4); CHECK_EQ(extract(xsimd::isnan(T(val))), std::isnan(std::real(val))); } +#endif }; TEST_CASE_TEMPLATE("[xsimd api | complex types functions]", B, COMPLEX_TYPES) @@ -1086,6 +1092,8 @@ TEST_CASE_TEMPLATE("[xsimd api | complex types functions]", B, COMPLEX_TYPES) Test.test_proj(); } +#ifndef __FAST_MATH__ + SUBCASE("isinf") { Test.test_isinf(); @@ -1100,6 +1108,7 @@ TEST_CASE_TEMPLATE("[xsimd api | complex types functions]", B, COMPLEX_TYPES) { Test.test_isnan(); } +#endif } /* @@ -1194,7 +1203,7 @@ struct xsimd_api_all_types_functions void test_div() { value_type val0(1); - value_type val1(3); + value_type val1(2); CHECK_EQ(extract(xsimd::div(T(val0), T(val1))), val0 / val1); } @@ -1345,6 +1354,7 @@ TEST_CASE_TEMPLATE("[xsimd api | all types functions]", B, ALL_TYPES) /* * Functions that apply only to floating point types */ +#ifndef __FAST_MATH__ template struct xsimd_api_all_floating_point_types_functions { @@ -1363,6 +1373,7 @@ TEST_CASE_TEMPLATE("[xsimd api | all floating point types functions]", B, ALL_FL xsimd_api_all_floating_point_types_functions Test; Test.test_neq_nan(); } +#endif /* * Functions that apply only to mask type