Skip to content

Commit 0f962d3

Browse files
authored
Merge pull request #218 from cppalliance/bench
Improve 128-bit mul performance
2 parents 63c99da + 737543f commit 0f962d3

3 files changed

Lines changed: 86 additions & 0 deletions

File tree

include/boost/safe_numbers/detail/signed_integer_basis.hpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,6 +1463,13 @@ BOOST_SAFE_NUMBERS_HOST_DEVICE constexpr auto signed_underflow_mul_msg() noexcep
14631463
}
14641464
}
14651465

1466+
// clang lowers signed __builtin_mul_overflow on __int128 to __muloti4 (compiler-rt
1467+
// only), which is missing when linking libgcc on clang-13. Limit the fast path to
1468+
// non-clang compilers (e.g. GCC) and clang >= 14; everything else uses signed_no_intrin_mul for int128.
1469+
#if BOOST_SAFE_NUMBERS_HAS_BUILTIN(__builtin_mul_overflow) && defined(BOOST_SAFE_NUMBERS_DETAIL_INT128_HAS_INT128) && (!defined(__clang__) || __clang_major__ >= 14)
1470+
# define BOOST_SAFE_NUMBERS_HAS_INT128_SIGNED_INTRIN_MUL
1471+
#endif
1472+
14661473
#if BOOST_SAFE_NUMBERS_HAS_BUILTIN(__builtin_mul_overflow)
14671474

14681475
template <std::signed_integral T>
@@ -1476,6 +1483,24 @@ auto signed_intrin_mul(const T lhs, const T rhs, T& result) -> signed_overflow_s
14761483
return signed_overflow_status::no_error;
14771484
}
14781485

1486+
#ifdef BOOST_SAFE_NUMBERS_HAS_INT128_SIGNED_INTRIN_MUL
1487+
1488+
// int128 overload of signed_intrin_mul: performs the multiplication on the
// native __int128_t type via __builtin_mul_overflow.
//
// lhs, rhs - operands; converted to __int128_t before multiplying.
// result   - always receives the value the builtin stored, even when the
//            builtin reports overflow.
//
// Returns no_error when the builtin reports no overflow. Otherwise returns
// overflow when both operands are on the same side of zero (both >= 0 or
// both < 0), and underflow when their signs differ.
inline auto signed_intrin_mul(const int128::int128_t lhs, const int128::int128_t rhs, int128::int128_t& result) -> signed_overflow_status
{
    const auto native_lhs {static_cast<__int128_t>(lhs)};
    const auto native_rhs {static_cast<__int128_t>(rhs)};

    __int128_t native_product;
    const bool did_overflow {__builtin_mul_overflow(native_lhs, native_rhs, &native_product)};
    result = native_product;

    // Common case first: the product fit.
    if (!did_overflow)
    {
        return signed_overflow_status::no_error;
    }

    // Matching signs mean the true product is positive (too large);
    // differing signs mean it is negative (too small).
    const bool lhs_non_negative {lhs >= 0};
    const bool rhs_non_negative {rhs >= 0};

    if (lhs_non_negative == rhs_non_negative)
    {
        return signed_overflow_status::overflow;
    }

    return signed_overflow_status::underflow;
}
1501+
1502+
#endif // BOOST_SAFE_NUMBERS_HAS_INT128_SIGNED_INTRIN_MUL
1503+
14791504
#elif defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X64_INTRIN) || defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X86_INTRIN)
14801505

14811506
template <std::signed_integral T>
@@ -1765,7 +1790,11 @@ struct signed_mul_helper
17651790

17661791
#if BOOST_SAFE_NUMBERS_HAS_BUILTIN(__builtin_mul_overflow) || defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X64_INTRIN) || defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X86_INTRIN)
17671792

1793+
// Route int128 through the intrin path only where the signed 128-bit
1794+
// fast path links (non-clang compilers such as GCC, or clang >= 14); elsewhere int128 takes the no-intrinsic path.
1795+
#if !defined(BOOST_SAFE_NUMBERS_HAS_INT128_SIGNED_INTRIN_MUL)
17681796
if constexpr (!std::is_same_v<BasisType, int128::int128_t>)
1797+
#endif
17691798
{
17701799
#if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA))
17711800

@@ -1812,7 +1841,11 @@ struct signed_mul_helper<overflow_policy::overflow_tuple, BasisType>
18121841

18131842
#if BOOST_SAFE_NUMBERS_HAS_BUILTIN(__builtin_mul_overflow) || defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X64_INTRIN) || defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X86_INTRIN)
18141843

1844+
// Route int128 through the intrin path only where the signed 128-bit
1845+
// fast path links (non-clang compilers such as GCC, or clang >= 14); elsewhere int128 takes the no-intrinsic path.
1846+
#if !defined(BOOST_SAFE_NUMBERS_HAS_INT128_SIGNED_INTRIN_MUL)
18151847
if constexpr (!std::is_same_v<BasisType, int128::int128_t>)
1848+
#endif
18161849
{
18171850
#if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA))
18181851

@@ -1849,7 +1882,11 @@ struct signed_mul_helper<overflow_policy::checked, BasisType>
18491882

18501883
#if BOOST_SAFE_NUMBERS_HAS_BUILTIN(__builtin_mul_overflow) || defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X64_INTRIN) || defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X86_INTRIN)
18511884

1885+
// Route int128 through the intrin path only where the signed 128-bit
1886+
// fast path links (non-clang compilers such as GCC, or clang >= 14); elsewhere int128 takes the no-intrinsic path.
1887+
#if !defined(BOOST_SAFE_NUMBERS_HAS_INT128_SIGNED_INTRIN_MUL)
18521888
if constexpr (!std::is_same_v<BasisType, int128::int128_t>)
1889+
#endif
18531890
{
18541891
#if !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA))
18551892

include/boost/safe_numbers/detail/unsigned_integer_basis.hpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,6 +1252,18 @@ bool unsigned_intrin_mul(const T lhs, const T rhs, T& result)
12521252
return __builtin_mul_overflow(lhs, rhs, &result);
12531253
}
12541254

1255+
#ifdef BOOST_SAFE_NUMBERS_DETAIL_INT128_HAS_INT128
1256+
1257+
// uint128 overload of unsigned_intrin_mul: performs the multiplication on the
// native __uint128_t type via __builtin_mul_overflow.
//
// lhs, rhs - operands; converted to __uint128_t before multiplying.
// result   - always receives the value the builtin stored.
//
// Returns the overflow flag reported by the builtin.
inline bool unsigned_intrin_mul(const int128::uint128_t lhs, const int128::uint128_t rhs, int128::uint128_t& result)
{
    const auto native_lhs {static_cast<__uint128_t>(lhs)};
    const auto native_rhs {static_cast<__uint128_t>(rhs)};

    __uint128_t native_product;
    const bool did_overflow {__builtin_mul_overflow(native_lhs, native_rhs, &native_product)};

    result = native_product;
    return did_overflow;
}
1264+
1265+
#endif
1266+
12551267
#elif defined(BOOST_SAFENUMBERS_HAS_WINDOWS_X64_INTRIN) && !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA))
12561268

12571269
template <std::unsigned_integral T>
@@ -1390,7 +1402,10 @@ struct mul_helper
13901402
}
13911403
};
13921404

1405+
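// uint128_t has an intrinsic path only through __builtin_mul_overflow on a native 128-bit type, so keep excluding it when _umul128 is available or native 128-bit integers are not.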
1406+
#if BOOST_SAFE_NUMBERS_HAS_BUILTIN(_umul128) || !defined(BOOST_SAFE_NUMBERS_DETAIL_INT128_HAS_INT128)
13931407
if constexpr (!std::is_same_v<BasisType, int128::uint128_t>)
1408+
#endif
13941409
{
13951410
#if (BOOST_SAFE_NUMBERS_HAS_BUILTIN(__builtin_mul_overflow) || BOOST_SAFE_NUMBERS_HAS_BUILTIN(_umul128)) && !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA))
13961411

@@ -1431,7 +1446,10 @@ struct mul_helper<overflow_policy::overflow_tuple, BasisType>
14311446
const auto rhs_basis {static_cast<BasisType>(rhs)};
14321447
BasisType res {};
14331448

1449+
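// uint128_t has an intrinsic path only through __builtin_mul_overflow on a native 128-bit type, so keep excluding it when _umul128 is available or native 128-bit integers are not.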
1450+
#if BOOST_SAFE_NUMBERS_HAS_BUILTIN(_umul128) || !defined(BOOST_SAFE_NUMBERS_DETAIL_INT128_HAS_INT128)
14341451
if constexpr (!std::is_same_v<BasisType, int128::uint128_t>)
1452+
#endif
14351453
{
14361454
#if (BOOST_SAFE_NUMBERS_HAS_BUILTIN(__builtin_mul_overflow) || BOOST_SAFE_NUMBERS_HAS_BUILTIN(_umul128)) && !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA))
14371455

@@ -1464,7 +1482,10 @@ struct mul_helper<overflow_policy::checked, BasisType>
14641482
const auto rhs_basis {static_cast<BasisType>(rhs)};
14651483
BasisType res {};
14661484

1485+
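// uint128_t has an intrinsic path only through __builtin_mul_overflow on a native 128-bit type, so keep excluding it when _umul128 is available or native 128-bit integers are not.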
1486+
#if BOOST_SAFE_NUMBERS_HAS_BUILTIN(_umul128) || !defined(BOOST_SAFE_NUMBERS_DETAIL_INT128_HAS_INT128)
14671487
if constexpr (!std::is_same_v<BasisType, int128::uint128_t>)
1488+
#endif
14681489
{
14691490
#if (BOOST_SAFE_NUMBERS_HAS_BUILTIN(__builtin_mul_overflow) || BOOST_SAFE_NUMBERS_HAS_BUILTIN(_umul128)) && !(defined(__CUDACC__) && defined(BOOST_SAFE_NUMBERS_ENABLE_CUDA))
14701491

test/benchmarks/benchmark_float_operations.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,18 @@ auto benchmark_subtraction(const std::vector<T>& values, const char* type)
120120
return benchmark_op(values, std::minus<>(), type, "sub");
121121
}
122122

123+
// Multiplication benchmark: hands `values` to benchmark_op together with a
// std::multiplies<> functor, the caller's `type` label and the operation
// label "mul", and returns whatever benchmark_op returns.
template <typename T>
auto benchmark_multiplication(const std::vector<T>& values, const char* type)
{
    using product_op = std::multiplies<>;

    return benchmark_op(values, product_op{}, type, "mul");
}
128+
129+
// Division benchmark: hands `values` to benchmark_op together with a
// std::divides<> functor, the caller's `type` label and the operation
// label "div", and returns whatever benchmark_op returns.
template <typename T>
auto benchmark_division(const std::vector<T>& values, const char* type)
{
    using quotient_op = std::divides<>;

    return benchmark_op(values, quotient_op{}, type, "div");
}
134+
123135
#ifdef _MSC_VER
124136
#pragma optimize("", on)
125137
#endif
@@ -148,6 +160,14 @@ int main()
148160
builtin_runtime = benchmark_subtraction(builtin_values, "float");
149161
lib_runtime = benchmark_subtraction(lib_values, "boost::sn::f32");
150162
print_runtime_ratio(lib_runtime, builtin_runtime);
163+
164+
builtin_runtime = benchmark_multiplication(builtin_values, "float");
165+
lib_runtime = benchmark_multiplication(lib_values, "boost::sn::f32");
166+
print_runtime_ratio(lib_runtime, builtin_runtime);
167+
168+
builtin_runtime = benchmark_division(builtin_values, "float");
169+
lib_runtime = benchmark_division(lib_values, "boost::sn::f32");
170+
print_runtime_ratio(lib_runtime, builtin_runtime);
151171
}
152172
{
153173
std::cout << "\n64-bit Floats\n";
@@ -161,6 +181,14 @@ int main()
161181
builtin_runtime = benchmark_subtraction(builtin_values, "double");
162182
lib_runtime = benchmark_subtraction(lib_values, "boost::sn::f64");
163183
print_runtime_ratio(lib_runtime, builtin_runtime);
184+
185+
builtin_runtime = benchmark_multiplication(builtin_values, "double");
186+
lib_runtime = benchmark_multiplication(lib_values, "boost::sn::f64");
187+
print_runtime_ratio(lib_runtime, builtin_runtime);
188+
189+
builtin_runtime = benchmark_division(builtin_values, "double");
190+
lib_runtime = benchmark_division(lib_values, "boost::sn::f64");
191+
print_runtime_ratio(lib_runtime, builtin_runtime);
164192
}
165193

166194
#else

0 commit comments

Comments
 (0)