Skip to content

Commit ea0d36b

Browse files
authored
Merge pull request #1394 from boostorg/opt_comp
Collected fundamental operation optimizations
2 parents 0b2cffe + 4e6e0f1 commit ea0d36b

15 files changed

Lines changed: 1551 additions & 192 deletions

.drone.jsonnet

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -435,8 +435,14 @@ local windows_pipeline(name, image, environment, arch = "amd64") =
435435
),
436436

437437
windows_pipeline(
438-
"Windows VS2026 msvc-14.5",
438+
"Windows VS2026 msvc-14.5 64-bit",
439439
"cppalliance/dronevs2026:1",
440-
{ TOOLSET: 'msvc-14.5', CXXSTD: '14,17,20,latest', ADDRMD: '32,64' },
440+
{ TOOLSET: 'msvc-14.5', CXXSTD: '14,17,20,latest', ADDRMD: '64' },
441441
),
442+
443+
windows_pipeline(
444+
"Windows VS2026 msvc-14.5 32-bit",
445+
"cppalliance/dronevs2026:1",
446+
{ TOOLSET: 'msvc-14.5', CXXSTD: '14,17,20,latest', ADDRMD: '32' },
447+
),
442448
]

include/boost/decimal/decimal128_t.hpp

Lines changed: 125 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -598,6 +598,41 @@ BOOST_DECIMAL_CUDA_CONSTEXPR auto from_bits(const int128::uint128_t rhs) noexcep
598598
return result;
599599
}
600600

601+
namespace detail {
602+
603+
// IEEE-pack a known-in-range (coeff, exp, sign) triple into a decimal128_t,
604+
// skipping the constructor's bounds check + dead-branch handling. For d128
605+
// the precision (34 digits, 113 bits) always fits in not_11_significand_mask,
606+
// so no combination-field branch is needed. Caller guarantees
607+
// coeff <= d128_max_significand_value and (exp + bias) is in [0, d128_max_biased_exponent].
608+
// Parameters: coeff is the unsigned significand, exp is the unbiased exponent
// (converted with static_cast<int> before the bias is added), and sign selects
// whether the sign mask is OR-ed into the high word.
// Returns the decimal128_t produced by from_bits() on the assembled bit pattern.
// No validation is performed here: out-of-contract inputs are silently masked.
template <typename T2>
609+
BOOST_DECIMAL_CUDA_CONSTEXPR auto direct_pack_d128(int128::uint128_t coeff, T2 exp, bool sign) noexcept -> decimal128_t
610+
{
611+
// Bias the exponent; the caller contract keeps the sum non-negative, so the
// conversion to an unsigned 64-bit value does not wrap.
const auto biased_exp {static_cast<std::uint64_t>(static_cast<int>(exp) + bias_v<decimal128_t>)};
612+
// Start from the significand, keeping only the bits selected by the mask.
int128::uint128_t bits {coeff & d128_not_11_significand_mask};
613+
// OR in the sign mask when sign is true; otherwise leave the high word untouched.
bits.high |= (sign ? d128_sign_mask : UINT64_C(0));
614+
// Shift the biased exponent into position in the high word and mask it to the exponent field.
bits.high |= (biased_exp << d128_not_11_exp_high_word_shift) & d128_not_11_exp_mask;
615+
return from_bits(bits);
616+
}
617+
618+
// Definition of the pack_in_range<decimal128_t> overload declared in
619+
// add_impl.hpp. Lives here so the `decimal128_t{coeff, exp, sign}` fallback
620+
// is parsed only after decimal128_t is complete (see add_impl.hpp rationale).
621+
// Behaviour: when (exp + bias) lies in [0, max_biased_exp_v<decimal128_t>] the
// triple is packed directly via direct_pack_d128; otherwise the regular
// decimal128_t constructor is used. Only the exponent is range-checked in this
// function; the coefficient is passed through unchecked on the fast path, so
// callers must already satisfy direct_pack_d128's coefficient bound.
template <typename ReturnType, typename SigType, typename ExpType>
622+
BOOST_DECIMAL_CUDA_CONSTEXPR auto pack_in_range(SigType coeff, ExpType exp, bool sign) noexcept
623+
// The enable_if return type makes this overload viable only when ReturnType is decimal128_t.
-> std::enable_if_t<std::is_same<ReturnType, decimal128_t>::value, decimal128_t>
624+
{
625+
// Computed as a signed int so that a negative biased exponent can be detected below.
const auto biased_exp_check {static_cast<int>(exp) + bias_v<decimal128_t>};
626+
// Fast path, hinted as the likely branch: biased exponent is representable as-is.
if (BOOST_DECIMAL_LIKELY(biased_exp_check >= 0
627+
&& biased_exp_check <= static_cast<int>(max_biased_exp_v<decimal128_t>)))
628+
{
629+
return direct_pack_d128(static_cast<int128::uint128_t>(coeff), exp, sign);
630+
}
631+
// Biased exponent out of range: defer to the full constructor.
return decimal128_t{coeff, exp, sign};
632+
}
633+
634+
} // namespace detail
635+
601636
BOOST_DECIMAL_CUDA_CONSTEXPR auto decimal128_t::unbiased_exponent() const noexcept -> exponent_type
602637
{
603638
exponent_type expval {};
@@ -819,9 +854,11 @@ BOOST_DECIMAL_CUDA_CONSTEXPR decimal128_t::decimal128_t(T1 coeff, T2 exp, const
819854
}
820855
else if (digit_delta > 0 && coeff_digits + digit_delta <= detail::precision_v<decimal128_t>)
821856
{
857+
// Same overflow-fold pattern as d32/d64: post-shift coeff is <= max_significand_v
858+
// and biased_exp lands in [0, max], so pack_in_range routes to direct_pack.
822859
exp -= digit_delta;
823860
reduced_coeff *= detail::pow10(static_cast<significand_type>(digit_delta));
824-
*this = decimal128_t(reduced_coeff, exp, is_negative);
861+
*this = detail::pack_in_range<decimal128_t>(reduced_coeff, exp, is_negative);
825862
}
826863
else if (coeff_digits + biased_exp <= detail::precision_v<decimal128_t>)
827864
{
@@ -856,10 +893,12 @@ BOOST_DECIMAL_CUDA_CONSTEXPR decimal128_t::decimal128_t(T1 coeff, T2 exp, const
856893
}
857894
else if (digit_delta < 0 && coeff_digits - digit_delta <= detail::precision_v<decimal128_t>)
858895
{
896+
// Expand to use the full precision; biased_exp ends up in [0, max] and
897+
// coeff <= max_significand_v. pack_in_range routes to direct_pack.
859898
const auto offset {detail::precision_v<decimal128_t> - coeff_digits};
860899
exp -= offset;
861900
reduced_coeff *= detail::pow10(static_cast<significand_type>(offset));
862-
*this = decimal128_t(reduced_coeff, exp, is_negative);
901+
*this = detail::pack_in_range<decimal128_t>(reduced_coeff, exp, is_negative);
863902
}
864903
else
865904
{
@@ -1337,6 +1376,11 @@ BOOST_DECIMAL_CUDA_CONSTEXPR auto operator<(const decimal128_t& lhs, const decim
13371376
}
13381377
#endif
13391378

1379+
if (BOOST_DECIMAL_UNLIKELY(lhs.bits_ == rhs.bits_))
1380+
{
1381+
return false;
1382+
}
1383+
13401384
return less_parts_impl<decimal128_t>(lhs.full_significand(), lhs.biased_exponent(), lhs.isneg(),
13411385
rhs.full_significand(), rhs.biased_exponent(), rhs.isneg());
13421386
}
@@ -1469,16 +1513,17 @@ BOOST_DECIMAL_CUDA_CONSTEXPR auto operator<=>(const decimal128_t& lhs, const dec
14691513
{
14701514
return std::partial_ordering::less;
14711515
}
1472-
else if (lhs > rhs)
1516+
if (rhs < lhs)
14731517
{
14741518
return std::partial_ordering::greater;
14751519
}
1476-
else if (lhs == rhs)
1520+
#ifndef BOOST_DECIMAL_FAST_MATH
1521+
if (isnan(lhs) || isnan(rhs))
14771522
{
1478-
return std::partial_ordering::equivalent;
1523+
return std::partial_ordering::unordered;
14791524
}
1480-
1481-
return std::partial_ordering::unordered;
1525+
#endif
1526+
return std::partial_ordering::equivalent;
14821527
}
14831528

14841529
template <typename Integer>
@@ -1682,11 +1727,45 @@ BOOST_DECIMAL_CUDA_CONSTEXPR auto operator+(const decimal128_t& lhs, const decim
16821727
{
16831728
return from_bits(detail::d128_nan_mask);
16841729
}
1685-
1730+
16861731
return detail::check_non_finite(lhs, rhs);
16871732
}
16881733
#endif
16891734

1735+
// Two fast paths (see decimal64_t.hpp:operator+ for full explanation).
1736+
// Both gated on non-zero operands so zero short-circuit logic is preserved
1737+
// by falling through to d128_add_impl_new.
1738+
{
1739+
const auto lhs_sig {lhs.full_significand()};
1740+
const auto rhs_sig {rhs.full_significand()};
1741+
if (BOOST_DECIMAL_LIKELY(lhs_sig != 0U && rhs_sig != 0U))
1742+
{
1743+
const auto lhs_exp {lhs.biased_exponent()};
1744+
const auto rhs_exp {rhs.biased_exponent()};
1745+
const auto exp_diff {lhs_exp > rhs_exp ? lhs_exp - rhs_exp : rhs_exp - lhs_exp};
1746+
if (exp_diff > 75 || exp_diff <= 3)
1747+
{
1748+
auto round {_boost_decimal_global_rounding_mode};
1749+
#ifndef BOOST_DECIMAL_NO_CONSTEVAL_DETECTION
1750+
if (!BOOST_DECIMAL_IS_CONSTANT_EVALUATED(lhs))
1751+
{
1752+
round = _boost_decimal_global_runtime_rounding_mode;
1753+
}
1754+
#endif
1755+
if (BOOST_DECIMAL_LIKELY(round == rounding_mode::fe_dec_to_nearest))
1756+
{
1757+
if (exp_diff > 75)
1758+
{
1759+
return lhs_exp > rhs_exp ? lhs : rhs;
1760+
}
1761+
return detail::aligned_add_kernel<decimal128_t, int128::uint128_t>(
1762+
lhs_sig, rhs_sig, lhs_exp, rhs_exp, static_cast<unsigned>(exp_diff),
1763+
lhs.isneg(), rhs.isneg());
1764+
}
1765+
}
1766+
}
1767+
}
1768+
16901769
auto lhs_components {lhs.to_components()};
16911770
detail::expand_significand<decimal128_t>(lhs_components.sig, lhs_components.exp);
16921771
auto rhs_components {rhs.to_components()};
@@ -1741,11 +1820,45 @@ BOOST_DECIMAL_CUDA_CONSTEXPR auto operator-(const decimal128_t& lhs, const decim
17411820
{
17421821
return -rhs;
17431822
}
1744-
1823+
17451824
return detail::check_non_finite(lhs, rhs);
17461825
}
17471826
#endif
17481827

1828+
// Two fast paths; see operator+ above. Both gated on non-zero operands so
1829+
// zero short-circuit logic is preserved by falling through. For operator-,
1830+
// rhs sign is flipped before kernel dispatch.
1831+
{
1832+
const auto lhs_sig {lhs.full_significand()};
1833+
const auto rhs_sig {rhs.full_significand()};
1834+
if (BOOST_DECIMAL_LIKELY(lhs_sig != 0U && rhs_sig != 0U))
1835+
{
1836+
const auto lhs_exp {lhs.biased_exponent()};
1837+
const auto rhs_exp {rhs.biased_exponent()};
1838+
const auto exp_diff {lhs_exp > rhs_exp ? lhs_exp - rhs_exp : rhs_exp - lhs_exp};
1839+
if (exp_diff > 75 || exp_diff <= 3)
1840+
{
1841+
auto round {_boost_decimal_global_rounding_mode};
1842+
#ifndef BOOST_DECIMAL_NO_CONSTEVAL_DETECTION
1843+
if (!BOOST_DECIMAL_IS_CONSTANT_EVALUATED(lhs))
1844+
{
1845+
round = _boost_decimal_global_runtime_rounding_mode;
1846+
}
1847+
#endif
1848+
if (BOOST_DECIMAL_LIKELY(round == rounding_mode::fe_dec_to_nearest))
1849+
{
1850+
if (exp_diff > 75)
1851+
{
1852+
return lhs_exp > rhs_exp ? lhs : -rhs;
1853+
}
1854+
return detail::aligned_add_kernel<decimal128_t, int128::uint128_t>(
1855+
lhs_sig, rhs_sig, lhs_exp, rhs_exp, static_cast<unsigned>(exp_diff),
1856+
lhs.isneg(), !rhs.isneg());
1857+
}
1858+
}
1859+
}
1860+
}
1861+
17491862
auto lhs_components {lhs.to_components()};
17501863
detail::expand_significand<decimal128_t>(lhs_components.sig, lhs_components.exp);
17511864
auto rhs_components {rhs.to_components()};
@@ -1861,14 +1974,11 @@ BOOST_DECIMAL_CUDA_CONSTEXPR auto operator*(const decimal128_t lhs, const Intege
18611974

18621975
auto lhs_sig {lhs.full_significand()};
18631976
auto lhs_exp {lhs.biased_exponent()};
1864-
const auto lhs_zeros {detail::remove_trailing_zeros(lhs_sig)};
1865-
lhs_sig = lhs_zeros.trimmed_number;
1866-
lhs_exp += static_cast<std::int32_t>(lhs_zeros.number_of_removed_zeros);
1977+
detail::expand_significand<decimal128_t>(lhs_sig, lhs_exp);
18671978

18681979
auto rhs_sig {static_cast<int128::uint128_t>(detail::make_positive_unsigned(rhs))};
1869-
const auto rhs_zeros {detail::remove_trailing_zeros(rhs_sig)};
1870-
rhs_sig = rhs_zeros.trimmed_number;
1871-
const auto rhs_exp = static_cast<exp_type>(rhs_zeros.number_of_removed_zeros);
1980+
exp_type rhs_exp {0};
1981+
detail::normalize<decimal128_t>(rhs_sig, rhs_exp);
18721982

18731983
return detail::d128_mul_impl<decimal128_t>(
18741984
lhs_sig, lhs_exp, lhs.isneg(),

0 commit comments

Comments
 (0)