diff --git a/doc/modules/ROOT/nav.adoc b/doc/modules/ROOT/nav.adoc
index ceeee540..27492ba9 100644
--- a/doc/modules/ROOT/nav.adoc
+++ b/doc/modules/ROOT/nav.adoc
@@ -7,6 +7,7 @@
 ** xref:examples.adoc#examples_bit[`<bit>` support]
 ** xref:examples.adoc#examples_numeric[`<numeric>` support (Saturating Arithmetic)]
 ** xref:examples.adoc#examples_numeric_algorithms[`<numeric>` support (Numeric Algorithms)]
+** xref:examples.adoc#examples_checked[Checked Arithmetic]
 ** xref:examples.adoc#examples_mixed_sign[Mixed Signedness Arithmetic]
 ** xref:examples.adoc#examples_to_string[String Conversion (to_string)]
 ** xref:examples.adoc#examples_boost_math_random[Boost Math and Random Integration]
@@ -60,6 +61,9 @@
 * xref:string.adoc[]
 * xref:utilities.adoc[]
 ** xref:utilities.adoc#powm[Modular Exponentiation]
+** xref:utilities.adoc#ipow[Integer Power]
+** xref:utilities.adoc#isqrt[Integer Square Root]
+** xref:utilities.adoc#checked[Checked Arithmetic]
 * Benchmarks
 ** xref:u128_benchmarks.adoc[]
 *** xref:u128_benchmarks.adoc#u128_linux[Linux]
diff --git a/doc/modules/ROOT/pages/api_reference.adoc b/doc/modules/ROOT/pages/api_reference.adoc
index 2963d1a4..4a55dc25 100644
--- a/doc/modules/ROOT/pages/api_reference.adoc
+++ b/doc/modules/ROOT/pages/api_reference.adoc
@@ -286,6 +286,21 @@ Listed by analogous STL header.
 
 | xref:utilities.adoc#powm[`powm`]
 | Modular exponentiation `(base ^ exp) mod m`
+
+| xref:utilities.adoc#ipow[`ipow`]
+| Integer power `base ^ exp` (wraps modulo `2^128`)
+
+| xref:utilities.adoc#isqrt[`isqrt`]
+| Integer square root `floor(sqrt(n))`
+
+| xref:utilities.adoc#checked[`ckd_add`]
+| Checked addition (C23 `<stdckdint.h>` contract)
+
+| xref:utilities.adoc#checked[`ckd_sub`]
+| Checked subtraction (C23 `<stdckdint.h>` contract)
+
+| xref:utilities.adoc#checked[`ckd_mul`]
+| Checked multiplication (C23 `<stdckdint.h>` contract)
 |===
 
 [#api_macros]
diff --git a/doc/modules/ROOT/pages/examples.adoc b/doc/modules/ROOT/pages/examples.adoc
index 8378c30c..18620bbc 100644
--- a/doc/modules/ROOT/pages/examples.adoc
+++ b/doc/modules/ROOT/pages/examples.adoc
@@ -278,6 +278,38 @@ midpoint(-100, -50) = -75
 ----
 ====
 
+[#examples_checked]
+== Checked Arithmetic
+
+.This https://github.com/cppalliance/int128/blob/develop/examples/checked_arithmetic.cpp[example] demonstrates checked addition, subtraction, and multiplication following the C23 checked-integer contract
+====
+[source, c++]
+----
+include::example$checked_arithmetic.cpp[]
+----
+
+.Expected Output
+[listing]
+----
+=== Results That Fit ===
+ckd_add(20, 22): overflow=false, result=42
+
+=== Addition Overflow ===
+ckd_add(UINT128_MAX, 1): overflow=true, wrapped=0
+
+=== Subtraction Underflow ===
+ckd_sub(0, 1): overflow=true, wrapped=340282366920938463463374607431768211455
+
+=== Multiplication Overflow ===
+ckd_mul(INT128_MAX, 2): overflow=true, wrapped=-2
+ckd_mul(INT128_MIN, -1): overflow=true, wrapped=-170141183460469231731687303715884105728
+
+=== Mixed Types ===
+ckd_add<int64_t>(uint128_t{5}, int128_t{-3}): overflow=false, result=2
+ckd_mul<uint8_t>(20, 20): overflow=true, wrapped=144
+----
+====
+
 [#examples_mixed_sign]
 == Mixed Signedness Arithmetic
 
diff --git a/doc/modules/ROOT/pages/utilities.adoc b/doc/modules/ROOT/pages/utilities.adoc
index c633983d..959c87bb 100644
--- a/doc/modules/ROOT/pages/utilities.adoc
+++ b/doc/modules/ROOT/pages/utilities.adoc
@@ -60,6 +60,142 @@ Negative bases are reduced before exponentiation; `(std::numeric_limits<int128_t
 | `base == 0` and `exp > 0`
 | `0`
 
-| Signed overload with `m <= 0` or `exp < 0`
+| Signed overload with non-positive `m` or negative `exp`
 | `0` (modular exponentiation requires a positive modulus; a negative exponent would require a modular inverse, which this interface does not provide)
 |===
+
+[#ipow]
+== Integer Power
+
+Computes `base ^ exp` by exponentiation by squaring, with a non-negative 64-bit exponent.
+Unlike `powm` there is no modulus: the result is the true power reduced modulo `2^128`, which is the same rollover behavior as the library's `operator*`.
+`ipow(base, exp)` is therefore equivalent to multiplying together `exp` copies of `base` with `operator*` (the empty product, `exp == 0`, is `1`).
+
+[source, c++]
+----
+namespace boost {
+namespace int128 {
+
+BOOST_INT128_HOST_DEVICE constexpr uint128_t ipow(uint128_t base, std::uint64_t exp) noexcept;
+
+BOOST_INT128_HOST_DEVICE constexpr int128_t ipow(int128_t base, std::uint64_t exp) noexcept;
+
+} // namespace int128
+} // namespace boost
+----
+
+The exponent is unsigned, so negative powers (whose results are generally not integers) cannot be requested.
+Because the result wraps on overflow rather than saturating or reporting an error, `ipow` is appropriate when rollover semantics are intended.
+
+=== Special Cases
+
+[cols="1,1", options="header"]
+|===
+| Input | Result
+
+| `exp == 0`
+| `1` (including `ipow(0, 0) == 1`, following the conventional definition `0^0 == 1`)
+
+| `base == 0` and `exp > 0`
+| `0`
+
+| `base ^ exp` exceeds 128 bits
+| The low 128 bits of the true power, matching the rollover of `operator*`
+|===
+
+[#isqrt]
+== Integer Square Root
+
+Computes the integer square root `floor(sqrt(n))`: the largest integer `r` whose square does not exceed `n`.
+The computation runs entirely in integer arithmetic using Newton's method, so it is exact (no floating-point rounding) and usable in a `constexpr` context.
+
+[source, c++]
+----
+namespace boost {
+namespace int128 {
+
+BOOST_INT128_HOST_DEVICE constexpr uint128_t isqrt(uint128_t n) noexcept;
+
+BOOST_INT128_HOST_DEVICE constexpr int128_t isqrt(int128_t n) noexcept;
+
+} // namespace int128
+} // namespace boost
+----
+
+=== Special Cases
+
+[cols="1,1", options="header"]
+|===
+| Input | Result
+
+| `n < 0` (signed overload)
+| `0` (a real square root does not exist)
+
+| `n >= 0`
+| `floor(sqrt(n))`, the largest `r` whose square does not exceed `n` (so `isqrt(0) == 0` and `isqrt(1) == 1`)
+|===
+
+[#checked]
+== Checked Arithmetic
+
+`ckd_add`, `ckd_sub`, and `ckd_mul` implement the checked integer arithmetic interface introduced by C23's `<stdckdint.h>`, but without requiring a C23 toolchain; they are available in C++14 and later.
+
+Each function computes `a + b`, `a - b`, or `a * b` respectively, as if both operands were represented in a signed integer type with infinite range, and then converts that mathematical result to the type pointed to by `result`.
+The function returns `false` when `*result` correctly represents the mathematical result of the operation.
+Otherwise it returns `true`, and `*result` is set to the mathematical result wrapped around (reduced modulo `2^N`) to the width `N` of `*result`.
+`*result` is always written, whether or not the operation overflowed.
+
+[source, c++]
+----
+namespace boost {
+namespace int128 {
+
+template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_add(T1* result, T2 a, T3 b) noexcept;
+
+template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_sub(T1* result, T2 a, T3 b) noexcept;
+
+template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_mul(T1* result, T2 a, T3 b) noexcept;
+
+} // namespace int128
+} // namespace boost
+----
+
+The three type parameters are independent: the result type and the two operand types may differ in width and signedness.
+The operation always uses the exact mathematical value of each operand, so a negative signed value added to an unsigned value, or a product that needs up to 256 bits internally, is evaluated correctly.
+
+Following the C23 rules, `T1`, `T2`, and `T3` may be any integer type other than `bool`, plain `char`, an enumerated type, or a bit-precise (`_BitInt`) type.
+In addition to the standard and extended integer types, the library's `uint128_t` and `int128_t` are accepted.
+
+The following example exercises all three operations, including the wrap-around, the `INT128_MIN * -1` case, and the mixed-type behavior described above.
+
+.This https://github.com/cppalliance/int128/blob/develop/examples/checked_arithmetic.cpp[example] demonstrates checked addition, subtraction, and multiplication following the C23 checked-integer contract
+====
+[source, c++]
+----
+include::example$checked_arithmetic.cpp[]
+----
+
+.Expected Output
+[listing]
+----
+=== Results That Fit ===
+ckd_add(20, 22): overflow=false, result=42
+
+=== Addition Overflow ===
+ckd_add(UINT128_MAX, 1): overflow=true, wrapped=0
+
+=== Subtraction Underflow ===
+ckd_sub(0, 1): overflow=true, wrapped=340282366920938463463374607431768211455
+
+=== Multiplication Overflow ===
+ckd_mul(INT128_MAX, 2): overflow=true, wrapped=-2
+ckd_mul(INT128_MIN, -1): overflow=true, wrapped=-170141183460469231731687303715884105728
+
+=== Mixed Types ===
+ckd_add<int64_t>(uint128_t{5}, int128_t{-3}): overflow=false, result=2
+ckd_mul<uint8_t>(20, 20): overflow=true, wrapped=144
+----
+====
diff --git a/examples/checked_arithmetic.cpp b/examples/checked_arithmetic.cpp
new file mode 100644
index 00000000..55db9834
--- /dev/null
+++ b/examples/checked_arithmetic.cpp
@@ -0,0 +1,76 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+// Individual headers
+
+#include <boost/int128/utilities.hpp>
+#include <boost/int128/iostream.hpp>
+
+// Or you can do a single header
+
+// #include <boost/int128.hpp>
+
+#include <cstdint>
+#include <limits>
+#include <iostream>
+
+int main()
+{
+    using boost::int128::uint128_t;
+    using boost::int128::int128_t;
+    using boost::int128::ckd_add;
+    using boost::int128::ckd_sub;
+    using boost::int128::ckd_mul;
+
+    std::cout << std::boolalpha; // print the bool overflow flags as true/false
+
+    // ckd_add, ckd_sub, and ckd_mul implement the C23 stdckdint.h contract: the
+    // operation is evaluated as if both operands had infinite range, the result
+    // is written to *result wrapped to that type's width, and the function
+    // returns true when the exact result did not fit.
+    constexpr auto u_max {std::numeric_limits<uint128_t>::max()}; // labelled UINT128_MAX in the output
+    constexpr auto i_max {std::numeric_limits<int128_t>::max()};  // labelled INT128_MAX in the output
+    constexpr auto i_min {std::numeric_limits<int128_t>::min()};  // labelled INT128_MIN in the output
+
+    // A result that fits returns false and holds the exact value.
+    std::cout << "=== Results That Fit ===" << std::endl;
+    int128_t r {};
+    bool overflow {ckd_add(&r, int128_t{20}, int128_t{22})};
+    std::cout << "ckd_add(20, 22): overflow=" << overflow << ", result=" << r << std::endl;
+
+    // Addition that exceeds the type wraps modulo 2^128 and reports overflow.
+    std::cout << "\n=== Addition Overflow ===" << std::endl;
+    uint128_t u {};
+    overflow = ckd_add(&u, u_max, uint128_t{1});
+    std::cout << "ckd_add(UINT128_MAX, 1): overflow=" << overflow << ", wrapped=" << u << std::endl;
+
+    // Subtracting below zero in an unsigned type wraps to the top of the range.
+    std::cout << "\n=== Subtraction Underflow ===" << std::endl;
+    overflow = ckd_sub(&u, uint128_t{0}, uint128_t{1});
+    std::cout << "ckd_sub(0, 1): overflow=" << overflow << ", wrapped=" << u << std::endl;
+
+    // Multiplication detects overflow that operator* would silently roll over,
+    // including INT128_MIN * -1, whose true result is not representable.
+    std::cout << "\n=== Multiplication Overflow ===" << std::endl;
+    overflow = ckd_mul(&r, i_max, int128_t{2});
+    std::cout << "ckd_mul(INT128_MAX, 2): overflow=" << overflow << ", wrapped=" << r << std::endl;
+    overflow = ckd_mul(&r, i_min, int128_t{-1});
+    std::cout << "ckd_mul(INT128_MIN, -1): overflow=" << overflow << ", wrapped=" << r << std::endl;
+
+    // The result type and the two operand types are independent: they may differ
+    // in width and signedness, and the exact mathematical value is always used.
+    std::cout << "\n=== Mixed Types ===" << std::endl;
+    std::int64_t narrow {}; // 64-bit destination for two 128-bit operands
+    overflow = ckd_add(&narrow, uint128_t{5}, int128_t{-3});
+    std::cout << "ckd_add<int64_t>(uint128_t{5}, int128_t{-3}): overflow=" << overflow
+              << ", result=" << narrow << std::endl;
+
+    // Narrow targets make the wrap-around easy to see (400 modulo 256 is 144).
+    std::uint8_t byte {};
+    overflow = ckd_mul(&byte, std::uint8_t{20}, std::uint8_t{20});
+    std::cout << "ckd_mul<uint8_t>(20, 20): overflow=" << overflow
+              << ", wrapped=" << static_cast<int>(byte) << std::endl; // cast: uint8_t typically streams as a character
+
+    return 0;
+}
diff --git a/include/boost/int128/utilities.hpp b/include/boost/int128/utilities.hpp
index 0464b548..ffb29b66 100644
--- a/include/boost/int128/utilities.hpp
+++ b/include/boost/int128/utilities.hpp
@@ -12,6 +12,8 @@
 #ifndef BOOST_INT128_BUILD_MODULE
 
 #include <cstdint>
+#include <limits>
+#include <type_traits>
 
 #endif
 
@@ -253,6 +255,183 @@ BOOST_INT128_EXPORT BOOST_INT128_HOST_DEVICE constexpr int128_t isqrt(const int1
     return static_cast<int128_t>(isqrt(static_cast<uint128_t>(n)));
 }
 
+namespace detail {
+
+// C23 checked-integer operand rule: any integer type except bool, plain char, enumerated and
+// bit-precise (_BitInt) types. cv-qualifiers are stripped so volatile bool/char are rejected too.
+template <typename T>
+struct valid_checked_type : std::integral_constant<bool, std::is_integral<T>::value &&
+                                                         !std::is_same<typename std::remove_cv<T>::type, bool>::value &&
+                                                         !std::is_same<typename std::remove_cv<T>::type, char>::value> {};
+
+template <>
+struct valid_checked_type<int128_t> : std::true_type {}; // the library's signed 128-bit type is a valid operand
+
+template <>
+struct valid_checked_type<uint128_t> : std::true_type {}; // the library's unsigned 128-bit type is a valid operand
+
+// Widen an integer operand to its 128-bit two's complement bit pattern, returned as a uint128_t.
+template <typename T>
+BOOST_INT128_HOST_DEVICE constexpr uint128_t ckd_widen(const T value) noexcept
+{
+    BOOST_INT128_IF_CONSTEXPR (std::numeric_limits<T>::is_signed)
+    {
+        return static_cast<uint128_t>(static_cast<int128_t>(value)); // via int128_t first; presumably this sign-extends negatives
+    }
+    else
+    {
+        return static_cast<uint128_t>(value); // unsigned operands convert directly
+    }
+}
+
+// Sign and magnitude of an operand together with its 128-bit two's complement
+// image. magnitude is the absolute value; negative records the sign.
+struct ckd_operand
+{
+    uint128_t raw;       // 128-bit image produced by ckd_widen
+    uint128_t magnitude; // absolute value: 0 - raw when negative, otherwise raw
+    bool negative;       // true only for a signed operand whose image has bit 127 set
+};
+
+template <typename T>
+BOOST_INT128_HOST_DEVICE constexpr ckd_operand ckd_decompose(const T value) noexcept
+{
+    const uint128_t image {ckd_widen(value)}; // 128-bit two's complement image of value
+    const bool below_zero {std::numeric_limits<T>::is_signed && ((image >> 127) != 0U)}; // bit 127 is a sign bit only for signed T
+    return below_zero ? ckd_operand{image, uint128_t{0} - image, true} : ckd_operand{image, image, false};
+}
+
+// Exact signed sum of two operands given as (magnitude, sign). carry marks a
+// 129th bit, which no 128-bit or narrower target can represent.
+struct ckd_sum_result
+{
+    uint128_t magnitude; // low 128 bits of the absolute value of the sum
+    bool negative;       // sign of the sum
+    bool carry;          // true when the magnitude overflowed into a 129th bit
+};
+
+BOOST_INT128_HOST_DEVICE constexpr ckd_sum_result ckd_signed_sum(const uint128_t a_magnitude, const bool a_negative,
+                                                                 const uint128_t b_magnitude, const bool b_negative) noexcept
+{
+    if (a_negative != b_negative)
+    {
+        // Opposite signs: the result takes the sign of the larger magnitude and
+        // the difference of the magnitudes, which can never carry.
+        return (a_magnitude >= b_magnitude)
+            ? ckd_sum_result{a_magnitude - b_magnitude, a_negative, false}
+            : ckd_sum_result{b_magnitude - a_magnitude, b_negative, false};
+    }
+
+    // Equal signs: the magnitudes add under the shared sign. Unsigned addition
+    // wraps, so a total smaller than an addend means a 129th bit was lost.
+    const uint128_t total {a_magnitude + b_magnitude};
+    const bool carried {total < a_magnitude};
+    return ckd_sum_result{total, a_negative, carried};
+}
+
+// Reports whether a value given as (magnitude, sign) is outside the range of
+// T1. exceeds_width short-circuits to overflow when the true magnitude needs more than 128 bits.
+template <typename T1>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_overflows(const uint128_t magnitude, const bool negative, const bool exceeds_width) noexcept
+{
+    if (exceeds_width)
+    {
+        return true;
+    }
+
+    const uint128_t positive_limit {static_cast<uint128_t>((std::numeric_limits<T1>::max)())};
+    if (!negative)
+    {
+        return magnitude > positive_limit;
+    }
+
+    // The largest negative magnitude is max + 1 for a signed T1 and 0 for an unsigned T1.
+    const uint128_t negative_limit {std::numeric_limits<T1>::is_signed ? positive_limit + uint128_t{1} : uint128_t{0}};
+    return magnitude > negative_limit;
+}
+
+} // namespace detail
+
+// ckd_add: checked addition with the semantics of C23 <stdckdint.h>.
+//
+// The sum a + b is formed exactly, as though both operands lived in a signed
+// integer type of unlimited range, and is then converted to the type that
+// result points to. *result is always written and holds the exact sum wrapped
+// to the width of that type. The return value is false when *result equals
+// the exact mathematical sum and true when the sum did not fit and had to
+// wrap.
+BOOST_INT128_EXPORT template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_add(T1* result, const T2 a, const T3 b) noexcept
+{
+    static_assert(detail::valid_checked_type<T1>::value &&
+                  detail::valid_checked_type<T2>::value &&
+                  detail::valid_checked_type<T3>::value,
+                  "ckd_add operands must be integer types other than bool and plain char.");
+
+    const auto lhs {detail::ckd_decompose(a)};
+    const auto rhs {detail::ckd_decompose(b)};
+    const auto exact {detail::ckd_signed_sum(lhs.magnitude, lhs.negative, rhs.magnitude, rhs.negative)};
+
+    // Adding the widened images modulo 2^128 yields the exact sum mod 2^128,
+    // which is enough to produce the wrapped value for any target up to 128 bits.
+    *result = static_cast<T1>(lhs.raw + rhs.raw);
+
+    return detail::ckd_overflows<T1>(exact.magnitude, exact.negative, exact.carry);
+}
+
+// ckd_sub: checked subtraction with the semantics of C23 <stdckdint.h>.
+//
+// Mirrors ckd_add for a - b: the exact difference is wrapped to the width of
+// *result and stored there, and the function returns true when the exact
+// difference was not representable in that type.
+BOOST_INT128_EXPORT template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_sub(T1* result, const T2 a, const T3 b) noexcept
+{
+    static_assert(detail::valid_checked_type<T1>::value &&
+                  detail::valid_checked_type<T2>::value &&
+                  detail::valid_checked_type<T3>::value,
+                  "ckd_sub operands must be integer types other than bool and plain char.");
+
+    const auto lhs {detail::ckd_decompose(a)};
+    const auto rhs {detail::ckd_decompose(b)};
+    // a - b equals a + (-b), and -b has b's magnitude with the opposite sign.
+    const auto exact {detail::ckd_signed_sum(lhs.magnitude, lhs.negative, rhs.magnitude, !rhs.negative)};
+
+    *result = static_cast<T1>(lhs.raw - rhs.raw);
+
+    return detail::ckd_overflows<T1>(exact.magnitude, exact.negative, exact.carry);
+}
+
+// ckd_mul: checked multiplication with the semantics of C23 <stdckdint.h>.
+//
+// The product a * b is formed as though both operands had unlimited range. It
+// is stored in *result wrapped to that type's width, and the function returns
+// true when the exact product was not representable.
+BOOST_INT128_EXPORT template <typename T1, typename T2, typename T3>
+BOOST_INT128_HOST_DEVICE constexpr bool ckd_mul(T1* result, const T2 a, const T3 b) noexcept
+{
+    static_assert(detail::valid_checked_type<T1>::value &&
+                  detail::valid_checked_type<T2>::value &&
+                  detail::valid_checked_type<T3>::value,
+                  "ckd_mul operands must be integer types other than bool and plain char.");
+
+    const auto lhs {detail::ckd_decompose(a)};
+    const auto rhs {detail::ckd_decompose(b)};
+
+    // The product of the magnitudes needs more than 128 bits exactly when it is
+    // greater than UINT128_MAX. Comparing one magnitude against the maximum
+    // divided by the other detects that without building a 256-bit product.
+    const uint128_t limit {(std::numeric_limits<uint128_t>::max)()};
+    const bool too_wide {lhs.magnitude != 0U && rhs.magnitude > (limit / lhs.magnitude)};
+
+    const uint128_t magnitude {lhs.magnitude * rhs.magnitude};
+    const bool negative {lhs.negative != rhs.negative};
+
+    *result = static_cast<T1>(lhs.raw * rhs.raw);
+
+    return detail::ckd_overflows<T1>(magnitude, negative, too_wide);
+}
+
 } // namespace int128
 } // namespace boost
 
diff --git a/test/Jamfile b/test/Jamfile
index 25e598d7..cae51584 100644
--- a/test/Jamfile
+++ b/test/Jamfile
@@ -82,6 +82,7 @@ run test_midpoint.cpp ;
 run test_powm.cpp ;
 run test_ipow.cpp ;
 run test_isqrt.cpp ;
+run test_ckd.cpp ;
 
 run test_format.cpp ;
 run test_fmt_format.cpp ;
@@ -99,6 +100,7 @@ run test_hash.cpp : : : <toolset>msvc:<cxxflags>/wd4324 ;
 run ../examples/construction.cpp ;
 run ../examples/bit.cpp ;
 run ../examples/saturating_arithmetic.cpp ;
+run ../examples/checked_arithmetic.cpp ;
 run ../examples/mixed_type_arithmetic.cpp ;
 run ../examples/stream.cpp ;
 run ../examples/basic_arithmetic.cpp ;
diff --git a/test/test_ckd.cpp b/test/test_ckd.cpp
new file mode 100644
index 00000000..35b2a84c
--- /dev/null
+++ b/test/test_ckd.cpp
@@ -0,0 +1,548 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#include <boost/int128.hpp>
+#include <boost/core/lightweight_test.hpp>
+#include <random>
+#include <cstdint>
+#include <limits>
+
+using boost::int128::ckd_add;
+using boost::int128::ckd_sub;
+using boost::int128::ckd_mul;
+using boost::int128::int128_t;
+using boost::int128::uint128_t;
+
+constexpr std::size_t N {4096}; // iterations of each fuzz_op loop
+static std::mt19937_64 rng {42}; // fixed seed
+static std::uniform_int_distribution<std::uint64_t> dist {0, UINT64_MAX}; // full 64-bit range
+
+// Small magnitudes exercise the no-overflow path for narrow targets, where a
+// purely full-range distribution would almost always overflow.
+static std::uniform_int_distribution<int> small_dist {-1000, 1000};
+
+//
+// Oracle-based testing for the standard integer types. Addition and subtraction
+// are checked against __builtin_add_overflow / __builtin_sub_overflow, which
+// implement the C23 contract exactly (exact result, wrapped into the
+// destination, true on overflow) and so are an independent reference.
+//
+// Multiplication uses a hand-rolled reference instead. __builtin_mul_overflow
+// returns the wrong result for signed operands with an unsigned destination on
+// GCC 7, and on Clang it lowers a 128-bit checked multiply to __muloti4, a
+// compiler-rt symbol that is not always linked. ref_std_mul_overflow forms the
+// exact product from 32-bit limbs (no 128-bit type, no runtime helper) so it is
+// correct and links on every supported toolchain.
+//
+#if defined(__GNUC__) || defined(__clang__)
+
+// Unsigned 64x64 -> 128 bit multiply assembled from four 32x32 partial
+// products; the result is returned as the pair hi:lo. Only 64-bit arithmetic
+// is used, so neither a 128-bit type nor a helper such as __muloti4 is needed.
+static void mul_64_to_128(const std::uint64_t a, const std::uint64_t b,
+                          std::uint64_t& hi, std::uint64_t& lo) noexcept
+{
+    const std::uint64_t low32 {UINT64_C(0xFFFFFFFF)};
+    const std::uint64_t a_lo {a & low32};
+    const std::uint64_t a_hi {a >> 32};
+    const std::uint64_t b_lo {b & low32};
+    const std::uint64_t b_hi {b >> 32};
+
+    const std::uint64_t lo_lo {a_lo * b_lo};
+    const std::uint64_t lo_hi {a_lo * b_hi};
+    const std::uint64_t hi_lo {a_hi * b_lo};
+    const std::uint64_t hi_hi {a_hi * b_hi};
+
+    const std::uint64_t middle {(lo_lo >> 32) + (lo_hi & low32) + (hi_lo & low32)}; // at most 3 * (2^32 - 1), so it cannot wrap
+    lo = (lo_lo & low32) | (middle << 32);
+    hi = hi_hi + (lo_hi >> 32) + (hi_lo >> 32) + (middle >> 32);
+}
+
+// Signedness trait for the oracles: defers to std::is_signed for the standard integer
+// types; the 128-bit section below adds specializations for the native extended types.
+template <typename T>
+struct oracle_is_signed : std::is_signed<T> {};
+
+template <typename T, std::enable_if_t<oracle_is_signed<T>::value, int> = 0>
+std::uint64_t std_magnitude(const T value, bool& negative) noexcept // signed overload: returns |value|, reports the sign
+{
+    negative = value < 0;
+    const std::uint64_t image {static_cast<std::uint64_t>(value)}; // conversion to unsigned is modulo 2^64
+    return negative ? (std::uint64_t{0} - image) : image; // unsigned negation gives |value| without signed overflow
+}
+
+template <typename T, std::enable_if_t<!oracle_is_signed<T>::value, int> = 0>
+std::uint64_t std_magnitude(const T value, bool& negative) noexcept // unsigned overload: the value is its own magnitude
+{
+    negative = false;
+    return static_cast<std::uint64_t>(value);
+}
+
+template <typename R, std::enable_if_t<!oracle_is_signed<R>::value, int> = 0>
+bool oracle_overflows_std(const std::uint64_t magnitude, const bool negative) noexcept // unsigned destination R
+{
+    const std::uint64_t r_max {static_cast<std::uint64_t>((std::numeric_limits<R>::max)())};
+    return negative ? (magnitude != 0U) : (magnitude > r_max); // any non-zero negative value is out of range
+}
+
+template <typename R, std::enable_if_t<oracle_is_signed<R>::value, int> = 0>
+bool oracle_overflows_std(const std::uint64_t magnitude, const bool negative) noexcept // signed destination R
+{
+    const std::uint64_t r_max {static_cast<std::uint64_t>((std::numeric_limits<R>::max)())};
+    const std::uint64_t min_magnitude {r_max + 1U}; // the negative range reaches one further than the positive range
+    return negative ? (magnitude > min_magnitude) : (magnitude > r_max);
+}
+
+// Reference implementation of the C23 ckd_mul contract for the standard
+// integer types. It builds the exact product, stores it wrapped in *r, and
+// returns whether the destination type is unable to hold the exact value.
+template <typename A, typename B, typename R>
+bool ref_std_mul_overflow(const A a, const B b, R* r) noexcept
+{
+    bool lhs_negative {};
+    bool rhs_negative {};
+    const std::uint64_t lhs_magnitude {std_magnitude(a, lhs_negative)};
+    const std::uint64_t rhs_magnitude {std_magnitude(b, rhs_negative)};
+
+    std::uint64_t high {};
+    std::uint64_t low {};
+    mul_64_to_128(lhs_magnitude, rhs_magnitude, high, low);
+
+    // Store the low 64 bits of the signed product, negated when the signs differ.
+    const bool product_negative {lhs_negative != rhs_negative};
+    *r = static_cast<R>(product_negative ? (std::uint64_t{0} - low) : low);
+
+    // A non-zero high half can never fit; otherwise defer to the range check.
+    if (high != 0U)
+    {
+        return true;
+    }
+    return oracle_overflows_std<R>(low, product_negative);
+}
+
+template <typename T1, typename T2, typename T3, typename Ref, typename Ckd>
+void check_op(const T2 a, const T3 b, Ref ref_overflow, Ckd ckd_overflow) // T1 is the destination type under test
+{
+    T1 expected {};
+    const bool expected_overflow {ref_overflow(a, b, &expected)}; // reference: (a, b, destination)
+
+    T1 got {};
+    const bool got_overflow {ckd_overflow(&got, a, b)}; // library: (destination, a, b)
+
+    BOOST_TEST_EQ(got_overflow, expected_overflow); // overflow flags must agree
+    BOOST_TEST(got == expected);                    // wrapped values must agree
+}
+
+template <typename T1, typename T2, typename T3, typename Ref, typename Ckd>
+void fuzz_op(Ref ref_overflow, Ckd ckd_overflow) // N rounds, each covering four operand mixes
+{
+    for (std::size_t i {0}; i < N; ++i)
+    {
+        check_op<T1, T2, T3>(static_cast<T2>(dist(rng)),       static_cast<T3>(dist(rng)),       ref_overflow, ckd_overflow); // full x full
+        check_op<T1, T2, T3>(static_cast<T2>(small_dist(rng)), static_cast<T3>(small_dist(rng)), ref_overflow, ckd_overflow); // small x small
+        check_op<T1, T2, T3>(static_cast<T2>(dist(rng)),       static_cast<T3>(small_dist(rng)), ref_overflow, ckd_overflow); // full x small
+        check_op<T1, T2, T3>(static_cast<T2>(small_dist(rng)), static_cast<T3>(dist(rng)),       ref_overflow, ckd_overflow); // small x full
+    }
+}
+
+template <typename Ref, typename Ckd>
+void fuzz_all_triples(Ref ref_overflow, Ckd ckd_overflow) // template arguments below are <destination, operand a, operand b>
+{
+    fuzz_op<std::int32_t,  std::int32_t,  std::int32_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint32_t, std::uint32_t, std::uint32_t>(ref_overflow, ckd_overflow);
+    fuzz_op<std::int8_t,   std::int32_t,  std::int32_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint8_t,  std::int32_t,  std::int32_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::int16_t,  std::int16_t,  std::uint16_t>(ref_overflow, ckd_overflow);
+    fuzz_op<std::int64_t,  std::int32_t,  std::uint32_t>(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint64_t, std::int64_t,  std::int64_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::int32_t,  std::int64_t,  std::int64_t >(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint32_t, std::int8_t,   std::int8_t  >(ref_overflow, ckd_overflow);
+    fuzz_op<std::int64_t,  std::uint64_t, std::uint64_t>(ref_overflow, ckd_overflow);
+    fuzz_op<std::uint16_t, std::int64_t,  std::int32_t >(ref_overflow, ckd_overflow);
+}
+
+void test_standard_oracle()
+{
+    fuzz_all_triples( // ckd_add against the compiler builtin
+        [](auto a, auto b, auto* r) { return __builtin_add_overflow(a, b, r); },
+        [](auto* r, auto a, auto b) { return ckd_add(r, a, b); });
+
+    fuzz_all_triples( // ckd_sub against the compiler builtin
+        [](auto a, auto b, auto* r) { return __builtin_sub_overflow(a, b, r); },
+        [](auto* r, auto a, auto b) { return ckd_sub(r, a, b); });
+
+    fuzz_all_triples( // ckd_mul against the hand-rolled limb reference
+        [](auto a, auto b, auto* r) { return ref_std_mul_overflow(a, b, r); },
+        [](auto* r, auto a, auto b) { return ckd_mul(r, a, b); });
+}
+
+#else
+
+void test_standard_oracle() {} // no-op when neither __GNUC__ nor __clang__ is defined (no overflow builtins)
+
+#endif
+
+//
+// Oracle-based testing at the full 128-bit width using the native compiler
+// type. Addition and subtraction again use the builtins; multiplication uses
+// ref_native_mul_overflow, which assembles the 256-bit product from 64-bit limb
+// products so that no 128-bit multiply (hence no __muloti4) is emitted. This is
+// the only place products genuinely exceed 128 bits, exercising the width check.
+//
+#if defined(__SIZEOF_INT128__) && (defined(__GNUC__) || defined(__clang__))
+
+static uint128_t lib_u(const unsigned __int128 v) // native unsigned value -> library uint128_t
+{
+    return uint128_t{static_cast<std::uint64_t>(v >> 64), static_cast<std::uint64_t>(v)}; // upper 64 bits first, then lower; presumably the constructor takes (high, low)
+}
+
+static int128_t lib_s(const __int128 v) // native signed value -> library int128_t
+{
+    return static_cast<int128_t>(lib_u(static_cast<unsigned __int128>(v))); // goes through the unsigned image and lib_u
+}
+
+static unsigned __int128 rand_native() // 128 random bits from two 64-bit draws
+{
+    return (static_cast<unsigned __int128>(dist(rng)) << 64) | static_cast<unsigned __int128>(dist(rng));
+}
+
+// std::is_signed is not guaranteed to recognize the extended integer types
+// under a strict -std flag, so their signedness is stated explicitly here.
+template <>
+struct oracle_is_signed<__int128> : std::true_type {};
+
+template <>
+struct oracle_is_signed<unsigned __int128> : std::false_type {};
+
+template <typename T, std::enable_if_t<oracle_is_signed<T>::value, int> = 0>
+unsigned __int128 native_magnitude(const T value, bool& negative) noexcept // signed overload: returns |value|, reports the sign
+{
+    negative = value < 0;
+    const unsigned __int128 image {static_cast<unsigned __int128>(value)}; // conversion to unsigned is modulo 2^128
+    return negative ? (static_cast<unsigned __int128>(0) - image) : image; // unsigned negation gives |value| without signed overflow
+}
+
+template <typename T, std::enable_if_t<!oracle_is_signed<T>::value, int> = 0>
+unsigned __int128 native_magnitude(const T value, bool& negative) noexcept // unsigned overload: the value is its own magnitude
+{
+    negative = false;
+    return static_cast<unsigned __int128>(value);
+}
+
+template <typename R, std::enable_if_t<!oracle_is_signed<R>::value, int> = 0>
+bool oracle_overflows_128(const unsigned __int128 magnitude, const bool negative) noexcept // unsigned destination R
+{
+    // A magnitude that fits in 128 bits fits an unsigned 128-bit target exactly;
+    // only a non-zero negative value is unrepresentable.
+    return negative && magnitude != 0U;
+}
+
+template <typename R, std::enable_if_t<oracle_is_signed<R>::value, int> = 0>
+bool oracle_overflows_128(const unsigned __int128 magnitude, const bool negative) noexcept // signed destination R
+{
+    const unsigned __int128 positive_max {(static_cast<unsigned __int128>(1) << 127) - 1}; // 2^127 - 1
+    const unsigned __int128 negative_max {static_cast<unsigned __int128>(1) << 127};       // 2^127, largest negative magnitude
+    return negative ? (magnitude > negative_max) : (magnitude > positive_max);
+}
+
+// Independent reference for the C23 ckd_mul contract at the full 128-bit width.
+// The 256-bit product is assembled from 64-bit limb products so that no 128-bit
+// multiply (and therefore no __muloti4) is emitted; only native add, shift, and
+// compare on unsigned __int128 are used.
+template <typename A, typename B, typename R>
+bool ref_native_mul_overflow(const A a, const B b, R* r) noexcept
+{
+    bool a_negative {};
+    bool b_negative {};
+    const unsigned __int128 a_magnitude {native_magnitude(a, a_negative)};
+    const unsigned __int128 b_magnitude {native_magnitude(b, b_negative)};
+
+    const std::uint64_t a0 {static_cast<std::uint64_t>(a_magnitude)};       // low limb of |a|
+    const std::uint64_t a1 {static_cast<std::uint64_t>(a_magnitude >> 64)}; // high limb of |a|
+    const std::uint64_t b0 {static_cast<std::uint64_t>(b_magnitude)};       // low limb of |b|
+    const std::uint64_t b1 {static_cast<std::uint64_t>(b_magnitude >> 64)}; // high limb of |b|
+
+    std::uint64_t h00 {};
+    std::uint64_t l00 {};
+    std::uint64_t h01 {};
+    std::uint64_t l01 {};
+    std::uint64_t h10 {};
+    std::uint64_t l10 {};
+    std::uint64_t h11 {};
+    std::uint64_t l11 {};
+    mul_64_to_128(a0, b0, h00, l00);
+    mul_64_to_128(a0, b1, h01, l01);
+    mul_64_to_128(a1, b0, h10, l10);
+    mul_64_to_128(a1, b1, h11, l11);
+
+    const unsigned __int128 p00 {(static_cast<unsigned __int128>(h00) << 64) | l00};
+    const unsigned __int128 p01 {(static_cast<unsigned __int128>(h01) << 64) | l01};
+    const unsigned __int128 p10 {(static_cast<unsigned __int128>(h10) << 64) | l10};
+    const unsigned __int128 p11 {(static_cast<unsigned __int128>(h11) << 64) | l11};
+
+    // product = p11 * 2^128 + (p01 + p10) * 2^64 + p00, split into a low and a
+    // high 128-bit half with the carries tracked explicitly.
+    const unsigned __int128 cross {p01 + p10};
+    const bool cross_carry {cross < p01}; // wrap of cross: worth 2^128 * 2^64, i.e. 2^64 in the high half
+    const unsigned __int128 low128 {p00 + (cross << 64)};
+    const bool low_carry {low128 < p00}; // carry out of the low half into the high half
+    const unsigned __int128 high128 {p11 + (cross >> 64) +
+                                     (static_cast<unsigned __int128>(cross_carry) << 64) +
+                                     static_cast<unsigned __int128>(low_carry)};
+
+    const bool negative {a_negative != b_negative};
+    const unsigned __int128 wrapped {negative ? (static_cast<unsigned __int128>(0) - low128) : low128}; // low 128 bits of the signed product
+    *r = static_cast<R>(wrapped);
+
+    if (high128 != 0U) // the magnitude needs more than 128 bits: never representable
+    {
+        return true;
+    }
+
+    return oracle_overflows_128<R>(low128, negative);
+}
+
+// Randomised comparison of a reference overflow check against a ckd_* operation.
+//
+// ref_overflow is invoked as ref_overflow(a, b, &result) on native __int128 /
+// unsigned __int128 values; ckd_overflow is invoked as
+// ckd_overflow(&result, a, b) on the library's int128_t / uint128_t values.
+// On each of N iterations two values are drawn from rand_native() and also
+// reinterpreted as signed through static_cast. Four operand/target
+// combinations are then exercised, and for each one both the returned overflow
+// flag and the stored result must agree between reference and library.
+//
+// NOTE(review): N, rand_native, lib_u and lib_s are defined outside this chunk;
+// lib_u / lib_s presumably convert native values to uint128_t / int128_t.
+template <typename Ref, typename Ckd>
+void native_fuzz(Ref ref_overflow, Ckd ckd_overflow)
+{
+    for (std::size_t i {0}; i < N; ++i)
+    {
+        // One pair of random bit patterns per iteration, viewed as both
+        // unsigned (ua, ub) and signed (sa, sb).
+        const unsigned __int128 ua {rand_native()};
+        const unsigned __int128 ub {rand_native()};
+        const __int128 sa {static_cast<__int128>(ua)};
+        const __int128 sb {static_cast<__int128>(ub)};
+
+        // uint128_t target, unsigned operands
+        {
+            unsigned __int128 ref {};
+            const bool ref_of {ref_overflow(ua, ub, &ref)};
+            uint128_t got {};
+            const bool got_of {ckd_overflow(&got, lib_u(ua), lib_u(ub))};
+            BOOST_TEST_EQ(got_of, ref_of);
+            BOOST_TEST(got == lib_u(ref));
+        }
+
+        // int128_t target, signed operands
+        {
+            __int128 ref {};
+            const bool ref_of {ref_overflow(sa, sb, &ref)};
+            int128_t got {};
+            const bool got_of {ckd_overflow(&got, lib_s(sa), lib_s(sb))};
+            BOOST_TEST_EQ(got_of, ref_of);
+            BOOST_TEST(got == lib_s(ref));
+        }
+
+        // int128_t target, mixed-sign operands (unsigned lhs, signed rhs)
+        {
+            __int128 ref {};
+            const bool ref_of {ref_overflow(ua, sb, &ref)};
+            int128_t got {};
+            const bool got_of {ckd_overflow(&got, lib_u(ua), lib_s(sb))};
+            BOOST_TEST_EQ(got_of, ref_of);
+            BOOST_TEST(got == lib_s(ref));
+        }
+
+        // uint128_t target, mixed-sign operands (signed lhs, unsigned rhs)
+        {
+            unsigned __int128 ref {};
+            const bool ref_of {ref_overflow(sa, ub, &ref)};
+            uint128_t got {};
+            const bool got_of {ckd_overflow(&got, lib_s(sa), lib_u(ub))};
+            BOOST_TEST_EQ(got_of, ref_of);
+            BOOST_TEST(got == lib_u(ref));
+        }
+    }
+}
+
+// Fuzzes ckd_add, ckd_sub and ckd_mul through native_fuzz. Addition and
+// subtraction are compared with the compiler's __builtin_*_overflow
+// intrinsics; multiplication is compared with ref_native_mul_overflow.
+void test_native_oracle()
+{
+    // Addition.
+    const auto add_reference = [](auto lhs, auto rhs, auto* out) { return __builtin_add_overflow(lhs, rhs, out); };
+    const auto add_checked = [](auto* out, auto lhs, auto rhs) { return ckd_add(out, lhs, rhs); };
+    native_fuzz(add_reference, add_checked);
+
+    // Subtraction.
+    const auto sub_reference = [](auto lhs, auto rhs, auto* out) { return __builtin_sub_overflow(lhs, rhs, out); };
+    const auto sub_checked = [](auto* out, auto lhs, auto rhs) { return ckd_sub(out, lhs, rhs); };
+    native_fuzz(sub_reference, sub_checked);
+
+    // Multiplication.
+    const auto mul_reference = [](auto lhs, auto rhs, auto* out) { return ref_native_mul_overflow(lhs, rhs, out); };
+    const auto mul_checked = [](auto* out, auto lhs, auto rhs) { return ckd_mul(out, lhs, rhs); };
+    native_fuzz(mul_reference, mul_checked);
+}
+
+#else
+
+// Empty stand-in for the #else branch so that main() can call
+// test_native_oracle() unconditionally.
+void test_native_oracle() {}
+
+#endif
+
+//
+// Hand-verified edge cases that run on every platform, including those without
+// a native 128-bit type.
+//
+// Extreme values of the library types as reported by std::numeric_limits,
+// shared by the edge-case tests.
+// NOTE(review): max/min are parenthesised, presumably to keep function-like
+// min/max macros from expanding - confirm.
+constexpr auto u_max {(std::numeric_limits<uint128_t>::max)()};
+constexpr auto i_max {(std::numeric_limits<int128_t>::max)()};
+constexpr auto i_min {(std::numeric_limits<int128_t>::min)()};
+
+// Edge cases for ckd_add with 128-bit targets. Each case checks the returned
+// flag (true when the exact sum does not fit the target) and the value left in
+// the target, which is the sum wrapped to the target's width.
+void test_add_edges()
+{
+    // uint128_t target.
+    uint128_t u {0};
+    BOOST_TEST_EQ(ckd_add(&u, u_max, uint128_t{1}), true);   // 2^128 wraps to 0
+    BOOST_TEST(u == uint128_t{0});
+    BOOST_TEST_EQ(ckd_add(&u, uint128_t{5}, int128_t{-3}), false);
+    BOOST_TEST(u == uint128_t{2});
+    BOOST_TEST_EQ(ckd_add(&u, uint128_t{3}, int128_t{-5}), true);   // -2 wraps
+    BOOST_TEST(u == u_max - uint128_t{1});
+    BOOST_TEST_EQ(ckd_add(&u, u_max, u_max), true);          // carry past 2^128
+    BOOST_TEST(u == u_max - uint128_t{1});                   // 2^129 - 2 wraps to 2^128 - 2
+
+    // int128_t target.
+    int128_t i {0};
+    BOOST_TEST_EQ(ckd_add(&i, i_max, int128_t{1}), true);    // INT128_MAX + 1 -> INT128_MIN
+    BOOST_TEST(i == i_min);
+    BOOST_TEST_EQ(ckd_add(&i, i_max, i_min), false);
+    BOOST_TEST(i == int128_t{-1});
+    BOOST_TEST_EQ(ckd_add(&i, u_max, uint128_t{0}), true);   // 2^128 - 1 unfit in signed
+    BOOST_TEST(i == int128_t{-1});
+}
+
+// Edge cases for ckd_sub. Each case checks the returned flag (true when the
+// exact difference does not fit the target) and the value left in the target,
+// which is the difference wrapped to the target's width.
+void test_sub_edges()
+{
+    // uint128_t target.
+    uint128_t u {0};
+    BOOST_TEST_EQ(ckd_sub(&u, uint128_t{0}, uint128_t{1}), true);   // -1 wraps to 2^128 - 1
+    BOOST_TEST(u == u_max);
+    BOOST_TEST_EQ(ckd_sub(&u, uint128_t{5}, uint128_t{3}), false);
+    BOOST_TEST(u == uint128_t{2});
+    BOOST_TEST_EQ(ckd_sub(&u, u_max, int128_t{-1}), true);          // 2^128 wraps to 0
+    BOOST_TEST(u == uint128_t{0});
+
+    // int128_t target.
+    int128_t i {0};
+    BOOST_TEST_EQ(ckd_sub(&i, i_min, int128_t{1}), true);           // INT128_MIN - 1 -> INT128_MAX
+    BOOST_TEST(i == i_max);
+    BOOST_TEST_EQ(ckd_sub(&i, i_max, int128_t{-1}), true);          // -> INT128_MIN
+    BOOST_TEST(i == i_min);
+    BOOST_TEST_EQ(ckd_sub(&i, i_max, i_max), false);
+    BOOST_TEST(i == int128_t{0});
+    BOOST_TEST_EQ(ckd_sub(&i, i_min, i_min), false);
+    BOOST_TEST(i == int128_t{0});
+
+    // Narrow targets.
+    std::int32_t r32 {0};
+    BOOST_TEST_EQ(ckd_sub(&r32, int128_t{1000}, int128_t{2000}), false);
+    BOOST_TEST_EQ(r32, -1000);
+
+    // -1 does not fit an 8-bit unsigned target and wraps to 255.
+    std::uint8_t r8 {0};
+    BOOST_TEST_EQ(ckd_sub(&r8, uint128_t{0}, uint128_t{1}), true);
+    BOOST_TEST_EQ(static_cast<int>(r8), 255);
+}
+
+// Edge cases for ckd_mul. Each case checks the returned flag (true when the
+// exact product does not fit the target) and the value left in the target,
+// which is the product wrapped to the target's width.
+void test_mul_edges()
+{
+    // int128_t target.
+    int128_t i {0};
+    BOOST_TEST_EQ(ckd_mul(&i, i_min, int128_t{-1}), true);          // 2^127 wraps to INT128_MIN
+    BOOST_TEST(i == i_min);
+    BOOST_TEST_EQ(ckd_mul(&i, i_max, int128_t{2}), true);           // 2^128 - 2 -> -2
+    BOOST_TEST(i == int128_t{-2});
+    BOOST_TEST_EQ(ckd_mul(&i, i_min, int128_t{1}), false);
+    BOOST_TEST(i == i_min);
+    BOOST_TEST_EQ(ckd_mul(&i, int128_t{-3}, int128_t{4}), false);
+    BOOST_TEST(i == int128_t{-12});
+    BOOST_TEST_EQ(ckd_mul(&i, int128_t{0}, int128_t{-7}), false);
+    BOOST_TEST(i == int128_t{0});
+
+    // uint128_t target.
+    uint128_t u {0};
+    BOOST_TEST_EQ(ckd_mul(&u, u_max, uint128_t{2}), true);          // 2^129 - 2 wraps
+    BOOST_TEST(u == u_max - uint128_t{1});
+    const uint128_t two_64 {1U, 0U};                                // 2^64
+    BOOST_TEST_EQ(ckd_mul(&u, two_64, two_64), true);               // 2^128 wraps to 0
+    BOOST_TEST(u == uint128_t{0});
+    BOOST_TEST_EQ(ckd_mul(&u, uint128_t{0}, u_max), false);
+    BOOST_TEST(u == uint128_t{0});
+    BOOST_TEST_EQ(ckd_mul(&u, uint128_t{6}, uint128_t{7}), false);
+    BOOST_TEST(u == uint128_t{42});
+    // -(2^128 - 1) is congruent to 1 modulo 2^128.
+    BOOST_TEST_EQ(ckd_mul(&u, u_max, int128_t{-1}), true);          // negative result in unsigned
+    BOOST_TEST(u == uint128_t{1});
+
+    // Narrow targets.
+    std::int32_t r32 {0};
+    BOOST_TEST_EQ(ckd_mul(&r32, int128_t{1000}, int128_t{1000}), false);
+    BOOST_TEST_EQ(r32, 1000000);
+
+    std::uint8_t r8 {0};
+    BOOST_TEST_EQ(ckd_mul(&r8, uint128_t{20}, uint128_t{20}), true);    // 400 wraps mod 256
+    BOOST_TEST_EQ(static_cast<int>(r8), 144);
+
+    // -100 lies inside the int8_t range, so no overflow is reported.
+    std::int8_t r8s {0};
+    BOOST_TEST_EQ(ckd_mul(&r8s, int128_t{-5}, int128_t{20}), false);
+    BOOST_TEST_EQ(static_cast<int>(r8s), -100);
+}
+
+//
+// constexpr usability for all three operations.
+//
+
+// On GCC 7 (not Clang) without __SIZEOF_INT128__, define a macro that disables
+// the int128_t constexpr check (mul_overflows_i128_min and its static_assert).
+// NOTE(review): the compiler limitation being worked around is not visible
+// here - presumably a constexpr issue on the path without a native 128-bit
+// type; confirm.
+#if defined(__GNUC__) && __GNUC__ == 7 && !defined(__clang__) && !defined(__SIZEOF_INT128__)
+#  define BOOST_INT128_TEST_CKD_NO_CONSTEXPR_128
+#endif
+
+// Overflow flag that ckd_add reports for INT_MAX + 1 in an int target; usable
+// in a constant expression.
+constexpr bool add_overflows_int_max()
+{
+    int sum = 0;
+    const bool overflowed = ckd_add(&sum, (std::numeric_limits<int>::max)(), 1);
+    return overflowed;
+}
+
+// Overflow flag that ckd_sub reports for INT_MIN - 1 in an int target; usable
+// in a constant expression.
+constexpr bool sub_overflows_int_min()
+{
+    int difference = 0;
+    const bool overflowed = ckd_sub(&difference, (std::numeric_limits<int>::min)(), 1);
+    return overflowed;
+}
+
+// Overflow flag that ckd_mul reports for INT_MAX * 2 in an int target; usable
+// in a constant expression.
+constexpr bool mul_overflows_int_max()
+{
+    int product = 0;
+    const bool overflowed = ckd_mul(&product, (std::numeric_limits<int>::max)(), 2);
+    return overflowed;
+}
+
+// Value that ckd_sub stores for 5 - 3 in an int target; usable in a constant
+// expression.
+constexpr int sub_value()
+{
+    int difference = 0;
+    static_cast<void>(ckd_sub(&difference, 5, 3));   // only the stored value matters here
+    return difference;
+}
+
+// Value that ckd_mul stores for 6 * 7 in an int target; usable in a constant
+// expression.
+constexpr int mul_value()
+{
+    int product = 0;
+    static_cast<void>(ckd_mul(&product, 6, 7));   // only the stored value matters here
+    return product;
+}
+
+#ifndef BOOST_INT128_TEST_CKD_NO_CONSTEXPR_128
+// Overflow flag that ckd_mul reports for INT128_MIN * -1 in an int128_t target;
+// compiled only when the 128-bit constexpr check is not disabled.
+constexpr bool mul_overflows_i128_min()
+{
+    int128_t product {0};
+    const bool overflowed = ckd_mul(&product, (std::numeric_limits<int128_t>::min)(), int128_t{-1});
+    return overflowed;
+}
+#endif
+
+// Compile-time checks only: every static_assert evaluates one of the constexpr
+// helpers in a constant expression, so nothing is executed at run time.
+void test_constexpr()
+{
+    // Overflow flags on a plain int target.
+    static_assert(add_overflows_int_max(), "INT_MAX + 1 overflows int");
+    static_assert(sub_overflows_int_min(), "INT_MIN - 1 overflows int");
+    static_assert(mul_overflows_int_max(), "INT_MAX * 2 overflows int");
+
+    // Values written through the result pointer.
+    static_assert(sub_value() == 2, "5 - 3 == 2");
+    static_assert(mul_value() == 42, "6 * 7 == 42");
+
+#ifndef BOOST_INT128_TEST_CKD_NO_CONSTEXPR_128
+    // int128_t target, skipped where the guard macro is defined.
+    static_assert(mul_overflows_i128_min(), "INT128_MIN * -1 overflows int128_t");
+#endif
+}
+
+// Runs every test group in sequence; the value returned by
+// boost::report_errors() becomes the process exit status.
+int main()
+{
+    test_standard_oracle();
+    test_native_oracle();
+    test_add_edges();
+    test_sub_edges();
+    test_mul_edges();
+    test_constexpr();
+
+    return boost::report_errors();
+}