Merge pull request #1328 from boostorg/rework_sqrt_cbrt

ckormanyos · web-flow · commit e72fd34cea24 · 2026-02-03T10:51:04.000+01:00
Rework sqrt cbrt
diff --git a/include/boost/decimal/detail/cmath/cbrt.hpp b/include/boost/decimal/detail/cmath/cbrt.hpp
@@ -1,5 +1,5 @@
-// Copyright 2023 - 2024 Matt Borland
-// Copyright 2023 - 2024 Christopher Kormanyos
+// Copyright 2023 - 2026 Matt Borland
+// Copyright 2023 - 2026 Christopher Kormanyos
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
@@ -103,42 +103,47 @@ constexpr auto cbrt_impl(const T x) noexcept
         }
         else
         {
-            // Scale the argument to the interval 1/10 <= x < 1.
+            // Scale the argument to the interval 1/64 < x <= 1/8.
             T gx { gn, -std::numeric_limits<T>::digits10 };
 
+            const bool is_scaled_by_eight { (gx > T { 125, -3 }) };
+
+            if(is_scaled_by_eight)
+            {
+                gx /= 8;
+            }
+
             exp10val += std::numeric_limits<T>::digits10;
 
             // For this work we perform an order-2 Pade approximation of the cube-root
-            // at argument x = 1/2. This results in slightly more than 2 decimal digits
-            // of accuracy over the interval 1/10 <= x < 1.
+            // at argument x = 1/16. This results in slightly more than 4 decimal digits
+            // of accuracy over the interval 1/64 < x <= 1/8.
 
-            // PadeApproximant[x^(1/3), {x, 1/2, {2, 2}}]
+            // PadeApproximant[x^(1/3), {x, 1/16, {2, 2}}]
             // FullSimplify[%]
 
             // HornerForm[Numerator[Out[2]]]
             // Results in:
-            //   5 + x (70 + 56 x)
+            //   5 + x (560 + 3584 x)
 
             // HornerForm[Denominator[Out[2]]]
             // Results in:
-            //   2^(1/3) (14 + x (70 + 20 x))
+            //   2^(1/3) (28 + x (1120 + 2560 x))
 
-            constexpr T five     {  5 };
-            constexpr T fourteen { 14 };
-            constexpr T seventy  {  7, 1 };
+            constexpr T five { 5 };
 
             result =
-                  (five + gx * (seventy + gx * 56))
-                / (numbers::cbrt2_v<T> * (fourteen + gx * (seventy + gx * 20)));
+                  (five + gx * (560 + 3584 * gx))
+                / (numbers::cbrt2_v<T> * (28 + gx * (1120 + 2560 * gx)));
 
-            // Perform 3, 4 or 5 Newton-Raphson iterations depending on precision.
-            // Note from above, we start with slightly more than 2 decimal digits
+            // Perform 2, 3 or 4 Newton-Raphson iterations depending on precision.
+            // Note from above, we start with slightly more than 4 decimal digits
             // of accuracy.
 
             constexpr int iter_loops
             {
-                  std::numeric_limits<T>::digits10 < 10 ? 3
-                : std::numeric_limits<T>::digits10 < 20 ? 4 : 5
+                  std::numeric_limits<T>::digits10 < 10 ? 2
+                : std::numeric_limits<T>::digits10 < 20 ? 3 : 4
             };
 
             for (int idx = 0; idx < iter_loops; ++idx)
@@ -172,6 +177,11 @@ constexpr auto cbrt_impl(const T x) noexcept
                         break;
                 }
             }
+
+            if(is_scaled_by_eight)
+            {
+                result *= 2;
+            }
         }
     }
 
@@ -181,7 +191,7 @@ constexpr auto cbrt_impl(const T x) noexcept
 } //namespace detail
 
 BOOST_DECIMAL_EXPORT template <typename T>
-constexpr auto cbrt(const T val) noexcept
+constexpr auto cbrt(const T val) noexcept // LCOV_EXCL_LINE
     BOOST_DECIMAL_REQUIRES(detail::is_decimal_floating_point_v, T)
 {
     using evaluation_type = detail::evaluation_type_t<T>;
diff --git a/include/boost/decimal/detail/cmath/sqrt.hpp b/include/boost/decimal/detail/cmath/sqrt.hpp
@@ -1,5 +1,5 @@
-// Copyright 2023 - 2024 Matt Borland
-// Copyright 2023 - 2024 Christopher Kormanyos
+// Copyright 2023 - 2026 Matt Borland
+// Copyright 2023 - 2026 Christopher Kormanyos
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
@@ -94,43 +94,52 @@ constexpr auto sqrt_impl(const T x) noexcept
         }
         else
         {
-            // Scale the argument to the interval 1/10 <= x < 1.
+            // Scale the argument to the interval 1/16 < x <= 1/4.
             T gx { gn, -std::numeric_limits<T>::digits10 };
 
+            const bool is_scaled_by_four { (gx > T { 25, -2 }) };
+
+            if(is_scaled_by_four)
+            {
+                gx /= 4;
+            }
+
             exp10val += std::numeric_limits<T>::digits10;
 
             // For this work we perform an order-2 Pade approximation of the square root
-            // at argument x = 1/2. This results in slightly more than 2 decimal digits
-            // of accuracy over the interval 1/10 <= x < 1.
+            // at argument x = 1/8. This results in slightly more than 4 decimal digits
+            // of accuracy over the interval 1/16 < x <= 1/4.
 
             // See also WolframAlpha at:
             //   https://www.wolframalpha.com/input?i=FullSimplify%5BPadeApproximant%5BSqrt%5Bx%5D%2C+%7Bx%2C+1%2F2%2C+%7B2%2C+2%7D%7D%5D%5D
 
-            // PadeApproximant[Sqrt[x], {x, 1/2, {2, 2}}]
+            // PadeApproximant[Sqrt[x], {x, 1/8, {2, 2}}]
             // FullSimplify[%]
 
             // HornerForm[Numerator[Out[2]]]
             // Results in:
-            //   1 + x (20 + 20 x)
+            //   1 + x (80 + 320 x)
 
             // HornerForm[Denominator[Out[2]]]
             // Results in:
-            //   5 Sqrt[2] + x (20 Sqrt[2] + 4 Sqrt[2] x)
+            //   10 Sqrt[2] + x (160 Sqrt[2] + 128 Sqrt[2] x)
+            // which simplifies to
+            //   Sqrt[2] (10 + x (160 + 128 x))
 
-            constexpr T five { 5 };
+            constexpr T ten { 1, 1 };
 
             result =
-                  (one + gx * ((one + gx) * 20))
-                / (numbers::sqrt2_v<T> * ((gx * 4) * (five + gx) + five));
+                  (one + gx * (80 + 320 * gx))
+                / (numbers::sqrt2_v<T> * (ten + gx * (160 + 128 * gx)));
 
-            // Perform 3, 4 or 5 Newton-Raphson iterations depending on precision.
-            // Note from above, we start with slightly more than 2 decimal digits
+            // Perform 2, 3 or 4 Newton-Raphson iterations depending on precision.
+            // Note from above, we start with slightly more than 4 decimal digits
             // of accuracy.
 
             constexpr int iter_loops
             {
-                  std::numeric_limits<T>::digits10 < 10 ? 3
-                : std::numeric_limits<T>::digits10 < 20 ? 4 : 5
+                  std::numeric_limits<T>::digits10 < 10 ? 2
+                : std::numeric_limits<T>::digits10 < 20 ? 3 : 4
             };
 
             for (int idx = 0; idx < iter_loops; ++idx)
@@ -155,6 +164,11 @@ constexpr auto sqrt_impl(const T x) noexcept
                     result /= numbers::sqrt10_v<T>;
                 }
             }
+
+            if(is_scaled_by_four)
+            {
+                result *= 2;
+            }
         }
     }
 
diff --git a/test/test_cbrt.cpp b/test/test_cbrt.cpp
@@ -1,5 +1,5 @@
-// Copyright 2024 Matt Borland
-// Copyright 2024 Christopher Kormanyos
+// Copyright 2024 - 2026 Matt Borland
+// Copyright 2024 - 2026 Christopher Kormanyos
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
@@ -50,22 +50,12 @@ namespace local
   {
     using std::fabs;
 
-    auto result_is_ok = bool { };
-
-    NumericType delta { };
-
-    if(b == static_cast<NumericType>(0))
-    {
-      delta = fabs(a - b); // LCOV_EXCL_LINE
-
-      result_is_ok = (delta < tol); // LCOV_EXCL_LINE
-    }
-    else
+    const NumericType delta
     {
-      delta = fabs(1 - (a / b));
+      (b == static_cast<NumericType>(0)) ? fabs(a - b) : fabs(1 - (a / b))
+    };
 
-      result_is_ok = (delta < tol);
-    }
+    const bool result_is_ok { (delta < tol) };
 
     // LCOV_EXCL_START
     if (!result_is_ok)
@@ -110,9 +100,9 @@ namespace local
     auto trials = static_cast<std::uint32_t>(UINT8_C(0));
 
     #if !defined(BOOST_DECIMAL_REDUCE_TEST_DEPTH)
-    constexpr auto count = (sizeof(decimal_type) == static_cast<std::size_t>(UINT8_C(4))) ? UINT32_C(0x400) : UINT32_C(0x40);
+    constexpr std::uint32_t count { ((std::numeric_limits<DecimalType>::digits10 < 10) ? UINT16_C(3200) : UINT16_C(1600)) };
     #else
-    constexpr auto count = (sizeof(decimal_type) == static_cast<std::size_t>(UINT8_C(4))) ? UINT32_C(0x40) : UINT32_C(0x4);
+    constexpr std::uint32_t count { ((std::numeric_limits<DecimalType>::digits10 < 10) ? UINT16_C(320)  : UINT16_C(160)) };
     #endif
 
     for( ; trials < count; ++trials)
@@ -341,9 +331,9 @@ int main()
     using decimal_type = boost::decimal::decimal32_t;
     using float_type   = float;
 
-    const auto result_small_is_ok  = local::test_cbrt<decimal_type, float_type>(INT32_C(16), 1.0E-26L, 1.0E-01L);
-    const auto result_medium_is_ok = local::test_cbrt<decimal_type, float_type>(INT32_C(16), 0.9E-01L, 1.1E+01L);
-    const auto result_large_is_ok  = local::test_cbrt<decimal_type, float_type>(INT32_C(16), 1.0E+01L, 1.0E+26L);
+    const auto result_small_is_ok  = local::test_cbrt<decimal_type, float_type>(16, 1.0E-26L, 1.0E-01L);
+    const auto result_medium_is_ok = local::test_cbrt<decimal_type, float_type>(16, 0.9E-02L, 1.1E+01L);
+    const auto result_large_is_ok  = local::test_cbrt<decimal_type, float_type>(16, 1.0E+01L, 1.0E+26L);
 
     BOOST_TEST(result_small_is_ok);
     BOOST_TEST(result_medium_is_ok);
@@ -361,12 +351,12 @@ int main()
   }
 
   {
-    using decimal_type = boost::decimal::decimal_fast32_t;
-    using float_type   = float;
+    using decimal_type = boost::decimal::decimal64_t;
+    using float_type   = double;
 
-    const auto result_small_is_ok  = local::test_cbrt<decimal_type, float_type>(INT32_C(16), 1.0E-26L, 1.0E-01L);
-    const auto result_medium_is_ok = local::test_cbrt<decimal_type, float_type>(INT32_C(16), 0.9E-01L, 1.1E+01L);
-    const auto result_large_is_ok  = local::test_cbrt<decimal_type, float_type>(INT32_C(16), 1.0E+01L, 1.0E+26L);
+    const auto result_small_is_ok  = local::test_cbrt<decimal_type, float_type>(16, 1.0E-26L, 1.0E-01L);
+    const auto result_medium_is_ok = local::test_cbrt<decimal_type, float_type>(16, 0.9E-01L, 1.1E+01L);
+    const auto result_large_is_ok  = local::test_cbrt<decimal_type, float_type>(16, 1.0E+01L, 1.0E+26L);
 
     BOOST_TEST(result_small_is_ok);
     BOOST_TEST(result_medium_is_ok);
@@ -384,7 +374,7 @@ int main()
   }
 
   {
-    using decimal_type = boost::decimal::decimal64_t;
+    using decimal_type = boost::decimal::decimal_fast64_t;
     using float_type   = double;
 
     const auto result_small_is_ok  = local::test_cbrt<decimal_type, float_type>(INT32_C(16), 1.0E-76L, 1.0E-01L);
@@ -407,14 +397,14 @@ int main()
   }
 
   {
-    const auto result_cbrt128_is_ok = local::test_cbrt_128(96);
+    const auto result_cbrt128_is_ok = local::test_cbrt_128(16);
 
     BOOST_TEST(result_cbrt128_is_ok);
 
     result_is_ok = (result_cbrt128_is_ok && result_is_ok);
   }
 
-  result_is_ok = ((boost::report_errors() == 0) && result_is_ok);
+  BOOST_TEST(result_is_ok);
 
-  return (result_is_ok ? 0 : -1);
+  return boost::report_errors();
 }
diff --git a/test/test_sqrt.cpp b/test/test_sqrt.cpp