@@ -770,18 +770,13 @@ library FixedPointMathLib {
770770 function sqrt (uint256 x ) internal pure returns (uint256 z ) {
771771 /// @solidity memory-safe-assembly
772772 assembly {
773- // Step 1: Get the bit position of the most significant bit
774- // n = floor(log2(x))
775- // For x ≈ 2^n, we know sqrt(x) ≈ 2^(n/2)
776- // We use (n+1)/2 instead of n/2 to round up slightly
777- // This gives a better initial approximation
778- //
779- // Formula: z = 2^((n+1)/2) = 2^(floor((n+1)/2))
780- // Implemented as: z = 1 << ((n+1) >> 1)
773+ // Initial guess z = 2^⌊(n+1)/2⌋ where n = ⌊log₂(x)⌋. The
774+ // alternating-endpoint seed gives ε₁ = 0.0607 after one Babylonian
775+ // step for all inputs. With ε_{n+1} ≈ ε²/2, 6 steps yield 2⁻¹⁶¹
776+ // relative error (>128 correct bits).
781777 z := shl (shr (1 , sub (256 , clz (x))), 1 )
782778
783- /// (x/z + z) / 2
784- z := shr (1 , add (z, div (x, z)))
779+ // 6 Babylonian steps; z = (x/z + z) / 2
785780 z := shr (1 , add (z, div (x, z)))
786781 z := shr (1 , add (z, div (x, z)))
787782 z := shr (1 , add (z, div (x, z)))
@@ -799,23 +794,25 @@ library FixedPointMathLib {
799794 /// @dev Returns the cube root of `x`, rounded down.
800795 /// Credit to bout3fiddy and pcaversaccio under AGPLv3 license:
801796 /// https://github.com/pcaversaccio/snekmate/blob/main/src/snekmate/utils/math.vy
802- /// Formally verified by xuwinnie:
803- /// https://github.com/vectorized/solady/blob/main/audits/xuwinnie-solady-cbrt-proof.pdf
804797 function cbrt (uint256 x ) internal pure returns (uint256 z ) {
805798 /// @solidity memory-safe-assembly
806799 assembly {
807- // Initial guess z = 2^ceil((log2(x) + 2) / 3).
808- // Since log2(x) = 255 - clz(x), the expression shl((257 - clz(x)) / 3, 1)
809- // computes this over-estimate. Guaranteed ≥ cbrt(x) and safe for Newton-Raphson's.
810- z := shl (div (sub (257 , clz (x)), 3 ), 1 )
811- // Newton-Raphson's.
812- z := div (add (add (div (x, mul (z, z)), z), z), 3 )
800+ // Initial guess z ≈ ∛(3/4) · 2𐞥 where q = ⌊(257 − clz(x)) / 3⌋. The
801+ // multiplier 233/256 ≈ 0.910 ≈ ∛(3/4) balances the worst-case
802+ // over/underestimate across each octave triplet (ε_over ≈ 0.445,
803+ // ε_under ≈ −0.278), giving >85 bits of precision after 6 N-R
804+ // iterations. The `lt(0, x)` term ensures z ≥ 1 when x > 0 (the
805+ // `shr` can produce 0 for small `q`)
806+ z := add (shr (8 , shl (div (sub (257 , clz (x)), 3 ), 233 )), lt (0 , x))
807+
808+ // 6 Newton-Raphson iterations.
813809 z := div (add (add (div (x, mul (z, z)), z), z), 3 )
814810 z := div (add (add (div (x, mul (z, z)), z), z), 3 )
815811 z := div (add (add (div (x, mul (z, z)), z), z), 3 )
816812 z := div (add (add (div (x, mul (z, z)), z), z), 3 )
817813 z := div (add (add (div (x, mul (z, z)), z), z), 3 )
818814 z := div (add (add (div (x, mul (z, z)), z), z), 3 )
815+
819816 // Round down.
820817 z := sub (z, lt (div (x, mul (z, z)), z))
821818 }
0 commit comments