Skip to content

Commit d9ad672

Browse files
committed
⚡️Golf sqrt and cbrt by getting a better initial estimate and removing an iteration
1 parent ff6256a commit d9ad672

1 file changed

Lines changed: 15 additions & 18 deletions

File tree

src/utils/clz/FixedPointMathLib.sol

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -770,18 +770,13 @@ library FixedPointMathLib {
770770
function sqrt(uint256 x) internal pure returns (uint256 z) {
771771
/// @solidity memory-safe-assembly
772772
assembly {
773-
// Step 1: Get the bit position of the most significant bit
774-
// n = floor(log2(x))
775-
// For x ≈ 2^n, we know sqrt(x) ≈ 2^(n/2)
776-
// We use (n+1)/2 instead of n/2 to round up slightly
777-
// This gives a better initial approximation
778-
//
779-
// Formula: z = 2^((n+1)/2) = 2^(floor((n+1)/2))
780-
// Implemented as: z = 1 << ((n+1) >> 1)
773+
// Initial guess z = 2^⌊(n+1)/2⌋ where n = ⌊log₂(x)⌋. The
774+
// alternating-endpoint seed gives ε₁ = 0.0607 after one Babylonian
775+
// step for all inputs. With ε_{n+1} ≈ ε²/2, 6 steps yield 2⁻¹⁶¹
776+
// relative error (>128 correct bits).
781777
z := shl(shr(1, sub(256, clz(x))), 1)
782778

783-
/// (x/z + z) / 2
784-
z := shr(1, add(z, div(x, z)))
779+
// 6 Babylonian steps; z = (x/z + z) / 2
785780
z := shr(1, add(z, div(x, z)))
786781
z := shr(1, add(z, div(x, z)))
787782
z := shr(1, add(z, div(x, z)))
@@ -799,23 +794,25 @@ library FixedPointMathLib {
799794
/// @dev Returns the cube root of `x`, rounded down.
800795
/// Credit to bout3fiddy and pcaversaccio under AGPLv3 license:
801796
/// https://github.com/pcaversaccio/snekmate/blob/main/src/snekmate/utils/math.vy
802-
/// Formally verified by xuwinnie:
803-
/// https://github.com/vectorized/solady/blob/main/audits/xuwinnie-solady-cbrt-proof.pdf
804797
function cbrt(uint256 x) internal pure returns (uint256 z) {
805798
/// @solidity memory-safe-assembly
806799
assembly {
807-
// Initial guess z = 2^ceil((log2(x) + 2) / 3).
808-
// Since log2(x) = 255 - clz(x), the expression shl((257 - clz(x)) / 3, 1)
809-
// computes this over-estimate. Guaranteed ≥ cbrt(x) and safe for Newton-Raphson's.
810-
z := shl(div(sub(257, clz(x)), 3), 1)
811-
// Newton-Raphson's.
812-
z := div(add(add(div(x, mul(z, z)), z), z), 3)
800+
// Initial guess z ≈ ∛(3/4) · 2𐞥 where q = ⌊(257 − clz(x)) / 3⌋. The
801+
// multiplier 233/256 ≈ 0.910 ≈ ∛(3/4) balances the worst-case
802+
// over/underestimate across each octave triplet (ε_over ≈ 0.445,
803+
// ε_under ≈ −0.278), giving >85 bits of precision after 6 N-R
804+
// iterations. The `lt(0, x)` term ensures z ≥ 1 when x > 0 (the
805+
// `shr` can produce 0 for small `q`)
806+
z := add(shr(8, shl(div(sub(257, clz(x)), 3), 233)), lt(0, x))
807+
808+
// 6 Newton-Raphson iterations.
813809
z := div(add(add(div(x, mul(z, z)), z), z), 3)
814810
z := div(add(add(div(x, mul(z, z)), z), z), 3)
815811
z := div(add(add(div(x, mul(z, z)), z), z), 3)
816812
z := div(add(add(div(x, mul(z, z)), z), z), 3)
817813
z := div(add(add(div(x, mul(z, z)), z), z), 3)
818814
z := div(add(add(div(x, mul(z, z)), z), z), 3)
815+
819816
// Round down.
820817
z := sub(z, lt(div(x, mul(z, z)), z))
821818
}

0 commit comments

Comments
 (0)