Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 108 additions & 102 deletions lib/elixir/lib/float.ex
Original file line number Diff line number Diff line change
Expand Up @@ -344,15 +344,14 @@ defmodule Float do

"""
@spec round(float, precision_range) :: float
# This implementation is slow since it relies on big integers.
# Faster implementations are available on more recent papers
# and could be implemented in the future.
def round(float, precision \\ 0)

def round(float, 0) when float == 0.0, do: float

def round(float, 0) when is_float(float) do
case float |> :erlang.round() |> :erlang.float() do
rounded = :erlang.round(float) * 1.0

case rounded do
Comment thread
josevalim marked this conversation as resolved.
Outdated
zero when zero == 0.0 and float < 0.0 -> -0.0
rounded -> rounded
end
Expand All @@ -366,133 +365,140 @@ defmodule Float do
raise ArgumentError, invalid_precision_message(precision)
end

# Float rounding via Russ Cox's "unrounded scaling" algorithm specialised for
# fixed-precision rounding. Reference: https://research.swtch.com/fp
#
# 1. Decompose float = (-1)^sign * mantissa * 2^binary_exp
# (mantissa has 53 bits incl. implicit leading 1).
# 2. Bounded multiply `product = mantissa * 10^precision` (≤103 bits, exact).
# 3. Round `product / 2^-binary_exp` to an integer per the requested mode.
# 4. Emit float closest to `rounded_int / 10^precision`:
# - fast path: when rounded_int < 2^53, IEEE float division is correctly rounded.
# - slow path: bignum scaling + manual mantissa extraction.
defp round(num, _precision, _rounding) when is_float(num) and num == 0.0, do: num

defp round(float, precision, rounding) do
<<sign::1, exp::11, significant::52-bitstring>> = <<float::float>>
{num, count} = decompose(significant, 1)
count = count - exp + 1023
defp round(float, precision, mode) do
<<sign::1, exp::11, mantissa::52>> = <<float::float>>

cond do
# Precision beyond 15 digits
count >= 104 ->
case rounding do
:ceil when sign === 0 -> 1 / power_of_10(precision)
:floor when sign === 1 -> -1 / power_of_10(precision)
:ceil when sign === 1 -> -0.0
:half_up when sign === 1 -> -0.0
_ -> 0.0
end
# Subnormal — tiny but non-zero; treat per-mode (ceil(+) and floor(-) bump
# to 10^-precision; everything else rounds to signed zero).
exp == 0 ->
tiny_round(sign, precision, mode)

# We are asking more precision than we have
count <= precision ->
# |float| >= 2^52 — already integer-valued at any precision >= 1.
Comment thread
PJUllrich marked this conversation as resolved.
Outdated
exp - 1075 >= 0 ->
float

true ->
# Difference in precision between float and asked precision
# We subtract 1 because we need to calculate the remainder too
diff = count - precision - 1

# Get up to latest so we calculate the remainder
power_of_10 = power_of_10(diff)

# Convert the numerand to decimal base
num = num * power_of_5(count)

# Move to the given precision - 1
num = div(num, power_of_10)
div = div(num, 10)
num = rounding(rounding, sign, num, div)

# Convert back to float without loss
# https://www.exploringbinary.com/correct-decimal-to-floating-point-using-big-integers/
den = power_of_10(precision)
boundary = den <<< 52

cond do
num == 0 and sign == 1 ->
-0.0

num == 0 ->
0.0

num >= boundary ->
{den, exp} = scale_down(num, boundary, 52)
decimal_to_float(sign, num, den, exp)

true ->
{num, exp} = scale_up(num, boundary, 52)
decimal_to_float(sign, num, den, exp)
end
mantissa = @power_of_2_to_52 ||| mantissa
shift = 1075 - exp
do_round(sign, mantissa, shift, precision, mode)
end
end

defp decompose(significant, initial) do
decompose(significant, 1, 0, initial)
# |float * 10^precision| < 0.5 — integer round is 0; ceil/floor still bump per sign.
defp do_round(sign, _mantissa, shift, precision, mode) when shift >= 104 do
tiny_round(sign, precision, mode)
end

defp decompose(<<1::1, bits::bitstring>>, count, last_count, acc) do
decompose(bits, count + 1, count, (acc <<< (count - last_count)) + 1)
end
defp do_round(sign, mantissa, shift, precision, mode) do
power = power_of_10(precision)
product = mantissa * power
half = 1 <<< (shift - 1)
quotient = product >>> shift
remainder = product - (quotient <<< shift)
rounded_int = round_step(mode, sign, quotient, remainder, half)

defp decompose(<<0::1, bits::bitstring>>, count, last_count, acc) do
decompose(bits, count + 1, last_count, acc)
end

defp decompose(<<>>, _count, last_count, acc) do
{acc, last_count}
end

defp scale_up(num, boundary, exp) when num >= boundary, do: {num, exp}
defp scale_up(num, boundary, exp), do: scale_up(num <<< 1, boundary, exp - 1)
cond do
rounded_int == 0 ->
signed_zero(sign)

defp scale_down(num, den, exp) do
new_den = den <<< 1
rounded_int < @power_of_2_to_52 <<< 1 ->
# Both rounded_int and power fit in 53 bits, so IEEE float division
# is correctly rounded.
result = rounded_int / power
if sign == 1, do: -result, else: result

if num < new_den do
{den >>> 52, exp}
else
scale_down(num, new_den, exp + 1)
true ->
bignum_to_float(sign, rounded_int, power)
end
end

defp decimal_to_float(sign, num, den, exp) do
quo = div(num, den)
rem = num - quo * den
defp round_step(:half_up, _sign, quotient, remainder, half) do
if remainder >= half, do: quotient + 1, else: quotient
end

tmp =
case den >>> 1 do
den when rem > den -> quo + 1
den when rem < den -> quo
_ when (quo &&& 1) === 1 -> quo + 1
_ -> quo
defp round_step(:floor, 0, quotient, _remainder, _half), do: quotient
defp round_step(:floor, 1, quotient, remainder, _half) when remainder > 0, do: quotient + 1
defp round_step(:floor, 1, quotient, _remainder, _half), do: quotient

defp round_step(:ceil, 0, quotient, remainder, _half) when remainder > 0, do: quotient + 1
defp round_step(:ceil, 0, quotient, _remainder, _half), do: quotient
defp round_step(:ceil, 1, quotient, _remainder, _half), do: quotient

defp signed_zero(0), do: 0.0
defp signed_zero(1), do: -0.0

# Result of rounding a non-zero float whose |float * 10^precision| < 0.5.
# ceil(+) → +10^-precision, floor(-) → -10^-precision, others → signed 0.
defp tiny_round(0, precision, :ceil), do: 1.0 / power_of_10(precision)
defp tiny_round(1, precision, :floor), do: -1.0 / power_of_10(precision)
defp tiny_round(sign, _precision, _mode), do: signed_zero(sign)

# Slow path: emit float closest to `sign * rounded_int / power` when
# rounded_int >= 2^53. The binary emission step is always IEEE
# round-to-nearest-even, regardless of the integer-rounding mode.
defp bignum_to_float(sign, rounded_int, power) do
shift_adjust = bit_length(rounded_int) - bit_length(power) - 53
{numerator, denominator, exp} = align(rounded_int, power, shift_adjust)

quotient = div(numerator, denominator)
remainder = numerator - quotient * denominator
half = denominator >>> 1

mantissa =
cond do
remainder > half -> quotient + 1
remainder < half -> quotient
(quotient &&& 1) === 1 -> quotient + 1
true -> quotient
end

tmp = tmp - @power_of_2_to_52
<<tmp::float>> = <<sign::1, exp + 1023::11, tmp::52>>
tmp
<<result::float>> = <<sign::1, exp + 1023::11, mantissa - @power_of_2_to_52::52>>
result
end

defp rounding(:floor, 1, _num, div), do: div + 1
defp rounding(:ceil, 0, _num, div), do: div + 1
# Pick (numerator, denominator, exp) so that numerator/denominator ∈ [2^52, 2^53)
# and the resulting float = numerator/denominator * 2^(exp-52).
defp align(rounded_int, power, shift_adjust) when shift_adjust >= 0 do
new_power = power <<< shift_adjust

defp rounding(:half_up, _sign, num, div) do
case rem(num, 10) do
rem when rem < 5 -> div
rem when rem >= 5 -> div + 1
end
if rounded_int < new_power <<< 53,
do: {rounded_int, new_power, 52 + shift_adjust},
else: {rounded_int, new_power <<< 1, 53 + shift_adjust}
end

defp rounding(_, _, _, div), do: div
defp align(rounded_int, power, shift_adjust) do
shifted = rounded_int <<< -shift_adjust
boundary = power <<< 52

Enum.reduce(0..104, 1, fn x, acc ->
defp power_of_10(unquote(x)), do: unquote(acc)
acc * 10
end)
if shifted >= boundary,
do: {shifted, power, 52 + shift_adjust},
else: {shifted <<< 1, power, 51 + shift_adjust}
end

defp bit_length(0), do: 0
defp bit_length(integer) when integer > 0, do: bit_length(integer, 0)
defp bit_length(integer, acc) when integer >= 1 <<< 64, do: bit_length(integer >>> 64, acc + 64)
defp bit_length(integer, acc) when integer >= 1 <<< 16, do: bit_length(integer >>> 16, acc + 16)
defp bit_length(integer, acc) when integer >= 1 <<< 4, do: bit_length(integer >>> 4, acc + 4)
defp bit_length(integer, acc) when integer >= 1, do: bit_length(integer >>> 1, acc + 1)
defp bit_length(_integer, acc), do: acc

Enum.reduce(0..104, 1, fn x, acc ->
defp power_of_5(unquote(x)), do: unquote(acc)
acc * 5
Enum.reduce(0..15, 1, fn exponent, acc ->
defp power_of_10(unquote(exponent)), do: unquote(acc)
acc * 10
end)

@doc """
Expand Down
84 changes: 84 additions & 0 deletions lib/elixir/test/elixir/float_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,36 @@ defmodule FloatTest do
assert Float.floor(5.0e-324, precision) === 0.0
end
end

test "with already-exact floats does not bump" do
# The integer rounding step must not add 1 when the truncated remainder
# is exactly zero (e.g. -1.5 has no content below the 1st decimal place).
assert Float.floor(-1.5, 1) === -1.5
assert Float.floor(-1.875, 3) === -1.875
assert Float.floor(-3.0, 5) === -3.0
end

test "with extremely small floats" do
# `tiny_round`: ceil(+) and floor(-) must bump to ±10^-precision.
assert Float.floor(-1.0e-200, 5) === -1.0e-5
assert Float.floor(-1.0e-200, 15) === -1.0e-15
assert Float.floor(1.0e-200, 5) === 0.0
end

test "with already-integer floats" do
# |f| >= 2^52 — fast path returns the float unchanged.
assert Float.floor(:math.pow(2, 52), 5) === 4_503_599_627_370_496.0
assert Float.floor(1.0e20, 3) === 1.0e20
assert Float.floor(-1.0e20, 3) === -1.0e20
end

test "with very large floats hits the bignum slow path" do
# n = round(|f| * 10^p) >= 2^53 — exercises bignum_to_float / align.
# The slow path must round-trip representable floats back to themselves.
assert Float.floor(1.234e15, 3) === 1.234e15
assert Float.floor(1.234567e11, 5) === 1.234567e11
assert Float.floor(-1.234567e11, 5) === -1.234567e11
end
end

test "ceil/1" do
Expand Down Expand Up @@ -163,6 +193,30 @@ defmodule FloatTest do
assert Float.ceil(-5.0e-324, precision) === -0.0
end
end

test "with already-exact floats does not bump" do
assert Float.ceil(1.5, 1) === 1.5
assert Float.ceil(1.875, 3) === 1.875
assert Float.ceil(3.0, 5) === 3.0
end

test "with extremely small floats" do
assert Float.ceil(1.0e-200, 5) === 1.0e-5
assert Float.ceil(1.0e-200, 15) === 1.0e-15
assert Float.ceil(-1.0e-200, 5) === -0.0
end

test "with already-integer floats" do
assert Float.ceil(:math.pow(2, 52), 5) === 4_503_599_627_370_496.0
assert Float.ceil(1.0e20, 3) === 1.0e20
assert Float.ceil(-1.0e20, 3) === -1.0e20
end

test "with very large floats hits the bignum slow path" do
assert Float.ceil(1.234e15, 3) === 1.234e15
assert Float.ceil(1.234567e11, 5) === 1.234567e11
assert Float.ceil(-1.234567e11, 5) === -1.234567e11
end
end

describe "round/2" do
Expand Down Expand Up @@ -203,6 +257,36 @@ defmodule FloatTest do
assert Float.round(-5.0e-324, precision) === -0.0
end
end

test "rounds up across a digit boundary" do
# Rounding pushes the integer answer to 10^precision; the float-emission
# step must produce the next-magnitude value cleanly.
assert Float.round(0.9995, 3) === 1.0
assert Float.round(0.9999, 3) === 1.0
assert Float.round(99.9999, 3) === 100.0
assert Float.round(-99.9999, 3) === -100.0
end

test "with already-integer floats" do
assert Float.round(:math.pow(2, 52), 5) === 4_503_599_627_370_496.0
assert Float.round(1.0e20, 3) === 1.0e20
assert Float.round(-1.0e20, 3) === -1.0e20
end

test "with very large floats hits the bignum slow path" do
assert Float.round(1.234e15, 3) === 1.234e15
assert Float.round(1.234567e11, 5) === 1.234567e11
assert Float.round(-1.234567e11, 5) === -1.234567e11
end

test "preserves documented tie behavior" do
# The actual binary representation of 5.5675 is 5.567499999..., so
# half-up rounding correctly gives 5.567 (not 5.568).
assert Float.round(5.5675, 3) === 5.567
assert Float.round(-5.5675, 3) === -5.567
assert Float.round(12.5, 0) === 13.0
assert Float.round(-12.5, 0) === -13.0
end
end

describe "ratio/1" do
Expand Down
Loading