-
Notifications
You must be signed in to change notification settings - Fork 355
Expand file tree
/
Copy pathexactfloat.cc
More file actions
812 lines (720 loc) · 26.8 KB
/
exactfloat.cc
File metadata and controls
812 lines (720 loc) · 26.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
// Copyright 2009 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: ericv@google.com (Eric Veach)
#include "s2/util/math/exactfloat/exactfloat.h"
#include <algorithm>
#include <climits>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>
#include <string>
#include <openssl/bn.h>
#include <openssl/crypto.h> // for OPENSSL_free
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#ifndef OPENSSL_IS_BORINGSSL
#include "absl/container/fixed_array.h" // IWYU pragma: keep
#include "absl/numeric/bits.h" // IWYU pragma: keep
#endif
namespace exactfloat {
using std::max;
// To simplify the overflow/underflow logic, we limit the exponent and
// precision range so that (2 * bn_exp_) does not overflow an "int". We take
// advantage of this, for example, by only checking for overflow/underflow
// *after* multiplying two numbers.
static_assert(ExactFloat::kMaxExp <= INT_MAX / 2 &&
ExactFloat::kMinExp - ExactFloat::kMaxPrec >= INT_MIN / 2,
"exactfloat exponent might overflow");
// We define a few simple extensions to the OpenSSL's BIGNUM interface.
// In some cases these depend on BIGNUM internal fields, so they might
// require tweaking if the BIGNUM implementation changes significantly.
// These are just thin wrappers for BoringSSL.
#ifdef OPENSSL_IS_BORINGSSL
inline static void BN_ext_set_uint64(BIGNUM* bn, uint64_t v) {
ABSL_CHECK(BN_set_u64(bn, v));
}
// Return the absolute value of a BIGNUM as a 64-bit unsigned integer.
// Requires that BIGNUM fits into 64 bits.
inline static uint64_t BN_ext_get_uint64(const BIGNUM* bn) {
uint64_t u64;
if (!BN_get_u64(bn, &u64)) {
ABSL_DLOG(FATAL) << "BN has " << BN_num_bits(bn) << " bits";
return 0;
}
return u64;
}
static int BN_ext_count_low_zero_bits(const BIGNUM* bn) {
return BN_count_low_zero_bits(bn);
}
#else // !defined(OPENSSL_IS_BORINGSSL)
// Set a BIGNUM to the given unsigned 64-bit value.
inline static void BN_ext_set_uint64(BIGNUM* bn, uint64_t v) {
#if BN_BITS2 == 64
ABSL_CHECK(BN_set_word(bn, v));
#else
static_assert(BN_BITS2 == 32, "at least 32 bit openssl build needed");
ABSL_CHECK(BN_set_word(bn, static_cast<uint32_t>(v >> 32)));
ABSL_CHECK(BN_lshift(bn, bn, 32));
ABSL_CHECK(BN_add_word(bn, static_cast<uint32_t>(v)));
#endif
}
// Return the absolute value of a BIGNUM as a 64-bit unsigned integer.
// Requires that BIGNUM fits into 64 bits.
inline static uint64_t BN_ext_get_uint64(const BIGNUM* bn) {
ABSL_DCHECK_LE(BN_num_bytes(bn), sizeof(uint64_t));
// Use `BN_get_word` if its return type is large enough.
if constexpr (sizeof(decltype(BN_get_word(bn))) >= sizeof(uint64_t)) {
return BN_get_word(bn);
} else {
uint64_t v;
static_assert(absl::endian::native == absl::endian::little ||
absl::endian::native == absl::endian::big,
"Unsupported endianness.");
if constexpr (absl::endian::native == absl::endian::little) {
ABSL_CHECK_EQ(
BN_bn2lebinpad(bn, reinterpret_cast<unsigned char*>(&v), sizeof(v)),
sizeof(v));
} else {
ABSL_CHECK_EQ(
BN_bn2binpad(bn, reinterpret_cast<unsigned char*>(&v), sizeof(v)),
sizeof(v));
}
return v;
}
}
static int BN_ext_count_low_zero_bits(const BIGNUM* bn) {
// In OpenSSL >= 1.1, BIGNUM is an opaque type, so d and top
// cannot be accessed. The bytes must be copied out at a ~25%
// performance penalty.
absl::FixedArray<unsigned char> bytes(BN_num_bytes(bn));
// "le" indicates little endian.
ABSL_CHECK_EQ(BN_bn2lebinpad(bn, bytes.data(), bytes.size()), bytes.size());
int count = 0;
for (unsigned char c : bytes) {
if (c == 0) {
count += 8;
} else {
count += absl::countr_zero(c);
break;
}
}
return count;
}
#endif // !defined(OPENSSL_IS_BORINGSSL)
ExactFloat::ExactFloat(double v) {
sign_ = std::signbit(v) ? -1 : 1;
if (std::isnan(v)) {
set_nan();
} else if (std::isinf(v)) {
set_inf(sign_);
} else {
// The following code is much simpler than messing about with bit masks,
// has the advantage of handling denormalized numbers and zero correctly,
// and is actually quite efficient (at least compared to the rest of this
// code). "f" is a fraction in the range [0.5, 1), so if we shift it left
// by the number of mantissa bits in a double (53, including the leading
// "1") then the result is always an integer.
int exp;
double f = std::frexp(std::fabs(v), &exp);
uint64_t m = static_cast<uint64_t>(std::ldexp(f, kDoubleMantissaBits));
BN_ext_set_uint64(bn_.get(), m);
bn_exp_ = exp - kDoubleMantissaBits;
Canonicalize();
}
}
ExactFloat::ExactFloat(int v) {
sign_ = (v >= 0) ? 1 : -1;
// Note that this works even for INT_MIN because the parameter type for
// BN_set_word() is unsigned.
ABSL_CHECK(BN_set_word(bn_.get(), std::abs(v)));
bn_exp_ = 0;
Canonicalize();
}
ExactFloat::ExactFloat(const ExactFloat& b)
: sign_(b.sign_), bn_exp_(b.bn_exp_) {
BN_copy(bn_.get(), b.bn_.get());
}
ExactFloat ExactFloat::SignedZero(int sign) {
ExactFloat r;
r.set_zero(sign);
return r;
}
ExactFloat ExactFloat::Infinity(int sign) {
ExactFloat r;
r.set_inf(sign);
return r;
}
ExactFloat ExactFloat::NaN() {
ExactFloat r;
r.set_nan();
return r;
}
int ExactFloat::prec() const { return BN_num_bits(bn_.get()); }
int ExactFloat::exp() const {
ABSL_DCHECK(isnormal(*this));
return bn_exp_ + BN_num_bits(bn_.get());
}
void ExactFloat::set_zero(int sign) {
sign_ = sign;
bn_exp_ = kExpZero;
if (!BN_is_zero(bn_.get())) BN_zero(bn_.get());
}
void ExactFloat::set_inf(int sign) {
sign_ = sign;
bn_exp_ = kExpInfinity;
if (!BN_is_zero(bn_.get())) BN_zero(bn_.get());
}
void ExactFloat::set_nan() {
sign_ = 1;
bn_exp_ = kExpNaN;
if (!BN_is_zero(bn_.get())) BN_zero(bn_.get());
}
int fpclassify(ExactFloat const& x) {
switch (x.bn_exp_) {
case ExactFloat::kExpNaN: return FP_NAN;
case ExactFloat::kExpInfinity: return FP_INFINITE;
case ExactFloat::kExpZero: return FP_ZERO;
// There are no subnormal `ExactFloat`s.
default: return FP_NORMAL;
}
}
ExactFloat::operator double() const {
// If the mantissa has too many bits, we need to round it.
if (prec() <= kDoubleMantissaBits) {
return ToDoubleHelper();
} else {
ExactFloat r = RoundToMaxPrec(kDoubleMantissaBits, kRoundTiesToEven);
return r.ToDoubleHelper();
}
}
double ExactFloat::ToDoubleHelper() const {
ABSL_DCHECK_LE(BN_num_bits(bn_.get()), kDoubleMantissaBits);
if (!isnormal(*this)) {
if (is_zero()) return std::copysign(0, sign_);
if (isinf(*this)) {
return std::copysign(std::numeric_limits<double>::infinity(), sign_);
}
return std::copysign(std::numeric_limits<double>::quiet_NaN(), sign_);
}
uint64_t d_mantissa = BN_ext_get_uint64(bn_.get());
// We rely on ldexp() to handle overflow and underflow. (It will return a
// signed zero or infinity if the result is too small or too large.)
return sign_ * std::ldexp(static_cast<double>(d_mantissa), bn_exp_);
}
ExactFloat ExactFloat::RoundToMaxPrec(int max_prec, RoundingMode mode) const {
// The "kRoundTiesToEven" mode requires at least 2 bits of precision
// (otherwise both adjacent representable values may be odd).
ABSL_DCHECK_GE(max_prec, 2);
ABSL_DCHECK_LE(max_prec, kMaxPrec);
// The following test also catches zero, infinity, and NaN.
int shift = prec() - max_prec;
if (shift <= 0) return *this;
// Round by removing the appropriate number of bits from the mantissa. Note
// that if the value is rounded up to a power of 2, the high-order bit
// position may increase, but in that case Canonicalize() will remove at
// least one zero bit and so the output will still have prec() <= max_prec.
return RoundToPowerOf2(bn_exp_ + shift, mode);
}
ExactFloat ExactFloat::RoundToPowerOf2(int bit_exp, RoundingMode mode) const {
ABSL_DCHECK_GE(bit_exp, kMinExp - kMaxPrec);
ABSL_DCHECK_LE(bit_exp, kMaxExp);
// If the exponent is already large enough, or the value is zero, infinity,
// or NaN, then there is nothing to do.
int shift = bit_exp - bn_exp_;
if (shift <= 0) return *this;
ABSL_DCHECK(isnormal(*this));
// Convert rounding up/down to toward/away from zero, so that we don't need
// to consider the sign of the number from this point onward.
if (mode == kRoundTowardPositive) {
mode = (sign_ > 0) ? kRoundAwayFromZero : kRoundTowardZero;
} else if (mode == kRoundTowardNegative) {
mode = (sign_ > 0) ? kRoundTowardZero : kRoundAwayFromZero;
}
// Rounding consists of right-shifting the mantissa by "shift", and then
// possibly incrementing the result (depending on the rounding mode, the
// bits that were discarded, and sometimes the lowest kept bit). The
// following code figures out whether we need to increment.
ExactFloat r;
bool increment = false;
if (mode == kRoundTowardZero) {
// Never increment.
} else if (mode == kRoundTiesAwayFromZero) {
// Increment if the highest discarded bit is 1.
if (BN_is_bit_set(bn_.get(), shift - 1)) increment = true;
} else if (mode == kRoundAwayFromZero) {
// Increment unless all discarded bits are zero.
if (BN_ext_count_low_zero_bits(bn_.get()) < shift) increment = true;
} else {
ABSL_DCHECK_EQ(mode, kRoundTiesToEven);
// Let "w/xyz" denote a mantissa where "w" is the lowest kept bit and
// "xyz" are the discarded bits. Then using regexp notation:
// ./0.* -> Don't increment (fraction < 1/2)
// 0/10* -> Don't increment (fraction = 1/2, kept part even)
// 1/10* -> Increment (fraction = 1/2, kept part odd)
// ./1.*1.* -> Increment (fraction > 1/2)
if (BN_is_bit_set(bn_.get(), shift - 1) &&
((BN_is_bit_set(bn_.get(), shift) ||
BN_ext_count_low_zero_bits(bn_.get()) < shift - 1))) {
increment = true;
}
}
r.bn_exp_ = bn_exp_ + shift;
ABSL_CHECK(BN_rshift(r.bn_.get(), bn_.get(), shift));
if (increment) {
ABSL_CHECK(BN_add_word(r.bn_.get(), 1));
}
r.sign_ = sign_;
r.Canonicalize();
return r;
}
int ExactFloat::NumSignificantDigitsForPrec(int prec) {
// The simplest bound is
//
// d <= 1 + ceil(prec * log10(2))
//
// The following bound is tighter by 0.5 digits on average, but requires
// the exponent to be known as well:
//
// d <= ceil(exp * log10(2)) - floor((exp - prec) * log10(2))
//
// Since either of these bounds can be too large by 0, 1, or 2 digits, we
// stick with the simpler first bound.
return static_cast<int>(1 + std::ceil(prec * (M_LN2 / M_LN10)));
}
// Numbers are always formatted with at least this many significant digits.
// This prevents small integers from being formatted in exponential notation
// (e.g. 1024 formatted as 1e+03), and also avoids the confusion of having
// supposedly "high precision" numbers formatted with just 1 or 2 digits
// (e.g. 1/512 == 0.001953125 formatted as 0.002).
static const int kMinSignificantDigits = 10;
std::string ExactFloat::ToString() const {
int max_digits =
max(kMinSignificantDigits, NumSignificantDigitsForPrec(prec()));
return ToStringWithMaxDigits(max_digits);
}
std::string ExactFloat::ToStringWithMaxDigits(int max_digits) const {
ABSL_DCHECK_GT(max_digits, 0);
if (!isnormal(*this)) {
if (isnan(*this)) return "nan";
if (is_zero()) return (sign_ < 0) ? "-0" : "0";
return (sign_ < 0) ? "-inf" : "inf";
}
std::string digits;
int exp10 = GetDecimalDigits(max_digits, &digits);
std::string str;
if (sign_ < 0) str.push_back('-');
// We use the standard '%g' formatting rules. If the exponent is less than
// -4 or greater than or equal to the requested precision (i.e., max_digits)
// then we use exponential notation.
//
// But since "exp10" is the base-10 exponent corresponding to a mantissa in
// the range [0.1, 1), whereas the '%g' rules assume a mantissa in the range
// [1.0, 10), we need to adjust these parameters by 1.
if (exp10 <= -4 || exp10 > max_digits) {
// Use exponential format.
str.push_back(digits[0]);
if (digits.size() > 1) {
str.push_back('.');
str.append(digits.begin() + 1, digits.end());
}
absl::StrAppendFormat(&str, "e%+02d", exp10 - 1);
} else {
// Use fixed format. We split this into two cases depending on whether
// the integer portion is non-zero or not.
if (exp10 > 0) {
if (static_cast<size_t>(exp10) >= digits.size()) {
absl::StrAppend(&str, digits);
for (int i = exp10 - digits.size(); i > 0; --i) {
str.push_back('0');
}
} else {
str.append(digits.begin(), digits.begin() + exp10);
str.push_back('.');
str.append(digits.begin() + exp10, digits.end());
}
} else {
absl::StrAppend(&str, "0.");
for (int i = exp10; i < 0; ++i) {
str.push_back('0');
}
absl::StrAppend(&str, digits);
}
}
return str;
}
// Increment an unsigned integer represented as a string of ASCII digits.
static void IncrementDecimalDigits(std::string* digits) {
std::string::iterator pos = digits->end();
while (--pos >= digits->begin()) {
if (*pos < '9') {
++*pos;
return;
}
*pos = '0';
}
digits->insert(0, "1");
}
int ExactFloat::GetDecimalDigits(int max_digits, std::string* digits) const {
ABSL_DCHECK(isnormal(*this));
// Convert the value to the form (bn * (10 ** bn_exp10)) where "bn" is a
// positive integer (BIGNUM).
BigNum bn;
int bn_exp10;
if (bn_exp_ >= 0) {
// The easy case: bn = bn_ * (2 ** bn_exp_)), bn_exp10 = 0.
ABSL_CHECK(BN_lshift(bn.get(), bn_.get(), bn_exp_));
bn_exp10 = 0;
} else {
// Set bn = bn_ * (5 ** -bn_exp_) and bn_exp10 = bn_exp_. This is
// equivalent to the original value of (bn_ * (2 ** bn_exp_)).
BigNum power;
ABSL_CHECK(BN_set_word(power.get(), -bn_exp_));
ABSL_CHECK(BN_set_word(bn.get(), 5));
BN_CTX* ctx = BN_CTX_new();
ABSL_CHECK(BN_exp(bn.get(), bn.get(), power.get(), ctx));
ABSL_CHECK(BN_mul(bn.get(), bn.get(), bn_.get(), ctx));
BN_CTX_free(ctx);
bn_exp10 = bn_exp_;
}
// Now convert "bn" to a decimal string.
char* all_digits = BN_bn2dec(bn.get());
ABSL_DCHECK(all_digits != nullptr);
// Check whether we have too many digits and round if necessary.
int num_digits = strlen(all_digits);
if (num_digits <= max_digits) {
*digits = all_digits;
} else {
digits->assign(all_digits, max_digits);
// Standard "printf" formatting rounds ties to an even number. This means
// that we round up (away from zero) if highest discarded digit is '5' or
// more, unless all other discarded digits are zero in which case we round
// up only if the lowest kept digit is odd.
if (all_digits[max_digits] >= '5' &&
((all_digits[max_digits - 1] & 1) == 1 ||
strpbrk(all_digits + max_digits + 1, "123456789") != nullptr)) {
// This can increase the number of digits by 1, but in that case at
// least one trailing zero will be stripped off below.
IncrementDecimalDigits(digits);
}
// Adjust the base-10 exponent to reflect the digits we have removed.
bn_exp10 += num_digits - max_digits;
}
OPENSSL_free(all_digits);
// Now strip any trailing zeros.
ABSL_DCHECK_NE((*digits)[0], '0');
std::string::size_type pos = digits->find_last_not_of('0') + 1;
bn_exp10 += digits->size() - pos;
digits->erase(pos);
ABSL_DCHECK_LE(digits->size(), max_digits);
// Finally, we adjust the base-10 exponent so that the mantissa is a
// fraction in the range [0.1, 1) rather than an integer.
return bn_exp10 + digits->size();
}
std::string ExactFloat::ToUniqueString() const {
return absl::StrFormat("%s<%d>", ToString(), prec());
}
ExactFloat& ExactFloat::operator=(const ExactFloat& b) {
if (this != &b) {
sign_ = b.sign_;
bn_exp_ = b.bn_exp_;
BN_copy(bn_.get(), b.bn_.get());
}
return *this;
}
ExactFloat ExactFloat::operator-() const { return CopyWithSign(-sign_); }
ExactFloat operator+(const ExactFloat& a, const ExactFloat& b) {
return ExactFloat::SignedSum(a.sign_, &a, b.sign_, &b);
}
ExactFloat operator-(const ExactFloat& a, const ExactFloat& b) {
return ExactFloat::SignedSum(a.sign_, &a, -b.sign_, &b);
}
ExactFloat ExactFloat::SignedSum(int a_sign, const ExactFloat* a, int b_sign,
const ExactFloat* b) {
if (!isnormal(*a) || !isnormal(*b)) {
// Handle zero, infinity, and NaN according to IEEE 754-2008.
if (isnan(*a)) return *a;
if (isnan(*b)) return *b;
if (isinf(*a)) {
// Adding two infinities with opposite sign yields NaN.
if (isinf(*b) && a_sign != b_sign) return NaN();
return Infinity(a_sign);
}
if (isinf(*b)) return Infinity(b_sign);
if (a->is_zero()) {
if (!b->is_zero()) return b->CopyWithSign(b_sign);
// Adding two zeros with the same sign preserves the sign.
if (a_sign == b_sign) return SignedZero(a_sign);
// Adding two zeros of opposite sign produces +0.
return SignedZero(+1);
}
ABSL_DCHECK(b->is_zero());
return a->CopyWithSign(a_sign);
}
// Swap the numbers if necessary so that "a" has the larger bn_exp_.
if (a->bn_exp_ < b->bn_exp_) {
using std::swap;
swap(a_sign, b_sign);
swap(a, b);
}
// Shift "a" if necessary so that both values have the same bn_exp_.
ExactFloat r;
if (a->bn_exp_ > b->bn_exp_) {
ABSL_CHECK(BN_lshift(r.bn_.get(), a->bn_.get(), a->bn_exp_ - b->bn_exp_));
a = &r; // The only field of "a" used below is bn_.
}
r.bn_exp_ = b->bn_exp_;
if (a_sign == b_sign) {
ABSL_CHECK(BN_add(r.bn_.get(), a->bn_.get(), b->bn_.get()));
r.sign_ = a_sign;
} else {
// Note that the BIGNUM documentation is out of date -- all methods now
// allow the result to be the same as any input argument, so it is okay if
// (a == &r) due to the shift above.
ABSL_CHECK(BN_sub(r.bn_.get(), a->bn_.get(), b->bn_.get()));
if (BN_is_zero(r.bn_.get())) {
r.sign_ = +1;
} else if (BN_is_negative(r.bn_.get())) {
// The magnitude of "b" was larger.
r.sign_ = b_sign;
BN_set_negative(r.bn_.get(), false);
} else {
// They were equal, or the magnitude of "a" was larger.
r.sign_ = a_sign;
}
}
r.Canonicalize();
return r;
}
void ExactFloat::Canonicalize() {
if (!isnormal(*this)) return;
// Underflow/overflow occurs if exp() is not in [kMinExp, kMaxExp].
// We also convert a zero mantissa to signed zero.
int my_exp = exp();
if (my_exp < kMinExp || BN_is_zero(bn_.get())) {
set_zero(sign_);
} else if (my_exp > kMaxExp) {
set_inf(sign_);
} else if (!BN_is_odd(bn_.get())) {
// Remove any low-order zero bits from the mantissa.
ABSL_DCHECK(!BN_is_zero(bn_.get()));
int shift = BN_ext_count_low_zero_bits(bn_.get());
if (shift > 0) {
ABSL_CHECK(BN_rshift(bn_.get(), bn_.get(), shift));
bn_exp_ += shift;
}
}
// If the mantissa has too many bits, we replace it by NaN to indicate
// that an inexact calculation has occurred.
if (prec() > kMaxPrec) {
set_nan();
}
}
ExactFloat operator*(const ExactFloat& a, const ExactFloat& b) {
int result_sign = a.sign_ * b.sign_;
if (!isnormal(a) || !isnormal(b)) {
// Handle zero, infinity, and NaN according to IEEE 754-2008.
if (isnan(a)) return a;
if (isnan(b)) return b;
if (isinf(a)) {
// Infinity times zero yields NaN.
if (b.is_zero()) return ExactFloat::NaN();
return ExactFloat::Infinity(result_sign);
}
if (isinf(b)) {
if (a.is_zero()) return ExactFloat::NaN();
return ExactFloat::Infinity(result_sign);
}
ABSL_DCHECK(a.is_zero() || b.is_zero());
return ExactFloat::SignedZero(result_sign);
}
ExactFloat r;
r.sign_ = result_sign;
r.bn_exp_ = a.bn_exp_ + b.bn_exp_;
BN_CTX* ctx = BN_CTX_new();
ABSL_CHECK(BN_mul(r.bn_.get(), a.bn_.get(), b.bn_.get(), ctx));
BN_CTX_free(ctx);
r.Canonicalize();
return r;
}
bool operator==(const ExactFloat& a, const ExactFloat& b) {
// NaN is not equal to anything, not even itself.
if (isnan(a) || isnan(b)) return false;
// Since Canonicalize() strips low-order zero bits, all other cases
// (including non-normal values) require bn_exp_ to be equal.
if (a.bn_exp_ != b.bn_exp_) return false;
// Positive and negative zero are equal.
if (a.is_zero() && b.is_zero()) return true;
// Otherwise, the signs and mantissas must match. Note that non-normal
// values such as infinity have a mantissa of zero.
return a.sign_ == b.sign_ && BN_ucmp(a.bn_.get(), b.bn_.get()) == 0;
}
int ExactFloat::ScaleAndCompare(const ExactFloat& b) const {
ABSL_DCHECK(isnormal(*this) && isnormal(b) && bn_exp_ >= b.bn_exp_);
ExactFloat tmp = *this;
ABSL_CHECK(BN_lshift(tmp.bn_.get(), tmp.bn_.get(), bn_exp_ - b.bn_exp_));
return BN_ucmp(tmp.bn_.get(), b.bn_.get());
}
bool ExactFloat::UnsignedLess(const ExactFloat& b) const {
// Handle the zero/infinity cases (NaN has already been done).
if (isinf(*this) || b.is_zero()) return false;
if (is_zero() || isinf(b)) return true;
// If the high-order bit positions differ, we are done.
int cmp = exp() - b.exp();
if (cmp != 0) return cmp < 0;
// Otherwise shift one of the two values so that they both have the same
// bn_exp_ and then compare the mantissas.
return (bn_exp_ >= b.bn_exp_ ? ScaleAndCompare(b) < 0
: b.ScaleAndCompare(*this) > 0);
}
bool operator<(const ExactFloat& a, const ExactFloat& b) {
// NaN is unordered compared to everything, including itself.
if (isnan(a) || isnan(b)) return false;
// Positive and negative zero are equal.
if (a.is_zero() && b.is_zero()) return false;
// Otherwise, anything negative is less than anything positive.
if (a.sign_ != b.sign_) return a.sign_ < b.sign_;
// Now we just compare absolute values.
return (a.sign_ > 0) ? a.UnsignedLess(b) : b.UnsignedLess(a);
}
ExactFloat fabs(const ExactFloat& a) { return abs(a); }
ExactFloat abs(const ExactFloat& a) { return a.CopyWithSign(+1); }
ExactFloat fmax(const ExactFloat& a, const ExactFloat& b) {
// If one argument is NaN, return the other argument.
if (isnan(a)) return b;
if (isnan(b)) return a;
// Not required by IEEE 754, but we prefer +0 over -0.
if (a.sign_ != b.sign_) {
return (a.sign_ < b.sign_) ? b : a;
}
return (a < b) ? b : a;
}
ExactFloat fmin(const ExactFloat& a, const ExactFloat& b) {
// If one argument is NaN, return the other argument.
if (isnan(a)) return b;
if (isnan(b)) return a;
// Not required by IEEE 754, but we prefer -0 over +0.
if (a.sign_ != b.sign_) {
return (a.sign_ < b.sign_) ? a : b;
}
return (a < b) ? a : b;
}
ExactFloat fdim(const ExactFloat& a, const ExactFloat& b) {
// This formulation has the correct behavior for NaNs.
return (a <= b) ? 0 : (a - b);
}
ExactFloat ceil(const ExactFloat& a) {
return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardPositive);
}
ExactFloat floor(const ExactFloat& a) {
return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardNegative);
}
ExactFloat trunc(const ExactFloat& a) {
return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardZero);
}
ExactFloat round(const ExactFloat& a) {
return a.RoundToPowerOf2(0, ExactFloat::kRoundTiesAwayFromZero);
}
ExactFloat rint(const ExactFloat& a) {
return a.RoundToPowerOf2(0, ExactFloat::kRoundTiesToEven);
}
template <class T>
T ExactFloat::ToInteger(RoundingMode mode) const {
using std::numeric_limits;
static_assert(sizeof(T) <= sizeof(uint64_t), "max 64 bits supported");
static_assert(numeric_limits<T>::is_signed, "only signed types supported");
const int64_t kMinValue = numeric_limits<T>::min();
const int64_t kMaxValue = numeric_limits<T>::max();
ExactFloat r = RoundToPowerOf2(0, mode);
if (isnan(r)) return kMaxValue;
if (r.is_zero()) return 0;
if (!isinf(r)) {
// If the unsigned value has more than 63 bits it is always clamped.
if (r.exp() < 64) {
int64_t value = BN_ext_get_uint64(r.bn_.get()) << r.bn_exp_;
if (r.sign_ < 0) value = -value;
return std::clamp(value, kMinValue, kMaxValue);
}
}
return (r.sign_ < 0) ? kMinValue : kMaxValue;
}
// NOLINTBEGIN(runtime/int, google-runtime-int)
long lrint(const ExactFloat& a) {
return a.ToInteger<long>(ExactFloat::kRoundTiesToEven);
}
long long llrint(const ExactFloat& a) {
return a.ToInteger<long long>(ExactFloat::kRoundTiesToEven);
}
long lround(const ExactFloat& a) {
return a.ToInteger<long>(ExactFloat::kRoundTiesAwayFromZero);
}
long long llround(const ExactFloat& a) {
return a.ToInteger<long long>(ExactFloat::kRoundTiesAwayFromZero);
}
// NOLINTEND(runtime/int, google-runtime-int)
ExactFloat copysign(const ExactFloat& a, const ExactFloat& b) {
return a.CopyWithSign(b.sign_);
}
ExactFloat frexp(const ExactFloat& a, int* exp) {
if (!isnormal(a)) {
// If a == 0, exp should be zero. If a.is_inf() or a.is_nan(), exp is not
// defined but the glibc implementation returns zero.
*exp = 0;
return a;
}
*exp = a.exp();
return ldexp(a, -a.exp());
}
ExactFloat ldexp(const ExactFloat& a, int exp) {
if (!isnormal(a)) return a;
// To prevent integer overflow, we first clamp "exp" so that
// (kMinExp - 1) <= (a_exp + exp) <= (kMaxExp + 1).
int a_exp = a.exp();
exp = std::clamp(exp, //
ExactFloat::kMinExp - 1 + a_exp,
ExactFloat::kMaxExp + 1 - a_exp);
// Now modify the exponent and check for overflow/underflow.
ExactFloat r = a;
r.bn_exp_ += exp;
r.Canonicalize();
return r;
}
ExactFloat scalbln(const ExactFloat& a, long exp) {
// Clamp the exponent to the range of "int" in order to avoid truncation.
// NOLINTNEXTLINE(runtime/int, google-runtime-int)
exp = std::clamp<long>(exp, INT_MIN, INT_MAX);
return ldexp(a, static_cast<int>(exp));
}
int ilogb(const ExactFloat& a) {
if (a.is_zero()) return FP_ILOGB0;
if (isinf(a)) return INT_MAX;
if (isnan(a)) return FP_ILOGBNAN;
// a.exp() assumes the significand is in the range [0.5, 1).
return a.exp() - 1;
}
ExactFloat logb(const ExactFloat& a) {
if (a.is_zero()) return ExactFloat::Infinity(-1);
if (isinf(a)) return ExactFloat::Infinity(+1); // Even if a < 0.
if (isnan(a)) return a;
// exp() assumes the significand is in the range [0.5,1).
return ExactFloat(a.exp() - 1);
}
ExactFloat ExactFloat::Unimplemented() {
ABSL_LOG(FATAL) << "Unimplemented ExactFloat method called";
return NaN();
}
} // namespace exactfloat