Skip to content

Commit 2fdc7a1

Browse files
authored
Fix #10, mantissa overflow (#11)
- **warning: breaking changes!** - Fix #10, mantissa overflow - Fix convert to subnormal numbers ( < 0.000061035...) - Fix printing subnormal numbers - update unit tests with **test_all** - update GitHub/actions to version 4 - remove DATE from examples as it has no added value. - minor edits
1 parent 9dc24d2 commit 2fdc7a1

21 files changed

Lines changed: 372 additions & 95 deletions

File tree

.github/workflows/arduino-lint.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ jobs:
66
lint:
77
runs-on: ubuntu-latest
88
steps:
9-
- uses: actions/checkout@v3
9+
- uses: actions/checkout@v4
1010
- uses: arduino/arduino-lint-action@v1
1111
with:
1212
library-manager: update

.github/workflows/arduino_test_runner.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ jobs:
88
runs-on: ubuntu-latest
99

1010
steps:
11-
- uses: actions/checkout@v3
11+
- uses: actions/checkout@v4
1212
- uses: ruby/setup-ruby@v1
1313
with:
1414
ruby-version: 2.6

.github/workflows/jsoncheck.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ jobs:
1010
test:
1111
runs-on: ubuntu-latest
1212
steps:
13-
- uses: actions/checkout@v3
13+
- uses: actions/checkout@v4
1414
- name: json-syntax-check
1515
uses: limitusus/json-syntax-check@v1
1616
with:

CHANGELOG.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,23 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
66
and this project adheres to [Semantic Versioning](http://semver.org/).
77

88

9+
## [0.2.0] - 2024-03-05
10+
- **warning: breaking changes!**
11+
- Fix #10, mantissa overflow
12+
- Fix conversion to subnormal numbers ( < 0.000061035...)
13+
- Fix printing subnormal numbers
14+
- update unit tests with **test_all**
15+
- update GitHub/actions to version 4
16+
- remove DATE from examples as it has no added value.
17+
- minor edits
18+
19+
----
20+
921
## [0.1.8] - 2023-11-02
1022
- update readme.md
1123
- add **isNan()** (experimental).
1224
- minor edits.
1325

14-
1526
## [0.1.7] - 2022-11-07
1627
- add changelog.md
1728
- add rp2040 to build-CI

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2015-2023 Rob Tillaart
3+
Copyright (c) 2015-2024 Rob Tillaart
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,21 @@ a floating point number. As it uses only 2 bytes where float and double have typ
2626
4 and 8 bytes, gains can be made at the price of range and precision.
2727

2828

29+
#### Breaking change 0.2.0
30+
31+
Version 0.2.0 has a breaking change as a conversion bug has been found.
32+
See issue #10 for details.
33+
For some specific values the mantissa overflowed when a value was
34+
assigned to a float16. This overflow was not detected / corrected.
35+
36+
During the analysis of this bug it became clear that the sub-normal numbers
37+
were also implemented incorrectly. This is fixed too in 0.2.0.
38+
39+
There is still an open issue with the handling of 0 versus -0.
40+
41+
**This makes all pre-0.2.0 versions obsolete.**
42+
43+
2944
## Specifications
3045

3146

@@ -34,14 +49,16 @@ a floating point number. As it uses only 2 bytes where float and double have typ
3449
| size | 2 bytes | layout s eeeee mmmmmmmmmm (1,5,10)
3550
| sign | 1 bit |
3651
| exponent | 5 bit |
37-
| mantissa | 11 bit | ~ 3 digits
52+
| mantissa | 10 bit | ~ 3 digits
3853
| minimum | 5.96046 E−8 | smallest positive number.
39-
| | 1.0009765625 | 1 + 2^−10 = smallest nr larger than 1.
54+
| | 1.0009765625 | 1 + 2^−10 = smallest number larger than 1.
4055
| maximum | 65504 |
4156
| | |
4257

4358

44-
#### example values
59+
#### Example values
60+
61+
Source: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
4562

4663
```cpp
4764
/*
@@ -151,9 +168,18 @@ negation operator.
151168
#### Should
152169

153170
- unit tests of the above.
171+
- how to handle 0 == -0 (0x0000 == 0x8000)
172+
- investigate ARM alternative half-precision
173+
_ARM processors support (via a floating point control register bit)
174+
an "alternative half-precision" format, which does away with the
175+
special case for an exponent value of 31 (11111 in binary).[10] It is almost
176+
identical to the IEEE format, but there is no encoding for infinity or NaNs;
177+
instead, an exponent of 31 encodes normalized numbers in the range 65536 to 131008._
178+
154179

155180
#### Could
156181

182+
- copy constructor?
157183
- update documentation.
158184
- error handling.
159185
- divide by zero errors.

examples/float16_test0/float16_test0.ino

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22
// FILE: float16_test0.ino
33
// AUTHOR: Rob Tillaart
44
// PURPOSE: test float16
5-
// DATE: 2015-03-11
65
// URL: https://github.com/RobTillaart/float16
7-
//
6+
87

98
/*
109
SIGN EXP MANTISSA
@@ -152,4 +151,4 @@ void test_numbers()
152151
}
153152

154153

155-
// -- END OF FILE --
154+
// -- END OF FILE --

examples/float16_test1/float16_test1.ino

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,7 @@
22
// FILE: float16_test1.ino
33
// AUTHOR: Rob Tillaart
44
// PURPOSE: test float16
5-
// DATE: 2015-03-11
65
// URL: https://github.com/RobTillaart/float16
7-
//
86

97

108
#include "float16.h"
@@ -40,6 +38,6 @@ void loop()
4038
}
4139

4240

43-
// -- END OF FILE --
41+
// -- END OF FILE --
4442

4543

examples/float16_test_all/float16_test_all.ino

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,13 @@
22
// FILE: float16_test_all.ino
33
// AUTHOR: Rob Tillaart
44
// PURPOSE: test float16
5-
// DATE: 2021-11-27
65
// URL: https://github.com/RobTillaart/float16
7-
//
8-
9-
// test all values except the NAN
10-
// test_1 takes ~ 2 minutes on UNO @ 115200baud
116

12-
// https://github.com/RobTillaart/float16/issues/2
7+
// test all values except the NAN
8+
// test_1 takes ~ 2 minutes on UNO @ 115200baud
9+
// https://github.com/RobTillaart/float16/issues/2
10+
//
11+
// test_3 is related to issue #10
1312

1413

1514
#include "float16.h"
@@ -34,6 +33,7 @@ void setup()
3433

3534
test_1();
3635
test_2();
36+
test_3();
3737
}
3838

3939

@@ -42,8 +42,41 @@ void loop()
4242
}
4343

4444

45+
// test for issue #10
46+
void test_3()
47+
{
48+
Serial.println(__FUNCTION__);
49+
uint16_t y = 0;
50+
uint16_t last = 0;
51+
52+
start = millis();
53+
for (int32_t x = 1; x < 65535; x++) // test positive integers.
54+
// for (int32_t x = -1; x > -65535; x--) // test negative integers.
55+
{
56+
last = y;
57+
f16 = x;
58+
y = f16.getBinary();
59+
if (y < last)
60+
{
61+
Serial.print("| ");
62+
Serial.print(x);
63+
Serial.print(" | ");
64+
Serial.print(y, HEX);
65+
Serial.print(" | ");
66+
Serial.print(last, HEX);
67+
Serial.println(" |");
68+
}
69+
}
70+
stop = millis();
71+
Serial.println();
72+
Serial.print(" TIME: ");
73+
Serial.println(stop - start);
74+
}
75+
76+
4577
void test_2()
4678
{
79+
Serial.println(__FUNCTION__);
4780
start = millis();
4881
for (uint32_t x = 0x0001; x < 0x7C01; x++)
4982
{
@@ -64,7 +97,8 @@ void test_2()
6497

6598
void test_1()
6699
{
67-
// POSITIVE NUMBERS
100+
Serial.println(__FUNCTION__);
101+
// POSITIVE NUMBERS
68102
prev = 0;
69103
errors = 0;
70104
start = millis();
@@ -76,7 +110,7 @@ void test_1()
76110
Serial.print('\t');
77111
float current = f16.toDouble();
78112
Serial.print(current, 8);
79-
if (prev > current) // numbers should be increasing.
113+
if (prev > current) // numbers should be increasing.
80114
{
81115
Serial.print("\t\tERROR");
82116
errors++;
@@ -94,7 +128,7 @@ void test_1()
94128
Serial.println();
95129

96130

97-
// NEGATIVE NUMBERS
131+
// NEGATIVE NUMBERS
98132
prev = 0;
99133
errors = 0;
100134
start = millis();
@@ -106,7 +140,7 @@ void test_1()
106140
Serial.print('\t');
107141
float current = f16.toDouble();
108142
Serial.print(current, 8);
109-
if (prev < current) // negative numbers should be decreasing.
143+
if (prev < current) // negative numbers should be decreasing.
110144
{
111145
Serial.print("\t\tERROR");
112146
errors++;
@@ -129,4 +163,4 @@ void test_1()
129163

130164

131165

132-
// -- END OF FILE --
166+
// -- END OF FILE --
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
//
2+
// FILE: float16_test_all_2.ino
3+
// AUTHOR: alecelular, Rob Tillaart
4+
// PURPOSE: test float16
5+
// URL: https://github.com/RobTillaart/float16
6+
// URL: https://github.com/RobTillaart/float16/issues/10
7+
8+
9+
#include "float16.h"
10+
11+
// issue 10 detected an Error -> 32760 / 32767
12+
13+
float16 f16;
14+
15+
16+
void setup()
17+
{
18+
delay(500);
19+
Serial.begin(115200);
20+
while (!Serial) delay(1);
21+
22+
Serial.println();
23+
Serial.println(__FILE__);
24+
Serial.print("FLOAT16_LIB_VERSION: ");
25+
Serial.println(FLOAT16_LIB_VERSION);
26+
27+
f16.setDecimals(6);
28+
29+
test_all();
30+
31+
Serial.println("\ndone");
32+
}
33+
34+
35+
void loop()
36+
{
37+
}
38+
39+
40+
void test_all()
41+
{
42+
Serial.println(__FUNCTION__);
43+
44+
// test all possible positive patterns
45+
// test both the conversion to and from float.
46+
for (uint32_t x = 0x0000; x < 0x7C01; x++)
47+
{
48+
f16.setBinary(x);
49+
float16 f17 = f16.toDouble();
50+
51+
// internal format should be equal. Except for -0 and 0.
52+
if (x - f17.getBinary() != 0)
53+
{
54+
Serial.print(f16.toDouble(), 10);
55+
Serial.print("\t");
56+
Serial.print(f16.getBinary(), HEX);
57+
Serial.print("\t");
58+
Serial.print(f17.getBinary(), HEX);
59+
Serial.print("\t");
60+
Serial.print(x - f17.getBinary(), HEX);
61+
Serial.println();
62+
}
63+
}
64+
Serial.println();
65+
66+
67+
// test all possible negative patterns
68+
for (uint32_t x = 0x8000; x < 0xFC01; x++)
69+
{
70+
f16.setBinary(x);
71+
float16 f17 = f16.toDouble();
72+
73+
if (x - f17.getBinary() != 0)
74+
{
75+
Serial.print(f16.toDouble(), 10);
76+
Serial.print("\t");
77+
Serial.print(f16.getBinary(), HEX);
78+
Serial.print("\t");
79+
Serial.print(f17.getBinary(), HEX);
80+
Serial.print("\t");
81+
Serial.print(x - f17.getBinary(), HEX);
82+
Serial.println();
83+
}
84+
}
85+
Serial.println();
86+
87+
}
88+
89+
90+
void test_0()
91+
{
92+
Serial.println(__FUNCTION__);
93+
for (uint32_t x = 32750; x < 32770; x++)
94+
// for (uint32_t x = 8175; x < 8205; x++)
95+
{
96+
f16 = x;
97+
Serial.print(x);
98+
Serial.print("\t");
99+
Serial.print(f16);
100+
Serial.print("\t");
101+
Serial.print(f16.toDouble(), 2);
102+
Serial.print("\t");
103+
Serial.println();
104+
yield();
105+
}
106+
107+
f16.setBinary(0x7800);
108+
Serial.print(f16.toDouble(), 2);
109+
Serial.println();
110+
}
111+
112+
113+
// -- END OF FILE --

0 commit comments

Comments
 (0)