-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfp_custom.sus
More file actions
170 lines (140 loc) · 4.68 KB
/
fp_custom.sus
File metadata and controls
170 lines (140 loc) · 4.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
/// Negates a 32-bit float by inverting bit 31 (the sign bit) of its bit pattern.
/// All other bits are passed through unchanged.
module fp32_neg {
    interface fp32_neg : float v -> float o {
        bool[32] in_bits = ToBits(v)
        bool[32] negated

        // Top bit is inverted, the low 31 bits are copied verbatim.
        negated[31] = !in_bits[31]
        negated[0:31] = in_bits[0:31]

        o = FromBits(negated)
    }
}
/// Negates a 64-bit double by inverting bit 63 (the sign bit) of its bit pattern.
/// All other bits are passed through unchanged.
module fp64_neg {
    interface fp64_neg : double v -> double o {
        bool[64] in_bits = ToBits(v)
        bool[64] negated

        // Top bit is inverted, the low 63 bits are copied verbatim.
        negated[63] = !in_bits[63]
        negated[0:63] = in_bits[0:63]

        o = FromBits(negated)
    }
}
/// Absolute value of a 32-bit float: bit 31 (the sign bit) is forced to false,
/// the remaining 31 bits are copied from the input.
module fp32_abs {
    interface fp32_abs : float v -> float o {
        bool[32] in_bits = ToBits(v)
        bool[32] magnitude

        // Clear the top bit, keep everything below it.
        magnitude[31] = false
        magnitude[0:31] = in_bits[0:31]

        o = FromBits(magnitude)
    }
}
/// Widens a 32-bit float to a 64-bit double by re-packing its bit fields.
///
/// - Sign bit is copied.
/// - Normal numbers: exponent is re-biased (127 -> 1023), the 23-bit fraction is
///   placed in the top of the 52-bit fraction and padded with zeros below.
/// - Infinity / NaN (exponent field 255): exponent becomes 2047, fraction is kept
///   so that NaN stays NaN and infinity stays infinity.
/// - Zero and subnormal inputs (exponent field 0): flushed to a signed zero.
///   Subnormals are NOT normalized; they have no implicit leading one, so
///   re-biasing their exponent as if they were normal would produce a wrong value.
module fp32_to_fp64 {
    interface fp32_to_fp64 : float v -> double o

    bool[32] v_bits = ToBits(v)
    int exp32 = BitsToUInt(v_bits[23:31])
    int frac32 = BitsToUInt(v_bits[0:23])
    bool[64] o_bits

    // An all-zero exponent field covers both zero and subnormals.
    bool is_zero_or_subnormal = exp32 == 0
    bool is_inf_nan = exp32 == 255

    // Exponent conversion, and selection of the fraction that is carried over.
    int exp64
    int frac_kept
    when is_zero_or_subnormal {
        // Flush to zero: both exponent and fraction must be cleared.
        exp64 = 0
        frac_kept = 0
    } else when is_inf_nan {
        exp64 = 2047
        frac_kept = frac32
    } else {
        // Re-bias from the 8-bit exponent bias to the 11-bit one.
        exp64 = exp32 + (1023 - 127)
        frac_kept = frac32
    }

    // Mantissa: shift up by (52 - 23) bits, filling the low bits with zeros.
    int frac64 = BitwiseIntConcat#(LOWER_BITS: 52 - 23)(frac_kept, 0)
    o_bits[0:52] = UIntToBits(frac64)
    o_bits[52:63] = UIntToBits(exp64)

    // Sign
    o_bits[63] = v_bits[31]

    o = FromBits(o_bits)
}
/// Narrows a 64-bit double to a 32-bit float by re-packing its bit fields.
/// The fraction is truncated (no rounding) and subnormal results are not produced.
///
/// - Sign bit is copied.
/// - Zero, subnormal inputs and results that underflow: signed zero.
/// - Results that overflow the 8-bit exponent: signed infinity (fraction cleared,
///   otherwise the exponent field 255 with a non-zero fraction would encode NaN).
/// - Infinity stays infinity; NaN stays NaN (the top fraction bit is forced so a
///   payload living only in the truncated low bits cannot collapse to infinity).
/// - Everything else: exponent re-biased (1023 -> 127), top 23 fraction bits kept.
module fp64_to_fp32 {
    gen bool[23] ZEROS = RepeatGen#(T: type bool, SIZE: 23, V: false)

    interface fp64_to_fp32 : double v -> float o

    bool[64] v_bits = ToBits(v)
    int exp64 = BitsToUInt(v_bits[52:63])
    int frac64 = BitsToUInt(v_bits[0:52])
    // Top 23 bits of the 52-bit fraction (truncation of the low 29 bits).
    bool[23] frac_top = v_bits[52-23:52]
    bool[32] o_bits

    // Detect special cases
    bool is_zero = exp64 == 0 & frac64 == 0
    bool is_inf_nan = exp64 == 2047

    int exp32
    bool[23] frac32_bits
    when is_zero {
        exp32 = 0
        frac32_bits = ZEROS
    } else when is_inf_nan {
        exp32 = 255
        frac32_bits[0:22] = frac_top[0:22]
        // Keep NaN a NaN even when its payload is entirely in the dropped bits.
        frac32_bits[22] = frac_top[22] | (frac64 != 0)
    } else {
        // Bias conversion: this is the exponent expressed in the 32-bit bias.
        int exp_rebiased = exp64 + (127 - 1023)
        when exp_rebiased <= 0 {
            // Underflow -> zero (no subnormal handling here)
            exp32 = 0
            frac32_bits = ZEROS
        } else when exp_rebiased >= 255 {
            // Overflow -> infinity, which requires an all-zero fraction
            exp32 = 255
            frac32_bits = ZEROS
        } else {
            exp32 = IntNarrow#(FROM: 0, TO: 255)(exp_rebiased)
            frac32_bits = frac_top
        }
    }

    o_bits[0:23] = frac32_bits
    o_bits[23:31] = UIntToBits(exp32)

    // Sign
    o_bits[31] = v_bits[63]

    o = FromBits(o_bits)
}
/// Absolute value of a 64-bit double: bit 63 (the sign bit) is forced to false,
/// the remaining 63 bits are copied from the input.
module fp64_abs {
    interface fp64_abs : double v -> double o {
        bool[64] in_bits = ToBits(v)
        bool[64] magnitude

        // Clear the top bit, keep everything below it.
        magnitude[63] = false
        magnitude[0:63] = in_bits[0:63]

        o = FromBits(magnitude)
    }
}
/// Multiplies a floating point bit pattern by 2^power by adding `power` to its
/// exponent field. The layout is, from bit 0 upward: MANTISSA_BITS of mantissa,
/// EXPONENT_BITS of exponent, then one sign bit.
///
/// - Sign bit is always copied.
/// - Infinity / NaN (all-ones exponent): passed through unchanged.
/// - Zero and subnormal inputs (exponent field 0): flushed to a signed zero.
/// - Result exponent <= 0: flushed to a signed zero (no subnormal results).
/// - Result exponent >= all-ones: signed infinity.
/// - Otherwise: exponent replaced, mantissa unchanged.
module fp_mul_pow2_bitwise#(int MANTISSA_BITS, int EXPONENT_BITS, int FROM, int TO) {
    // All-ones exponent field, which encodes infinity / NaN.
    gen int EXPONENT_INF = pow2#(E: EXPONENT_BITS) - 1
    gen bool[MANTISSA_BITS] ZEROS = RepeatGen#(T: type bool, SIZE: MANTISSA_BITS, V: false)

    interface fp_mul_pow2_bitwise : bool[1+MANTISSA_BITS+EXPONENT_BITS] v_bits, int#(FROM, TO) power -> bool[1+MANTISSA_BITS+EXPONENT_BITS] o_bits

    int exponent = BitsToUInt(v_bits[MANTISSA_BITS+:EXPONENT_BITS])
    bool[MANTISSA_BITS] mantissa = v_bits[0:MANTISSA_BITS]

    int new_exp
    bool[MANTISSA_BITS] new_mantissa
    when exponent == EXPONENT_INF { // Already infinity or NaN: leave it unchanged.
        new_exp = EXPONENT_INF
        new_mantissa = mantissa
    } else when exponent == 0 { // Zero or subnormal: flush to zero, dropping any subnormal mantissa.
        new_exp = 0
        new_mantissa = ZEROS
    } else {
        int new_exponent = exponent + power
        when new_exponent <= 0 {
            // Underflow: flush to zero.
            new_exp = 0
            new_mantissa = ZEROS
        } else when new_exponent >= EXPONENT_INF {
            // Overflow: infinity needs an all-zero mantissa.
            new_exp = EXPONENT_INF
            new_mantissa = ZEROS
        } else {
            new_exp = IntNarrow#(FROM: 1, TO: EXPONENT_INF)(new_exponent)
            new_mantissa = mantissa
        }
    }

    o_bits[0:MANTISSA_BITS] = new_mantissa
    o_bits[MANTISSA_BITS+:EXPONENT_BITS] = UIntToBits(new_exp)
    // Sign bit passes straight through.
    o_bits[MANTISSA_BITS + EXPONENT_BITS] = v_bits[MANTISSA_BITS + EXPONENT_BITS]
}
/// Computes v * 2^power for a 32-bit float without a multiplier, by delegating to
/// fp_mul_pow2_bitwise with a 23-bit mantissa and an 8-bit exponent.
module fp32_mul_pow2#(int FROM, int TO) {
    interface fp32_mul_pow2 : float v, int#(FROM, TO) power -> float o

    // Unpack to raw bits, adjust the exponent field, repack.
    bool[32] raw_bits
    raw_bits = ToBits(v)

    bool[32] scaled_bits
    scaled_bits = fp_mul_pow2_bitwise#(MANTISSA_BITS: 23, EXPONENT_BITS: 8)(raw_bits, power)

    o = FromBits(scaled_bits)
}
/// Computes v * 2^power for a 64-bit double without a multiplier, by delegating to
/// fp_mul_pow2_bitwise with a 52-bit mantissa and an 11-bit exponent.
module fp64_mul_pow2#(int FROM, int TO) {
    interface fp64_mul_pow2 : double v, int#(FROM, TO) power -> double o

    // Unpack to raw bits, adjust the exponent field, repack.
    bool[64] raw_bits
    raw_bits = ToBits(v)

    bool[64] scaled_bits
    scaled_bits = fp_mul_pow2_bitwise#(MANTISSA_BITS: 52, EXPONENT_BITS: 11)(raw_bits, power)

    o = FromBits(scaled_bits)
}