sus-float/fp_custom.sus at master · pc2/sus-float · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170

/// Negates a single-precision float by inverting its sign bit (bit 31).
/// All other bits are forwarded unchanged.
module fp32_neg {
    interface fp32_neg : float v -> float o {
        bool[32] raw = ToBits(v)
        bool[32] negated
        // Sign bit: inverted.
        negated[31] = !raw[31]
        // Lower 31 bits: passed straight through.
        negated[0+:31] = raw[0+:31]
        o = FromBits(negated)
    }
}

/// Negates a double-precision float by inverting its sign bit (bit 63).
/// All other bits are forwarded unchanged.
module fp64_neg {
    interface fp64_neg : double v -> double o {
        bool[64] raw = ToBits(v)
        bool[64] negated
        // Sign bit: inverted.
        negated[63] = !raw[63]
        // Lower 63 bits: passed straight through.
        negated[0+:63] = raw[0+:63]
        o = FromBits(negated)
    }
}

/// Absolute value of a single-precision float: the sign bit (bit 31) is
/// forced to false, every other bit is forwarded unchanged.
module fp32_abs {
    interface fp32_abs : float v -> float o {
        bool[32] raw = ToBits(v)
        bool[32] cleared
        // Sign bit: always cleared.
        cleared[31] = false
        // Lower 31 bits: passed straight through.
        cleared[0+:31] = raw[0+:31]
        o = FromBits(cleared)
    }
}

/// Widens a single-precision float to double precision.
///
/// Handling by exponent field of the input:
/// - 0 (zero and subnormals): the result is a zero with the input's sign.
///   Subnormal inputs are flushed to zero; they are NOT renormalized.
/// - 255 (infinity / NaN): exponent field becomes 2047 and the fraction is
///   zero-extended, so a zero fraction stays zero and a non-zero one stays non-zero.
/// - anything else: the exponent is re-biased from 127 to 1023 and the 23-bit
///   fraction is placed in the upper bits of the 52-bit fraction.
module fp32_to_fp64 {
    interface fp32_to_fp64 : float v -> double o

    bool[32] v_bits = ToBits(v)

    int exp32 = BitsToUInt(v_bits[23:31])
    int frac32 = BitsToUInt(v_bits[0:23])

    bool[64] o_bits

    // exp32 == 0 covers both zero and subnormal inputs. A subnormal has no implicit
    // leading one, so simply re-biasing its exponent would produce a wrong value.
    // Without a normalizing shifter the only consistent choice is to flush it to zero.
    bool is_zero_or_subnormal = exp32 == 0
    bool is_inf_nan = exp32 == 255

    // Fraction moved to the top of the 52-bit field (lower 29 bits are zero).
    int widened_frac = BitwiseIntConcat#(LOWER_BITS: 52 - 23)(frac32, 0)

    int exp64
    int frac64
    when is_zero_or_subnormal {
        exp64 = 0
        frac64 = 0
    } else when is_inf_nan {
        exp64 = 2047
        frac64 = widened_frac
    } else {
        // Bias conversion
        exp64 = exp32 + (1023 - 127)
        frac64 = widened_frac
    }

    o_bits[0:52] = UIntToBits(frac64)
    o_bits[52:63] = UIntToBits(exp64)

    // Sign
    o_bits[63] = v_bits[31]

    o = FromBits(o_bits)
}

/// Narrows a double-precision float to single precision.
///
/// The fraction is truncated to its upper 23 bits (no rounding).
/// - Zero, subnormal inputs, and values too small for a normal float produce
///   a zero with the input's sign (no subnormal results are generated).
/// - Values too large for a float produce infinity with the input's sign.
/// - Infinity stays infinity. NaN stays NaN: the top fraction bit is forced
///   high so that a payload living only in the discarded low bits cannot
///   turn the result into infinity.
module fp64_to_fp32 {
    gen bool[23] ZERO_FRAC = RepeatGen#(T: type bool, SIZE: 23, V: false)
    interface fp64_to_fp32 : double v -> float o
    bool[64] v_bits = ToBits(v)

    int exp64 = BitsToUInt(v_bits[52:63])
    int frac64 = BitsToUInt(v_bits[0:52])

    bool[32] o_bits

    // Detect special cases
    bool frac_nonzero = frac64 != 0
    bool is_zero = exp64 == 0 & frac64 == 0
    bool is_inf_nan = exp64 == 2047

    int exp32
    bool[23] frac32

    when is_zero {
        exp32 = 0
        frac32 = ZERO_FRAC
    } else when is_inf_nan {
        exp32 = 255
        // Keep the upper payload bits, but make sure a NaN keeps a non-zero fraction.
        // For infinity frac_nonzero is false, so the fraction stays all-zero.
        frac32[0:22] = v_bits[52-23:51]
        frac32[22] = v_bits[51] | frac_nonzero
    } else {
        // Bias conversion (result is still a biased exponent, now relative to 127)
        int exp_rebiased = exp64 + (127 - 1023)

        when exp_rebiased <= 0 {
            // Underflow → zero (no subnormal handling here).
            // The fraction must be cleared too, otherwise the result is a bogus subnormal.
            exp32 = 0
            frac32 = ZERO_FRAC
        } else when exp_rebiased >= 255 {
            // Overflow → infinity.
            // The fraction must be cleared too, otherwise the result is a NaN.
            exp32 = 255
            frac32 = ZERO_FRAC
        } else {
            exp32 = IntNarrow#(FROM: 0, TO: 255)(exp_rebiased)
            // Mantissa (truncate)
            frac32 = v_bits[52-23:52]
        }
    }

    o_bits[0:23] = frac32
    o_bits[23:31] = UIntToBits(exp32)

    // Sign
    o_bits[31] = v_bits[63]

    o = FromBits(o_bits)
}

/// Absolute value of a double-precision float: the sign bit (bit 63) is
/// forced to false, every other bit is forwarded unchanged.
module fp64_abs {
    interface fp64_abs : double v -> double o {
        bool[64] raw = ToBits(v)
        bool[64] cleared
        // Sign bit: always cleared.
        cleared[63] = false
        // Lower 63 bits: passed straight through.
        cleared[0+:63] = raw[0+:63]
        o = FromBits(cleared)
    }
}

/// Multiplies a floating point value, given as raw bits, by 2^power by adding
/// `power` to its exponent field.
///
/// Bit layout of `v_bits` / `o_bits` (low to high): MANTISSA_BITS of mantissa,
/// EXPONENT_BITS of exponent, one sign bit. The sign bit is always copied through.
///
/// - MANTISSA_BITS / EXPONENT_BITS: field widths of the format.
/// - FROM / TO: bounds of the `power` input (`int#(FROM, TO)`).
///
/// Special cases:
/// - Exponent all ones (infinity / NaN): returned unchanged.
/// - Exponent zero (zero / subnormal): result is zero; subnormals are flushed.
/// - Adjusted exponent <= 0: result is zero.
/// - Adjusted exponent >= all ones: result is infinity.
module fp_mul_pow2_bitwise#(int MANTISSA_BITS, int EXPONENT_BITS, int FROM, int TO) {
    gen int EXPONENT_INF = pow2#(E: EXPONENT_BITS) - 1
    gen bool[MANTISSA_BITS] ZEROS = RepeatGen#(T: type bool, SIZE: MANTISSA_BITS, V: false)
    interface fp_mul_pow2_bitwise : bool[1+MANTISSA_BITS+EXPONENT_BITS] v_bits, int#(FROM, TO) power -> bool[1+MANTISSA_BITS+EXPONENT_BITS] o_bits

    int exponent = BitsToUInt(v_bits[MANTISSA_BITS+:EXPONENT_BITS])
    bool[MANTISSA_BITS] mantissa = v_bits[0:MANTISSA_BITS]

    int new_exp
    bool[MANTISSA_BITS] new_mantissa
    when exponent == EXPONENT_INF { // Already infinity or NaN: pass it through unchanged.
        new_exp = EXPONENT_INF
        new_mantissa = mantissa
    } else when exponent == 0 { // Zero or subnormal: flush to zero.
        // A subnormal has no implicit leading one, so adjusting its exponent field
        // cannot scale it correctly. Clear the mantissa, like the underflow case below.
        new_exp = 0
        new_mantissa = ZEROS
    } else {
        int shifted_exp = exponent + power
        when shifted_exp <= 0 {
            // Underflow: flush to zero instead of producing a subnormal.
            new_exp = 0
            new_mantissa = ZEROS
        } else when shifted_exp >= EXPONENT_INF {
            // Overflow: saturate to infinity (mantissa must be zero, otherwise it would be NaN).
            new_exp = EXPONENT_INF
            new_mantissa = ZEROS
        } else {
            new_exp = IntNarrow#(FROM: 1, TO: EXPONENT_INF)(shifted_exp)
            new_mantissa = mantissa
        }
    }
    o_bits[0:MANTISSA_BITS] = new_mantissa
    o_bits[MANTISSA_BITS+:EXPONENT_BITS] = UIntToBits(new_exp)
    // Sign bit is never affected by a power-of-two scale.
    o_bits[MANTISSA_BITS + EXPONENT_BITS] = v_bits[MANTISSA_BITS + EXPONENT_BITS]
}
/// Computes v * 2^power for a single-precision float without a multiplier:
/// the value is reinterpreted as bits and handed to `fp_mul_pow2_bitwise`
/// (23 mantissa bits, 8 exponent bits), which adjusts the exponent field.
/// FROM / TO are the bounds of the `power` input.
module fp32_mul_pow2#(int FROM, int TO) {
    interface fp32_mul_pow2 : float v, int#(FROM, TO) power -> float o

    bool[32] raw = ToBits(v)
    bool[32] scaled = fp_mul_pow2_bitwise#(MANTISSA_BITS: 23, EXPONENT_BITS: 8)(raw, power)
    o = FromBits(scaled)
}

/// Computes v * 2^power for a double-precision float without a multiplier:
/// the value is reinterpreted as bits and handed to `fp_mul_pow2_bitwise`
/// (52 mantissa bits, 11 exponent bits), which adjusts the exponent field.
/// FROM / TO are the bounds of the `power` input.
module fp64_mul_pow2#(int FROM, int TO) {
    interface fp64_mul_pow2 : double v, int#(FROM, TO) power -> double o

    bool[64] raw = ToBits(v)
    bool[64] scaled = fp_mul_pow2_bitwise#(MANTISSA_BITS: 52, EXPONENT_BITS: 11)(raw, power)
    o = FromBits(scaled)
}