Skip to content

Commit 6bfc241

Browse files
author
Dmitrii Vasilev
committed
fix(rtl_gen): format-ID collision in gf_formats + degenerate gf4_add normalization
Audit of the rest of the canonical RTL master set (after gf16_mul/gf16_add) found two more defects in the GoldenFloat sources, under issue #129: gf_formats.v: the format-ID registry declared all GF_ID_* as 4'dN, but the IDs run 0..19 -- so GF_ID_INT8 (4'd16), GF_ID_NF4 (17), GF_ID_POSIT16 (18), GF_ID_BINARY16 (19) silently WRAPPED to 0..3 and COLLIDED with GF_ID_GF4/GF8/GF12/GF16. Any future dispatch on these IDs would mis-select the format. Widened all GF_ID_* to 5 bits -> 20 distinct IDs (verilator: no more "value too large for 4 bit"). They are currently declared-but-unused, so this is a latent trap, not an active fault. gf4_add.v: the degenerate GF4 (E1/M2) adder had a normalization width bug (the 3-bit norm was assigned the 4-bit concatenation {1'b1, 3'b000}, which truncates away the leading 1). Replaced with the verified gf4_add (round-to-nearest over the {0,+-1.25,+-1.5,+-1.75,+-Inf,NaN} grid; 256/256 exhaustive). Both units now verilator -Wall clean; the whole rtl_gen master set (gf4/gf8/gf16 add, gf16 mul, gf_formats, nf4/int4 quantizers) is clean. Fixes #129
1 parent 5849643 commit 6bfc241

3 files changed

Lines changed: 73 additions & 106 deletions

File tree

.trinity/current_task/activity.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,3 +364,7 @@
364364
- **Commit:** fix(rtl_gen): root-cause fix of canonical gf16_add + make golden_tests.py runnable
365365
- **Files:** .trinity/current_task/activity.md,rtl_gen/gf16_add.v,rtl_gen/golden_tests.py
366366

367+
## 2026-06-03T17:53:43Z — trinity-rust-rings
368+
- **Commit:** fix(rtl_gen): format-ID collision in gf_formats + degenerate gf4_add normalization
369+
- **Files:** rtl_gen/gf4_add.v,rtl_gen/gf_formats.v
370+

rtl_gen/gf4_add.v

Lines changed: 45 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,17 @@
22
// t27/rtl_gen/gf4_add.v
33
// GoldenFloat4 Addition Unit - Extreme Compression
44
// Layout: [S(1) | E(1) | M(2)] - BIAS = 0
5-
// φ-distance: 0.118 (not optimal, but minimal bits)
5+
//
6+
// GF4 is degenerate: with bias 0 the only finite exponent is e=0 (e=1 is the
7+
// special code), so EVERY finite value shares exponent 2^0 and the representable
8+
// set is just {+-0, +-1.25, +-1.5, +-1.75, +-Inf, NaN} -- the significand is
9+
// {1,mant} = 4+mant in quarter units (e0m0 is zero, so 1.0 is NOT representable;
10+
// the smallest nonzero magnitude is 1.25). This makes "add" a round-to-nearest
11+
// into that tiny grid (spacing 0.25, overflow above 1.875). Rewritten 2026-06 to
12+
// do exactly that and verified exhaustively (256/256 pairs) by
13+
// test/gf4_exhaustive.py; the generic exponent probe (test/gf_arith_xcheck.py) skips gf4 because its
14+
// "1.0 = exp=bias" assumption collides with the zero code when bias=0.
15+
// Encodings: +0=0x0 -0=0x8 +Inf=0x4 -Inf=0xC NaN=0xE; 1.25/1.5/1.75 = m=1/2/3.
616

717
`default_nettype none
818
module gf4_add (
@@ -11,114 +21,64 @@ module gf4_add (
1121
output reg [3:0] result
1222
);
1323

14-
localparam BIAS = 1'd0;
15-
localparam EXP_MAX = 1'd1;
16-
1724
wire sign_a = a[3];
1825
wire exp_a = a[2];
1926
wire [1:0] mant_a = a[1:0];
2027
wire sign_b = b[3];
2128
wire exp_b = b[2];
2229
wire [1:0] mant_b = b[1:0];
2330

24-
wire is_zero_a = (exp_a == 1'd0) && (mant_a == 2'd0);
25-
wire is_zero_b = (exp_b == 1'd0) && (mant_b == 2'd0);
26-
wire is_special_a = (exp_a == EXP_MAX);
27-
wire is_special_b = (exp_b == EXP_MAX);
28-
wire is_inf_a = is_special_a && (mant_a == 2'd0);
29-
wire is_inf_b = is_special_b && (mant_b == 2'd0);
30-
wire is_nan_a = is_special_a && (mant_a != 2'd0);
31-
wire is_nan_b = is_special_b && (mant_b != 2'd0);
32-
33-
wire a_larger = (exp_a > exp_b) || ((exp_a == exp_b) && (mant_a >= mant_b));
31+
wire is_zero_a = (exp_a == 1'b0) && (mant_a == 2'd0);
32+
wire is_zero_b = (exp_b == 1'b0) && (mant_b == 2'd0);
33+
wire is_inf_a = (exp_a == 1'b1) && (mant_a == 2'd0);
34+
wire is_inf_b = (exp_b == 1'b1) && (mant_b == 2'd0);
35+
wire is_nan_a = (exp_a == 1'b1) && (mant_a != 2'd0);
36+
wire is_nan_b = (exp_b == 1'b1) && (mant_b != 2'd0);
3437

35-
reg [1:0] big_exp, result_exp;
36-
reg [2:0] big_fm, small_fm;
37-
reg [3:0] sum_m;
38-
reg big_sign, small_sign, result_sign;
39-
reg [2:0] norm;
40-
reg cancel;
38+
// Signed values in quarter units: operands are +-(4 + mant), i.e. +-5..+-7 in the arithmetic branch (zeros are handled earlier); sum spans -14..14.
39+
reg signed [5:0] sva, svb, sum;
40+
reg [5:0] mag; // |sum| in quarter units, 0..14
41+
reg rsign;
4142

4243
always @(*) begin
43-
cancel = 0;
44-
result_exp = 0;
45-
norm = 0;
46-
result_sign = 0;
47-
big_exp = 0;
48-
big_fm = 0;
49-
big_sign = 0;
50-
small_fm = 0;
51-
small_sign = 0;
52-
sum_m = 0;
44+
sva = 0; svb = 0; sum = 0; mag = 0; rsign = 0;
5345

5446
if (is_nan_a || is_nan_b)
55-
result = 4'hE; // NaN pattern for GF4
47+
result = 4'hE;
5648
else if (is_inf_a && is_inf_b && (sign_a != sign_b))
57-
result = 4'hE; // NaN
49+
result = 4'hE; // Inf - Inf = NaN
5850
else if (is_inf_a)
59-
result = sign_a ? 4'hC : 4'h4; // Inf
51+
result = sign_a ? 4'hC : 4'h4;
6052
else if (is_inf_b)
61-
result = sign_b ? 4'hC : 4'h4; // Inf
53+
result = sign_b ? 4'hC : 4'h4;
6254
else if (is_zero_a && is_zero_b)
63-
result = 4'h0; // Zero
55+
result = 4'h0;
6456
else if (is_zero_a)
6557
result = b;
6658
else if (is_zero_b)
6759
result = a;
6860
else begin
69-
if (a_larger) begin
70-
big_exp = exp_a;
71-
big_fm = {1'b1, mant_a};
72-
big_sign = sign_a;
73-
small_fm = {1'b1, mant_b};
74-
small_sign = sign_b;
75-
end else begin
76-
big_exp = exp_b;
77-
big_fm = {1'b1, mant_b};
78-
big_sign = sign_b;
79-
small_fm = {1'b1, mant_a};
80-
small_sign = sign_a;
81-
end
82-
83-
result_exp = big_exp;
84-
85-
if (big_sign == small_sign) begin
86-
sum_m = {1'b0, big_fm} + {1'b0, small_fm};
87-
result_sign = big_sign;
61+
sva = sign_a ? -$signed({3'b000, 1'b1, mant_a}) : $signed({3'b000, 1'b1, mant_a});
62+
svb = sign_b ? -$signed({3'b000, 1'b1, mant_b}) : $signed({3'b000, 1'b1, mant_b});
63+
sum = sva + svb;
64+
if (sum == 0) begin
65+
result = 4'h0;
8866
end else begin
89-
sum_m = {1'b0, big_fm} - {1'b0, small_fm};
90-
result_sign = big_sign;
91-
if (sum_m == 4'd0)
92-
cancel = 1;
93-
end
94-
95-
if (!cancel) begin
96-
if (sum_m[3]) begin
97-
norm = sum_m[2:0];
98-
end else if (sum_m[2]) begin
99-
norm = {sum_m[2:1], 1'b0};
100-
result_exp = result_exp - 1'b1;
101-
end else if (sum_m[1]) begin
102-
norm = {sum_m[1], 2'b00};
103-
result_exp = result_exp - 2'b10;
104-
end else if (sum_m[0]) begin
105-
norm = {1'b1, 3'b000};
106-
result_exp = result_exp - 2'b11;
107-
end else begin
108-
norm = 3'b0;
109-
result_exp = result_exp - 2'b10;
110-
end
111-
112-
if (result_exp[1])
113-
result = result_sign ? 4'h8 : 4'h0; // Underflow
114-
else if (result_exp[0] >= EXP_MAX)
115-
result = result_sign ? 4'hC : 4'h4; // Overflow to Inf
67+
rsign = sum[5];
68+
mag = sum[5] ? $unsigned(-sum) : $unsigned(sum); // 0..14 quarters
69+
// round to nearest grid point {0,5,6,7} quarters; >=8 -> Inf
70+
if (mag <= 6'd2)
71+
result = rsign ? 4'h8 : 4'h0; // -> 0
72+
else if (mag <= 6'd5)
73+
result = {rsign, 1'b0, 2'b01}; // 1.25
74+
else if (mag == 6'd6)
75+
result = {rsign, 1'b0, 2'b10}; // 1.5
76+
else if (mag == 6'd7)
77+
result = {rsign, 1'b0, 2'b11}; // 1.75
11678
else
117-
result = {result_sign, result_exp, norm[1:0]};
118-
end else begin
119-
result = 4'h0; // Cancel to zero
79+
result = rsign ? 4'hC : 4'h4; // overflow -> Inf
12080
end
12181
end
12282
end
12383

124-
endmodule
84+
endmodule

rtl_gen/gf_formats.v

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -87,27 +87,30 @@ localparam GF_CATEGORY_BINARY = 3'd5;
8787
// GF Format ID for routing
8888
// ============================================================
8989

90-
localparam GF_ID_GF4 = 4'd0;
91-
localparam GF_ID_GF8 = 4'd1;
92-
localparam GF_ID_GF12 = 4'd2;
93-
localparam GF_ID_GF16 = 4'd3;
94-
localparam GF_ID_GF20 = 4'd4;
95-
localparam GF_ID_GF24 = 4'd5;
96-
localparam GF_ID_GF32 = 4'd6;
97-
localparam GF_ID_GF64 = 4'd7;
98-
localparam GF_ID_GF128 = 4'd8;
99-
localparam GF_ID_GF256 = 4'd9;
100-
101-
localparam GF_ID_FP32 = 4'd10;
102-
localparam GF_ID_FP16 = 4'd11;
103-
localparam GF_ID_BF16 = 4'd12;
104-
localparam GF_ID_FP8_E4 = 4'd13;
105-
localparam GF_ID_FP8_E5 = 4'd14;
106-
localparam GF_ID_INT4 = 4'd15;
107-
localparam GF_ID_INT8 = 4'd16;
108-
localparam GF_ID_NF4 = 4'd17;
109-
localparam GF_ID_POSIT16 = 4'd18;
110-
localparam GF_ID_BINARY16 = 4'd19;
90+
// NB: format IDs run 0..19, which needs 5 bits. They were declared as 4'dN, so
91+
// 16..19 (INT8/NF4/POSIT16/BINARY16) silently wrapped to 0..3 and COLLIDED with
92+
// GF4/GF8/GF12/GF16. Widened to 5 bits so all 20 IDs are distinct. (FIX 2026-06)
93+
localparam [4:0] GF_ID_GF4 = 5'd0;
94+
localparam [4:0] GF_ID_GF8 = 5'd1;
95+
localparam [4:0] GF_ID_GF12 = 5'd2;
96+
localparam [4:0] GF_ID_GF16 = 5'd3;
97+
localparam [4:0] GF_ID_GF20 = 5'd4;
98+
localparam [4:0] GF_ID_GF24 = 5'd5;
99+
localparam [4:0] GF_ID_GF32 = 5'd6;
100+
localparam [4:0] GF_ID_GF64 = 5'd7;
101+
localparam [4:0] GF_ID_GF128 = 5'd8;
102+
localparam [4:0] GF_ID_GF256 = 5'd9;
103+
104+
localparam [4:0] GF_ID_FP32 = 5'd10;
105+
localparam [4:0] GF_ID_FP16 = 5'd11;
106+
localparam [4:0] GF_ID_BF16 = 5'd12;
107+
localparam [4:0] GF_ID_FP8_E4 = 5'd13;
108+
localparam [4:0] GF_ID_FP8_E5 = 5'd14;
109+
localparam [4:0] GF_ID_INT4 = 5'd15;
110+
localparam [4:0] GF_ID_INT8 = 5'd16;
111+
localparam [4:0] GF_ID_NF4 = 5'd17;
112+
localparam [4:0] GF_ID_POSIT16 = 5'd18;
113+
localparam [4:0] GF_ID_BINARY16 = 5'd19;
111114

112115
// ============================================================
113116
// Special encodings

0 commit comments

Comments
 (0)