Skip to content

Commit e2c100d

Browse files
authored
feat(gf16): implement φ-optimized FMA/FMS operations (#57)
Add three new fused operations to GF16:
- phiFma(a, b, c): φ-dequantize inputs, compute a*b+c, φ-quantize result
- phiFms(a, b, c): φ-dequantize inputs, compute a*b-c, φ-quantize result
- fma(a, b, c): standard fused multiply-add (no φ scaling)

Also adds C-ABI exports (gf16_phi_fma, gf16_phi_fms) and C header declarations, plus 5 new tests (3 GF16 core, 2 C-ABI).

Closes #4
1 parent be611c7 commit e2c100d

3 files changed

Lines changed: 99 additions & 1 deletion

File tree

src/c/gf16.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,32 @@ gf16_t gf16_max(gf16_t a, gf16_t b);
347347
*/
348348
gf16_t gf16_fma(gf16_t a, gf16_t b, gf16_t c);
349349

350+
/**
351+
* φ-optimized fused multiply-add
352+
*
353+
* Dequantizes inputs from φ-space, computes a × b + c in f32,
354+
* then φ-quantizes the result back.
355+
*
356+
* @param a First operand (φ-quantized)
357+
* @param b Second operand (φ-quantized)
358+
* @param c Third operand (φ-quantized)
359+
* @return φ-quantized result of a × b + c
360+
*/
361+
gf16_t gf16_phi_fma(gf16_t a, gf16_t b, gf16_t c);
362+
363+
/**
364+
* φ-optimized fused multiply-subtract
365+
*
366+
* Dequantizes inputs from φ-space, computes a × b - c in f32,
367+
* then φ-quantizes the result back.
368+
*
369+
* @param a First operand (φ-quantized)
370+
* @param b Second operand (φ-quantized)
371+
* @param c Third operand (φ-quantized)
372+
* @return φ-quantized result of a × b - c
373+
*/
374+
gf16_t gf16_phi_fms(gf16_t a, gf16_t b, gf16_t c);
375+
350376
/*======================================================================
351377
* Constants
352378
*======================================================================*/

src/c_abi.zig

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,13 +201,20 @@ export fn gf16_max(a: gf16_t, b: gf16_t) callconv(.c) gf16_t {
201201
}
202202

203203
/// C-ABI multiply-add without φ scaling; forwards to `golden.GF16.fma`.
///
/// `a`, `b` and `c` are raw `gf16_t` values. The result is `a * b + c`
/// evaluated in f32 and converted back with `GF16.fromF32`.
export fn gf16_fma(a: gf16_t, b: gf16_t, c: gf16_t) callconv(.c) gf16_t {
    // Delegate to the core routine instead of repeating the arithmetic here, so
    // this entry point shares a single implementation with GF16.fma — the same
    // way gf16_phi_fma / gf16_phi_fms forward to their core counterparts.
    return gf16ToRaw(golden.GF16.fma(rawToGf16(a), rawToGf16(b), rawToGf16(c)));
}
210209

210+
/// C-ABI entry point for the φ-optimized multiply-add; forwards to `golden.GF16.phiFma`.
export fn gf16_phi_fma(a: gf16_t, b: gf16_t, c: gf16_t) callconv(.c) gf16_t {
    // Convert the raw C values to GF16, run the core operation, convert back.
    const lhs = rawToGf16(a);
    const rhs = rawToGf16(b);
    const addend = rawToGf16(c);
    const fused = golden.GF16.phiFma(lhs, rhs, addend);
    return gf16ToRaw(fused);
}
213+
214+
/// C-ABI entry point for the φ-optimized multiply-subtract; forwards to `golden.GF16.phiFms`.
export fn gf16_phi_fms(a: gf16_t, b: gf16_t, c: gf16_t) callconv(.c) gf16_t {
    // Convert the raw C values to GF16, run the core operation, convert back.
    const lhs = rawToGf16(a);
    const rhs = rawToGf16(b);
    const subtrahend = rawToGf16(c);
    const fused = golden.GF16.phiFms(lhs, rhs, subtrahend);
    return gf16ToRaw(fused);
}
217+
211218
// ═══════════════════════════════════════════════════════════════════
212219
// Library Info
213220
// ═════════════════════════════════════════════════════════════════════
@@ -309,6 +316,24 @@ test "C-ABI: gf16_fma" {
309316
try std.testing.expectApproxEqAbs(@as(f32, 10.0), val, 0.05);
310317
}
311318

319+
test "C-ABI: gf16_phi_fma" {
    // 2 × 3 + 4 = 10, driven entirely through the exported C entry points.
    const lhs = gf16_phi_quantize(2.0);
    const rhs = gf16_phi_quantize(3.0);
    const addend = gf16_phi_quantize(4.0);

    const round_tripped = gf16_phi_dequantize(gf16_phi_fma(lhs, rhs, addend));

    try std.testing.expectApproxEqAbs(@as(f32, 10.0), round_tripped, 1.5);
}
327+
328+
test "C-ABI: gf16_phi_fms" {
    // 5 × 3 - 4 = 11, driven entirely through the exported C entry points.
    const lhs = gf16_phi_quantize(5.0);
    const rhs = gf16_phi_quantize(3.0);
    const subtrahend = gf16_phi_quantize(4.0);

    const round_tripped = gf16_phi_dequantize(gf16_phi_fms(lhs, rhs, subtrahend));

    try std.testing.expectApproxEqAbs(@as(f32, 11.0), round_tripped, 2.0);
}
336+
312337
test "C-ABI: library version" {
313338
const version = std.mem.span(goldenfloat_version());
314339
try std.testing.expectEqualStrings("1.1.0", version);

src/formats/golden_float16.zig

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,27 @@ pub const GF16 = packed struct(u16) {
190190
pub fn phiDequantize(gf: GF16) f32 {
    // Decode to f32 first, then scale the decoded value by PHI_SQ.
    const decoded = gf.toF32();
    return decoded * PHI_SQ;
}
193+
194+
/// φ-space multiply-add: φ-dequantizes `a`, `b` and `c`, evaluates a*b + c in f32,
/// and passes the sum to `phiQuantize`.
/// The product and the sum are two separate f32 operations.
pub fn phiFma(a: GF16, b: GF16, c: GF16) GF16 {
    const product = a.phiDequantize() * b.phiDequantize();
    const sum = product + c.phiDequantize();
    return phiQuantize(sum);
}
201+
202+
/// φ-space multiply-subtract: φ-dequantizes `a`, `b` and `c`, evaluates a*b - c in f32,
/// and passes the difference to `phiQuantize`.
/// The product and the difference are two separate f32 operations.
pub fn phiFms(a: GF16, b: GF16, c: GF16) GF16 {
    const product = a.phiDequantize() * b.phiDequantize();
    const difference = product - c.phiDequantize();
    return phiQuantize(difference);
}
209+
210+
/// Multiply-add without φ scaling: decodes all three operands with `toF32`,
/// evaluates a*b + c in f32, and converts the sum back with `fromF32`.
pub fn fma(a: GF16, b: GF16, c: GF16) GF16 {
    const product = a.toF32() * b.toF32();
    const sum = product + c.toF32();
    return fromF32(sum);
}
193214
};
194215

195216
// ═════════════════════════════════════════════════════════════════════════════
@@ -417,4 +438,30 @@ test "PHI_SQ + 1/PHI_SQ equals 3" {
417438
try std.testing.expectApproxEqAbs(@as(f32, 3.0), computed, 1e-10);
418439
}
419440

441+
test "GF16 phi-fused multiply-add" {
    // 2 × 3 + 4 = 10 after a φ-quantize / φ-dequantize round trip.
    const lhs = GF16.phiQuantize(2.0);
    const rhs = GF16.phiQuantize(3.0);
    const addend = GF16.phiQuantize(4.0);

    const round_tripped = lhs.phiFma(rhs, addend).phiDequantize();

    try std.testing.expectApproxEqAbs(@as(f32, 10.0), round_tripped, 1.5);
}
449+
450+
test "GF16 phi-fused multiply-subtract" {
    // 5 × 3 - 4 = 11 after a φ-quantize / φ-dequantize round trip.
    const lhs = GF16.phiQuantize(5.0);
    const rhs = GF16.phiQuantize(3.0);
    const subtrahend = GF16.phiQuantize(4.0);

    const round_tripped = lhs.phiFms(rhs, subtrahend).phiDequantize();

    try std.testing.expectApproxEqAbs(@as(f32, 11.0), round_tripped, 2.0);
}
458+
459+
test "GF16 standard fused multiply-add" {
    // 2 × 3 + 4 = 10 with plain fromF32 / toF32 conversion (no φ scaling).
    const lhs = GF16.fromF32(2.0);
    const rhs = GF16.fromF32(3.0);
    const addend = GF16.fromF32(4.0);

    const decoded = lhs.fma(rhs, addend).toF32();

    try std.testing.expectApproxEqAbs(@as(f32, 10.0), decoded, 0.5);
}
466+
420467
// φ² + 1/φ² = 3 | TRINITY

0 commit comments

Comments
 (0)