|
| 1 | +BENCH-010: Format Analysis Suite (post BUG-001-BF16 fix) |
| 2 | +========================================================= |
| 3 | +seed=42, n=10000 per distribution |
| 4 | +Cross-reference: issue #23 |
| 5 | + |
| 6 | +Distribution: GAUSS_001 (n=10000) |
| 7 | +-------------------------------------------------------------------------------- |
| 8 | +Format MSE MaxAbsErr ULP_obs ULP_th Status |
| 9 | +-------------------------------------------------------------------------------- |
| 10 | +fp32 6.34e-20 1.83e-9 1.83e-9 9.31e-10 FAIL |
| 11 | +fp16 7.68e-8 1.56e-2 1.56e-2 7.63e-6 FAIL |
| 12 | +bf16 2.69e-10 1.09e-4 1.09e-4 6.10e-5 FAIL |
| 13 | +gf16 1.74e-11 2.91e-5 2.91e-5 1.53e-5 FAIL |
| 14 | +ternary 1.00e-4 4.61e-2 4.61e-2 1.00e0 FAIL |
| 15 | + |
| 16 | +Distribution: GAUSS_01 (n=10000) |
| 17 | +-------------------------------------------------------------------------------- |
| 18 | +Format MSE MaxAbsErr ULP_obs ULP_th Status |
| 19 | +-------------------------------------------------------------------------------- |
| 20 | +fp32 6.26e-18 1.47e-8 1.47e-8 7.45e-9 FAIL |
| 21 | +fp16 9.97e-8 3.12e-2 3.12e-2 6.10e-5 FAIL |
| 22 | +bf16 2.78e-8 9.63e-4 9.63e-4 4.88e-4 FAIL |
| 23 | +gf16 1.74e-9 2.44e-4 2.44e-4 1.22e-4 FAIL |
| 24 | +ternary 1.00e-2 4.61e-1 4.61e-1 1.00e0 FAIL |
| 25 | + |
| 26 | +Distribution: GAUSS_10 (n=10000) |
| 27 | +-------------------------------------------------------------------------------- |
| 28 | +Format MSE MaxAbsErr ULP_obs ULP_th Status |
| 29 | +-------------------------------------------------------------------------------- |
| 30 | +fp32 6.55e-16 2.31e-7 2.31e-7 5.96e-8 FAIL |
| 31 | +fp16 1.85e-7 7.81e-3 7.81e-3 4.88e-4 FAIL |
| 32 | +bf16 2.83e-6 1.32e-2 1.32e-2 3.91e-3 FAIL |
| 33 | +gf16 1.72e-7 2.46e-3 2.46e-3 9.77e-4 FAIL |
| 34 | +ternary 2.04e-1 3.61e0 3.61e0 1.00e0 FAIL |
| 35 | + |
| 36 | +Distribution: GAUSS_100 (n=10000) |
| 37 | +-------------------------------------------------------------------------------- |
| 38 | +Format MSE MaxAbsErr ULP_obs ULP_th Status |
| 39 | +-------------------------------------------------------------------------------- |
| 40 | +fp32 6.44e-14 1.74e-6 1.74e-6 9.54e-7 FAIL |
| 41 | +fp16 1.80e-5 2.67e-2 2.67e-2 7.81e-3 FAIL |
| 42 | +bf16 2.81e-4 1.23e-1 1.23e-1 6.25e-2 FAIL |
| 43 | +gf16 1.69e-5 2.67e-2 2.67e-2 1.56e-2 FAIL |
| 44 | +ternary 8.53e1 4.51e1 4.51e1 1.00e0 FAIL |
| 45 | + |
| 46 | +Distribution: UNIFORM_1 (n=10000) |
| 47 | +-------------------------------------------------------------------------------- |
| 48 | +Format MSE MaxAbsErr ULP_obs ULP_th Status |
| 49 | +-------------------------------------------------------------------------------- |
| 50 | +fp32 1.67e-16 2.98e-8 2.98e-8 2.98e-8 PASS |
| 51 | +fp16 4.45e-8 4.88e-4 4.88e-4 2.44e-4 FAIL |
| 52 | +bf16 7.26e-7 1.95e-3 1.95e-3 1.95e-3 PASS |
| 53 | +gf16 4.48e-8 4.88e-4 4.88e-4 4.88e-4 PASS |
| 54 | +ternary 8.47e-2 5.00e-1 5.00e-1 1.00e0 FAIL |
| 55 | + |
| 56 | +Distribution: UNIFORM_100 (n=10000) |
| 57 | +-------------------------------------------------------------------------------- |
| 58 | +Format MSE MaxAbsErr ULP_obs ULP_th Status |
| 59 | +-------------------------------------------------------------------------------- |
| 60 | +fp32 2.18e-12 3.81e-6 3.81e-6 3.81e-6 PASS |
| 61 | +fp16 5.88e-4 6.25e-2 6.25e-2 3.12e-2 FAIL |
| 62 | +bf16 9.37e-3 2.50e-1 2.50e-1 2.50e-1 PASS |
| 63 | +gf16 5.77e-4 6.24e-2 6.24e-2 6.25e-2 PASS |
| 64 | +ternary 3.19e3 9.90e1 9.90e1 1.00e0 FAIL |
| 65 | + |
| 66 | +=== Hypothesis Tests === |
| 67 | + |
| 68 | +H1: bf16 vs gf16 on Uniform [-100,+100] |
| 69 | + bf16 MSE = 9.369421e-3 |
| 70 | + gf16 MSE = 5.772505e-4 |
| 71 | + DIVERGED (rel_diff=0.9384, i.e. 1 - gf16/bf16 MSE) -> H1 CONFIRMED (fix resolved collision) |
| 72 | + |
| 73 | +H2: bf16 vs gf16 on Gaussian σ=0.1 |
| 74 | + bf16 MSE = 2.784927e-8 |
| 75 | + gf16 MSE = 1.740400e-9 |
| 76 | + DIVERGED (rel_diff=0.9375, i.e. 1 - gf16/bf16 MSE) -> H2 FAILED (collision was a genuine bug) |
| 77 | + |
| 78 | +=== Full Result Log === |
| 79 | +RESULT=fp32 @ GAUSS_001 | MSE=6.34e-20 ULP_th=9.31e-10 ULP_obs=1.83e-9 status=FAIL |
| 80 | +RESULT=fp16 @ GAUSS_001 | MSE=7.68e-8 ULP_th=7.63e-6 ULP_obs=1.56e-2 status=FAIL |
| 81 | +RESULT=bf16 @ GAUSS_001 | MSE=2.69e-10 ULP_th=6.10e-5 ULP_obs=1.09e-4 status=FAIL |
| 82 | +RESULT=gf16 @ GAUSS_001 | MSE=1.74e-11 ULP_th=1.53e-5 ULP_obs=2.91e-5 status=FAIL |
| 83 | +RESULT=ternary @ GAUSS_001 | MSE=1.00e-4 ULP_th=1.00e0 ULP_obs=4.61e-2 status=FAIL |
| 84 | +RESULT=fp32 @ GAUSS_01 | MSE=6.26e-18 ULP_th=7.45e-9 ULP_obs=1.47e-8 status=FAIL |
| 85 | +RESULT=fp16 @ GAUSS_01 | MSE=9.97e-8 ULP_th=6.10e-5 ULP_obs=3.12e-2 status=FAIL |
| 86 | +RESULT=bf16 @ GAUSS_01 | MSE=2.78e-8 ULP_th=4.88e-4 ULP_obs=9.63e-4 status=FAIL |
| 87 | +RESULT=gf16 @ GAUSS_01 | MSE=1.74e-9 ULP_th=1.22e-4 ULP_obs=2.44e-4 status=FAIL |
| 88 | +RESULT=ternary @ GAUSS_01 | MSE=1.00e-2 ULP_th=1.00e0 ULP_obs=4.61e-1 status=FAIL |
| 89 | +RESULT=fp32 @ GAUSS_10 | MSE=6.55e-16 ULP_th=5.96e-8 ULP_obs=2.31e-7 status=FAIL |
| 90 | +RESULT=fp16 @ GAUSS_10 | MSE=1.85e-7 ULP_th=4.88e-4 ULP_obs=7.81e-3 status=FAIL |
| 91 | +RESULT=bf16 @ GAUSS_10 | MSE=2.83e-6 ULP_th=3.91e-3 ULP_obs=1.32e-2 status=FAIL |
| 92 | +RESULT=gf16 @ GAUSS_10 | MSE=1.72e-7 ULP_th=9.77e-4 ULP_obs=2.46e-3 status=FAIL |
| 93 | +RESULT=ternary @ GAUSS_10 | MSE=2.04e-1 ULP_th=1.00e0 ULP_obs=3.61e0 status=FAIL |
| 94 | +RESULT=fp32 @ GAUSS_100 | MSE=6.44e-14 ULP_th=9.54e-7 ULP_obs=1.74e-6 status=FAIL |
| 95 | +RESULT=fp16 @ GAUSS_100 | MSE=1.80e-5 ULP_th=7.81e-3 ULP_obs=2.67e-2 status=FAIL |
| 96 | +RESULT=bf16 @ GAUSS_100 | MSE=2.81e-4 ULP_th=6.25e-2 ULP_obs=1.23e-1 status=FAIL |
| 97 | +RESULT=gf16 @ GAUSS_100 | MSE=1.69e-5 ULP_th=1.56e-2 ULP_obs=2.67e-2 status=FAIL |
| 98 | +RESULT=ternary @ GAUSS_100 | MSE=8.53e1 ULP_th=1.00e0 ULP_obs=4.51e1 status=FAIL |
| 99 | +RESULT=fp32 @ UNIFORM_1 | MSE=1.67e-16 ULP_th=2.98e-8 ULP_obs=2.98e-8 status=PASS |
| 100 | +RESULT=fp16 @ UNIFORM_1 | MSE=4.45e-8 ULP_th=2.44e-4 ULP_obs=4.88e-4 status=FAIL |
| 101 | +RESULT=bf16 @ UNIFORM_1 | MSE=7.26e-7 ULP_th=1.95e-3 ULP_obs=1.95e-3 status=PASS |
| 102 | +RESULT=gf16 @ UNIFORM_1 | MSE=4.48e-8 ULP_th=4.88e-4 ULP_obs=4.88e-4 status=PASS |
| 103 | +RESULT=ternary @ UNIFORM_1 | MSE=8.47e-2 ULP_th=1.00e0 ULP_obs=5.00e-1 status=FAIL |
| 104 | +RESULT=fp32 @ UNIFORM_100 | MSE=2.18e-12 ULP_th=3.81e-6 ULP_obs=3.81e-6 status=PASS |
| 105 | +RESULT=fp16 @ UNIFORM_100 | MSE=5.88e-4 ULP_th=3.12e-2 ULP_obs=6.25e-2 status=FAIL |
| 106 | +RESULT=bf16 @ UNIFORM_100 | MSE=9.37e-3 ULP_th=2.50e-1 ULP_obs=2.50e-1 status=PASS |
| 107 | +RESULT=gf16 @ UNIFORM_100 | MSE=5.77e-4 ULP_th=6.25e-2 ULP_obs=6.24e-2 status=PASS |
| 108 | +RESULT=ternary @ UNIFORM_100 | MSE=3.19e3 ULP_th=1.00e0 ULP_obs=9.90e1 status=FAIL |
| 109 | + |
| 110 | +Results: .trinity/results/bench_010.log |
0 commit comments