fix(simd): preserve NaN in simd_exp_f32 (codex review on PR #142)

claude · claude · commit 4d28884daf26 · 2026-05-13T14:44:04.000Z
The pre-clamp via simd_clamp silently destroyed NaN inputs. simd_clamp is implemented as max(lo).min(hi); _mm512_max_ps returns the SECOND operand when the first is NaN (per Intel SDM § MAXPS), so NaN got clamped to lo (-87.336) and exp(-87.336) ≈ 1.2e-38 — a tiny finite value pretending to be valid. Fix: capture NaN lanes via x.simd_ne(x) (NaN ≠ itself per IEEE 754) BEFORE the clamp, then mask-select NaN back into those lanes after the polynomial. NaN propagates per-lane; finite lanes are unchanged. Two regression tests: (1) simd_exp_f32_propagates_nan — a full-NaN vector returns full-NaN; (2) simd_exp_f32_propagates_nan_per_lane — mixed NaN/0.0 input, where NaN lanes propagate and finite lanes compute exp(0)=1 unaffected. Test suite: 1788 passed (+2 from 1786). Reported-by: codex review on PR #142.
diff --git a/src/simd.rs b/src/simd.rs
@@ -1598,15 +1598,27 @@ pub fn f32_to_bf16_batch(input: &[f32], output: &mut [u16]) {
 /// integer exponent `n` stays within the IEEE 754 f32 representable range.
 /// Beyond the upper bound we'd hit `i32` overflow in `pow2n_from_int` and
 /// silently return ~0.5 instead of +Inf (release) or panic (debug).
-/// NaN passes through the polynomial as NaN (NaN comparisons in `simd_clamp`
-/// take neither branch on standard implementations).
+///
+/// NaN handling: `simd_clamp` is `max(lo).min(hi)`, and `_mm512_max_ps` /
+/// `_mm512_min_ps` return the SECOND operand when the first is NaN (per
+/// Intel SDM § MAXPS/MINPS). Unguarded, that would silently clamp NaN inputs
+/// to `lo` (-87.336), producing `exp(-87.336) ≈ 1.2e-38` — a tiny finite value
+/// masquerading as valid output. Caught by codex review on PR #142.
+///
+/// Fix: capture NaN lanes via `x.simd_ne(x)` (NaN ≠ itself per IEEE 754)
+/// before the clamp, then mask-select NaN back into those lanes after
+/// the polynomial. NaN lanes propagate as NaN; finite lanes are unchanged.
 #[inline(always)]
 #[allow(dead_code)]
 pub fn simd_exp_f32(x: F32x16) -> F32x16 {
     let ln2 = F32x16::splat(core::f32::consts::LN_2);
     let inv_ln2 = F32x16::splat(1.0 / core::f32::consts::LN_2);
     let one = F32x16::splat(1.0);
 
+    // NaN-preservation mask: bit set wherever x is NaN. IEEE 754: NaN ≠ NaN.
+    // Captured BEFORE the clamp because simd_clamp destroys NaN lanes.
+    let nan_mask = x.simd_ne(x);
+
     // Pre-clamp to the safe domain. Outside this band exp() is non-representable
     // anyway (overflow → +Inf at ~88.7, underflow → +0 at ~-87.3) so the clamp
     // is observable only at the saturation boundary.
@@ -1625,7 +1637,10 @@ pub fn simd_exp_f32(x: F32x16) -> F32x16 {
     let poly = one + r * (one + r * (c2 + r * (c3 + r * (c4 + r * c5))));
 
     // Reconstruct: exp(x) = 2^n * poly
-    poly * pow2n_from_int(n)
+    let result = poly * pow2n_from_int(n);
+
+    // Restore NaN in lanes where the input was NaN (clamp had destroyed them).
+    nan_mask.select(F32x16::splat(f32::NAN), result)
 }
 
 /// Compute 2^n where n is an integer stored as f32.
@@ -1842,6 +1857,40 @@ mod tests {
         }
     }
 
+    #[test]
+    fn simd_exp_f32_propagates_nan() {
+        // simd_clamp is max(lo).min(hi); _mm512_max_ps returns the SECOND
+        // operand on NaN, so without the nan_mask save/restore, NaN would
+        // be silently clamped to -87.336 → exp ≈ 1.2e-38 (a tiny finite
+        // value pretending to be valid). With the mask, NaN propagates.
+        // Per codex review on PR #142.
+        let nan = F32x16::splat(f32::NAN);
+        let result = simd_exp_f32(nan);
+        let arr = result.to_array();
+        for &v in &arr {
+            assert!(v.is_nan(), "exp(NaN) must propagate NaN, got {}", v);
+        }
+    }
+
+    #[test]
+    fn simd_exp_f32_propagates_nan_per_lane() {
+        // Mixed input: lanes 0,4,8,12 are NaN; rest are 0.0. Verify that
+        // NaN propagates only in those lanes; the others compute exp(0)=1.
+        let mut data = [0.0f32; 16];
+        for i in (0..16).step_by(4) {
+            data[i] = f32::NAN;
+        }
+        let result = simd_exp_f32(F32x16::from_array(data));
+        let arr = result.to_array();
+        for (i, &v) in arr.iter().enumerate() {
+            if i % 4 == 0 {
+                assert!(v.is_nan(), "lane {} should be NaN, got {}", i, v);
+            } else {
+                assert!((v - 1.0).abs() < 1e-4, "lane {} should be exp(0)=1, got {}", i, v);
+            }
+        }
+    }
+
     #[test]
     fn simd_exp_f32_handles_large_positive() {
         // Without the clamp, x = 200 produced n = 288, ni + 127 = 415 which