Skip to content

Commit 07c2fcd

Browse files
committed
fix(helix): address CodeRabbit review — NaN guard + f32 clamp epsilon
Two valid review findings on PR #459: 1. placement.rs: HemispherePoint::lift produced NaN for n >= total (u > 1 -> sqrt(1-u) = NaN). lift is public API, so an external caller got a silent NaN. Clamp u to <= 1.0 so out-of-range n saturates to the rim (r=1, y=0), matching the documented no-NaN contract. Added the lift_out_of_range_n_saturates_to_rim test. 2. simd.rs: the batch Fisher-Z clamp epsilon 1e-9 is below the f32 ULP near 1.0 (~1.19e-7), so 1.0 - 1e-9 rounded to 1.0f32 and the clamp was a no-op -- s = +/-1 produced ln(0) = -inf. Bumped EPS to 1e-6 (the f64 Similarity::fisher_z keeps 1e-9). Added the batch_fisher_z_boundary_inputs_are_finite test (+/-1, out-of-range). The third comment (ndarray::simd::U8x64 "missing") is a false positive: the ndarray-hpc feature resolves ndarray to the AdaWorldAPI fork at ../../../ndarray (which provides ::simd), not crates.io ndarray 0.16.1 -- the feature builds and tests green. Added a Cargo.toml note documenting the sibling-fork requirement. 63 unit + 6 doctests pass on both feature configs; clippy -D warnings and rustfmt clean. https://claude.ai/code/session_013rjF2Dvo1DnBACpbpYSffE
1 parent 4b70c8a commit 07c2fcd

3 files changed

Lines changed: 46 additions & 5 deletions

File tree

crates/helix/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ ndarray = { path = "../../../ndarray", optional = true, default-features = false
2222
default = []
2323
# Activate the ndarray-accelerated hot path (simd_ln_f32 / U8x64 batch kernels).
2424
# The always-present scalar path is used when this is disabled.
25+
#
26+
# NOTE: this feature resolves `ndarray` to the AdaWorldAPI fork at the sibling
27+
# path `../../../ndarray` (NOT crates.io `ndarray`, which has no `::simd` module).
28+
# It therefore requires that sibling repo to be checked out — the same convention
29+
# `lance-graph` core uses. The default build is zero-dep and needs none of this.
2530
ndarray-hpc = ["dep:ndarray"]
2631

2732
[dev-dependencies]

crates/helix/src/placement.rs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ impl HemispherePoint {
6060
/// # Arguments
6161
///
6262
/// * `n` — zero-based residue index (0 ≤ n < total is the intended range;
63-
/// larger values are accepted and produce `r > 1`, which the Fisher-Z step
64-
/// will clip or reject).
63+
/// `n ≥ total` saturates to the rim — `u` is clamped to 1.0, giving
64+
/// `r = 1`, `y = 0` (a finite point, never NaN)).
6565
/// * `total` — the total number of residue slots N. Clamped to 1 if zero.
6666
///
6767
/// # Examples
@@ -75,7 +75,7 @@ impl HemispherePoint {
7575
/// ```
7676
pub fn lift(n: usize, total: usize) -> Self {
7777
let total = total.max(1);
78-
let u = (n as f64 + 0.5) / total as f64;
78+
let u = ((n as f64 + 0.5) / total as f64).min(1.0);
7979
let r = u.sqrt();
8080
let y = (1.0 - u).sqrt();
8181
let azimuth = n as f64 * GOLDEN_RATIO;
@@ -252,6 +252,27 @@ mod tests {
252252
assert!((sum - 1.0).abs() < 1e-12, "r²+y² should be 1, got {sum}");
253253
}
254254

255+
// ── out-of-range n saturates to the rim (no NaN) ─────────────────────────
256+
257+
#[test]
258+
fn lift_out_of_range_n_saturates_to_rim() {
259+
// n ≥ total clamps u to 1.0 → r = 1, y = 0 (a finite rim point, not NaN).
260+
let p = HemispherePoint::lift(10, 4);
261+
assert!(
262+
p.r.is_finite() && p.y.is_finite(),
263+
"rim point must be finite"
264+
);
265+
assert!(
266+
(p.r - 1.0).abs() < 1e-12,
267+
"r should saturate to 1, got {}",
268+
p.r
269+
);
270+
assert!(p.y.abs() < 1e-12, "y should saturate to 0, got {}", p.y);
271+
// n == total is the boundary and must also be safe
272+
let q = HemispherePoint::lift(4, 4);
273+
assert!(q.r.is_finite() && q.y.is_finite());
274+
}
275+
255276
// ── cartesian: x² + z² = r² and overall unit sphere ────────────────────
256277

257278
#[test]

crates/helix/src/simd.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,13 @@
1212
//! Both functions are correctness-equivalent across the two builds — the feature
1313
//! is a pure accelerator, never a behaviour change.
1414
15-
const EPS: f32 = 1e-9;
15+
// f32 clamp guard. Must exceed the f32 ULP near 1.0 (≈1.19e-7) so that
16+
// `1.0 - EPS` is strictly < 1.0 in f32 — otherwise `ln(1 - s) = ln(0) = -inf`
17+
// leaks through for `s = ±1`. (The f64 `Similarity::fisher_z` can use 1e-9.)
18+
const EPS: f32 = 1e-6;
1619

1720
/// Batch Fisher-Z `z = ½·(ln(1+s) − ln(1−s))` over `similarities` into `out`
18-
/// (each input clamped to `±(1 − 1e-9)`). `out.len()` must equal
21+
/// (each input clamped to `±(1 − 1e-6)`). `out.len()` must equal
1922
/// `similarities.len()`. (ndarray `simd_ln_f32` path.)
2023
#[cfg(feature = "ndarray-hpc")]
2124
pub fn batch_fisher_z(similarities: &[f32], out: &mut [f32]) {
@@ -117,6 +120,18 @@ mod tests {
117120
}
118121
}
119122

123+
#[test]
124+
fn batch_fisher_z_boundary_inputs_are_finite() {
125+
// ±1 and out-of-range inputs must clamp to a finite result — the f32 EPS
126+
// must exceed the f32 ULP near 1.0, else ln(0) = -inf leaks through.
127+
let s = [1.0f32, -1.0, 2.0, -2.0, 0.999_999, -0.999_999];
128+
let mut out = vec![0f32; s.len()];
129+
batch_fisher_z(&s, &mut out);
130+
for (i, o) in out.iter().enumerate() {
131+
assert!(o.is_finite(), "out[{i}] = {o} must be finite");
132+
}
133+
}
134+
120135
#[test]
121136
fn batch_l1_u8_is_abs_diff() {
122137
let a: Vec<u8> = (0..130u16).map(|x| x as u8).collect();

0 commit comments

Comments
 (0)