fix tolerance and benchmarks

connortsui20 · connortsui20 · commit 94cdaf2accae · 2026-04-08T15:15:25.000-04:00
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
diff --git a/vortex-tensor/src/encodings/turboquant/scheme/compress.rs b/vortex-tensor/src/encodings/turboquant/scheme/compress.rs
@@ -18,6 +18,7 @@ use vortex_array::arrays::FixedSizeListArray;
 use vortex_array::arrays::PrimitiveArray;
 use vortex_array::arrays::extension::ExtensionArrayExt;
 use vortex_array::arrays::fixed_size_list::FixedSizeListArrayExt;
+use vortex_array::dtype::DType;
 use vortex_array::dtype::Nullability;
 use vortex_array::dtype::PType;
 use vortex_array::match_each_float_ptype;
@@ -39,9 +40,20 @@ use crate::scalar_fns::ApproxOptions;
 use crate::scalar_fns::l2_norm::L2Norm;
 use crate::vector::AnyVector;
 
-/// Tolerance for the unit-norm check in [`turboquant_encode`]. Each row's L2 norm must be within
-/// this distance of 1.0 (or be exactly 0.0 for zero vectors).
-const UNIT_NORM_TOLERANCE: f64 = 1e-10;
+/// Returns the unit-norm tolerance: the exclusive bound on `|norm - 1.0|` for `element_ptype`.
+///
+/// The checked encode path validates the post-normalization storage values, so the tolerance has
+/// to absorb the rounding error of storing them in the vector element type (looser for `F16`).
+/// The values are empirical, picked from testing, and are not derived error bounds.
+/// Panics (via `unreachable!`) if `element_ptype` is not `F16`, `F32`, or `F64`.
+fn unit_norm_tolerance(element_ptype: PType) -> f64 {
+    match element_ptype {
+        PType::F16 => 2e-3, // ~2x f16 machine epsilon (2^-10 ~ 9.8e-4)
+        PType::F32 => 1e-6, // ~8x f32 machine epsilon (2^-23 ~ 1.2e-7)
+        PType::F64 => 1e-10, // well above f64 machine epsilon (2^-52 ~ 2.2e-16)
+        _ => unreachable!("TurboQuant requires float elements, got {element_ptype:?}"),
+    }
+}
 
 /// Configuration for TurboQuant encoding.
 #[derive(Clone, Debug)]
@@ -165,7 +177,7 @@ fn turboquant_quantize_core(
 fn build_turboquant(
     num_rows: usize,
     core: QuantizationResult,
-    ext_dtype: &vortex_array::dtype::DType,
+    ext_dtype: &DType,
 ) -> VortexResult<TurboQuantArray> {
     let padded_dim = core.padded_dim;
     let padded_dim_u32 =
@@ -199,9 +211,9 @@ fn build_turboquant(
 /// **Null vectors are not supported.** The caller must normalize and strip nullability before
 /// calling this function, for example via [`normalize_as_l2_denorm`].
 ///
-/// This function validates that every row has L2 norm within `UNIT_NORM_TOLERANCE` of 1.0 (or is
-/// exactly 0.0). Use [`turboquant_encode_unchecked`] to skip this check when the caller has just
-/// performed normalization.
+/// This function validates that every row has L2 norm within a storage-precision-aware tolerance
+/// of 1.0 (or is exactly 0.0). Use [`turboquant_encode_unchecked`] to skip this check when the
+/// caller has just performed normalization.
 ///
 /// The returned array is a plain [`TurboQuantArray`] that decompresses to unit-norm vectors.
 /// The caller is responsible for wrapping it in an [`L2Denorm`] ScalarFnArray if the original
@@ -232,12 +244,13 @@ pub fn turboquant_encode(
             .as_extension()
             .metadata::<AnyVector>()
             .element_ptype();
+        let tolerance = unit_norm_tolerance(element_ptype);
 
         match_each_float_ptype!(element_ptype, |T| {
             for (i, &norm) in norms.as_slice::<T>().iter().enumerate() {
                 let norm_f64: f64 = ToPrimitive::to_f64(&norm).unwrap_or(f64::NAN);
                 vortex_ensure!(
-                    norm_f64 == 0.0 || (norm_f64 - 1.0).abs() < UNIT_NORM_TOLERANCE,
+                    norm_f64 == 0.0 || (norm_f64 - 1.0).abs() < tolerance,
                     "TurboQuant requires unit-norm input, but row {i} has L2 norm {norm_f64:.6} \
                      (expected 1.0 or 0.0)",
                 );
diff --git a/vortex-tensor/src/encodings/turboquant/tests.rs b/vortex-tensor/src/encodings/turboquant/tests.rs
@@ -220,6 +220,17 @@ fn empty_turboquant_parts(
     ))
 }
 
+fn normalized_child(
+    ext: &ExtensionArray,
+    ctx: &mut vortex_array::ExecutionCtx,
+) -> VortexResult<ArrayRef> {
+    Ok(
+        normalize_as_l2_denorm(&ApproxOptions::Exact, ext.as_ref().clone(), ctx)? // exact, not approx
+            .child_at(0) // presumably the unit-norm vectors inside the wrapper; TODO confirm
+            .clone(),
+    )
+}
+
 // -----------------------------------------------------------------------
 // Roundtrip tests
 // -----------------------------------------------------------------------
@@ -399,6 +410,44 @@ fn rejects_dimension_below_128(#[case] dim: usize) {
     assert!(turboquant_encode(ext.as_view(), &config, &mut ctx).is_err());
 }
 
+#[test]
+fn checked_encode_accepts_normalized_f16_input() -> VortexResult<()> {
+    let num_rows = 10;
+    let dim = 128;
+    let mut rng = StdRng::seed_from_u64(99); // fixed seed keeps the input deterministic
+    let normal = Normal::new(0.0f32, 1.0).unwrap();
+
+    let mut buf = BufferMut::<half::f16>::with_capacity(num_rows * dim);
+    for _ in 0..(num_rows * dim) {
+        buf.push(half::f16::from_f32(normal.sample(&mut rng))); // sample in f32, store as f16
+    }
+    let elements = PrimitiveArray::new::<half::f16>(buf.freeze(), Validity::NonNullable);
+    let fsl = FixedSizeListArray::try_new(
+        elements.into_array(),
+        dim.try_into()
+            .expect("somehow got dimension greater than u32::MAX"),
+        Validity::NonNullable,
+        num_rows,
+    )?;
+
+    let ext = make_vector_ext(&fsl);
+    let config = TurboQuantConfig {
+        bit_width: 3,
+        seed: Some(42),
+        num_rounds: 3,
+    };
+
+    let mut ctx = SESSION.create_execution_ctx();
+    let normalized = normalized_child(&ext, &mut ctx)?; // exact L2 normalization, child 0
+    let normalized_ext = normalized
+        .as_opt::<Extension>()
+        .vortex_expect("normalized child should be an Extension array");
+
+    let encoded = turboquant_encode(normalized_ext, &config, &mut ctx)?; // checked path
+    assert_eq!(encoded.len(), num_rows); // reaching here means the norm check accepted f16
+    Ok(())
+}
+
 fn make_fsl_small(dim: usize) -> FixedSizeListArray {
     let mut buf = BufferMut::<f32>::with_capacity(dim);
     for i in 0..dim {
@@ -1092,8 +1141,9 @@ fn nullable_slice_preserves_validity() -> VortexResult<()> {
 // -----------------------------------------------------------------------
 
 /// Verify that a TurboQuant array (extracted from the L2Denorm wrapper) survives
-/// serialize/deserialize. ScalarFnArray cannot be serialized yet, so we test the TQ child
-/// directly.
+/// serialize/deserialize.
+///
+/// TODO(connor): ScalarFnArray cannot be serialized yet, so we test the TQ child directly.
 #[test]
 fn serde_roundtrip() -> VortexResult<()> {
     use vortex_array::ArrayContext;
diff --git a/vortex-tensor/src/encodings/turboquant/vtable.rs b/vortex-tensor/src/encodings/turboquant/vtable.rs
@@ -26,6 +26,7 @@ use vortex_array::serde::ArrayChildren;
 use vortex_array::validity::Validity;
 use vortex_array::vtable::VTable;
 use vortex_array::vtable::ValidityVTable;
+#[cfg(debug_assertions)]
 use vortex_error::VortexExpect;
 use vortex_error::VortexResult;
 use vortex_error::vortex_ensure;
diff --git a/vortex/benches/single_encoding_throughput.rs b/vortex/benches/single_encoding_throughput.rs
@@ -439,16 +439,20 @@ mod turboquant_benches {
     use rand::SeedableRng;
     use rand::rngs::StdRng;
     use vortex::array::IntoArray;
+    use vortex::array::arrays::Extension;
     use vortex::array::arrays::ExtensionArray;
     use vortex::array::arrays::FixedSizeListArray;
     use vortex::array::arrays::PrimitiveArray;
+    use vortex::array::arrays::scalar_fn::ScalarFnArrayExt;
     use vortex::array::dtype::extension::ExtDType;
     use vortex::array::extension::EmptyMetadata;
     use vortex::array::validity::Validity;
     use vortex_array::VortexSessionExecute;
     use vortex_buffer::BufferMut;
     use vortex_tensor::encodings::turboquant::TurboQuantConfig;
-    use vortex_tensor::encodings::turboquant::turboquant_encode;
+    use vortex_tensor::encodings::turboquant::turboquant_encode_unchecked;
+    use vortex_tensor::scalar_fns::ApproxOptions;
+    use vortex_tensor::scalar_fns::l2_denorm::normalize_as_l2_denorm;
     use vortex_tensor::vector::Vector;
 
     use super::SESSION;
@@ -492,18 +496,35 @@ mod turboquant_benches {
         }
     }
 
+    fn setup_normalized_vector_ext(dim: usize) -> ExtensionArray {
+        let ext = setup_vector_ext(dim); // setup only: called before the timed closure
+        let mut ctx = SESSION.create_execution_ctx();
+        let normalized = normalize_as_l2_denorm(&ApproxOptions::Exact, ext.into_array(), &mut ctx)
+            .unwrap()
+            .child_at(0) // presumably the unit-norm vectors inside the wrapper; TODO confirm
+            .clone();
+        normalized.execute::<ExtensionArray>(&mut ctx).unwrap()
+    }
+
     macro_rules! turboquant_bench {
         (compress, $dim:literal, $bits:literal, $name:ident) => {
             paste! {
                 #[divan::bench(name = concat!("turboquant_compress_dim", stringify!($dim), "_", stringify!($bits), "bit"))]
                 fn $name(bencher: Bencher) {
-                    let ext = setup_vector_ext($dim);
+                    let normalized_ext = setup_normalized_vector_ext($dim);
                     let config = turboquant_config($bits);
                     with_byte_counter(bencher, (NUM_VECTORS * $dim * 4) as u64)
-                        .with_inputs(|| ext.clone())
+                        .with_inputs(|| normalized_ext.clone())
                         .bench_refs(|a| {
                             let mut ctx = SESSION.create_execution_ctx();
-                            turboquant_encode(a.as_view(), &config, &mut ctx).unwrap()
+                            let normalized = a
+                                .as_ref()
+                                .as_opt::<Extension>()
+                                .expect("normalized benchmark input should be an Extension array");
+                            // SAFETY: Benchmark inputs are normalized once up front so the timed
+                            // region measures only TurboQuant encoding.
+                            unsafe { turboquant_encode_unchecked(normalized, &config, &mut ctx) }
+                                .unwrap()
                         });
                 }
             }
@@ -512,10 +533,13 @@ mod turboquant_benches {
             paste! {
                 #[divan::bench(name = concat!("turboquant_decompress_dim", stringify!($dim), "_", stringify!($bits), "bit"))]
                 fn $name(bencher: Bencher) {
-                    let ext = setup_vector_ext($dim);
+                    let normalized_ext = setup_normalized_vector_ext($dim);
                     let config = turboquant_config($bits);
                     let mut ctx = SESSION.create_execution_ctx();
-                    let compressed = turboquant_encode(ext.as_view(), &config, &mut ctx).unwrap();
+                    let compressed = unsafe {
+                        turboquant_encode_unchecked(normalized_ext.as_view(), &config, &mut ctx)
+                    }
+                    .unwrap();
                     with_byte_counter(bencher, (NUM_VECTORS * $dim * 4) as u64)
                         .with_inputs(|| &compressed)
                         .bench_refs(|a| {