Fix TurboQuant unit-norm validation; add unchecked encode and constructor variants

connortsui20 · connortsui20 · commit eb802bf82a23 · 2026-04-08T14:47:10.000-04:00
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
diff --git a/vortex-tensor/public-api.lock b/vortex-tensor/public-api.lock
@@ -16,6 +16,8 @@ pub const vortex_tensor::encodings::turboquant::TurboQuant::MAX_CENTROIDS: usize
 
 pub const vortex_tensor::encodings::turboquant::TurboQuant::MIN_DIMENSION: u32
 
+pub unsafe fn vortex_tensor::encodings::turboquant::TurboQuant::new_array_unchecked(dtype: vortex_array::dtype::DType, codes: vortex_array::array::erased::ArrayRef, centroids: vortex_array::array::erased::ArrayRef, rotation_signs: vortex_array::array::erased::ArrayRef) -> vortex_tensor::encodings::turboquant::TurboQuantArray
+
 pub fn vortex_tensor::encodings::turboquant::TurboQuant::try_new_array(dtype: vortex_array::dtype::DType, codes: vortex_array::array::erased::ArrayRef, centroids: vortex_array::array::erased::ArrayRef, rotation_signs: vortex_array::array::erased::ArrayRef) -> vortex_error::VortexResult<vortex_tensor::encodings::turboquant::TurboQuantArray>
 
 pub fn vortex_tensor::encodings::turboquant::TurboQuant::validate_dtype(dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<vortex_tensor::vector::VectorMatcherMetadata>
@@ -176,6 +178,8 @@ pub fn T::rotation_signs(&self) -> &vortex_array::array::erased::ArrayRef
 
 pub fn vortex_tensor::encodings::turboquant::turboquant_encode(ext: vortex_array::array::view::ArrayView<'_, vortex_array::arrays::extension::vtable::Extension>, config: &vortex_tensor::encodings::turboquant::TurboQuantConfig, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
 
+pub unsafe fn vortex_tensor::encodings::turboquant::turboquant_encode_unchecked(ext: vortex_array::array::view::ArrayView<'_, vortex_array::arrays::extension::vtable::Extension>, config: &vortex_tensor::encodings::turboquant::TurboQuantConfig, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
+
 pub type vortex_tensor::encodings::turboquant::TurboQuantArray = vortex_array::array::typed::Array<vortex_tensor::encodings::turboquant::TurboQuant>
 
 pub mod vortex_tensor::fixed_shape
@@ -454,6 +458,8 @@ pub fn vortex_tensor::scalar_fns::l2_denorm::L2Denorm::return_dtype(&self, _opti
 
 pub fn vortex_tensor::scalar_fns::l2_denorm::L2Denorm::validity(&self, _options: &Self::Options, expression: &vortex_array::expr::expression::Expression) -> vortex_error::VortexResult<core::option::Option<vortex_array::expr::expression::Expression>>
 
+pub fn vortex_tensor::scalar_fns::l2_denorm::normalize_as_l2_denorm(options: &vortex_tensor::scalar_fns::ApproxOptions, input: vortex_array::array::erased::ArrayRef, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::arrays::scalar_fn::vtable::ScalarFnArray>
+
 pub mod vortex_tensor::scalar_fns::l2_norm
 
 pub struct vortex_tensor::scalar_fns::l2_norm::L2Norm
diff --git a/vortex-tensor/src/encodings/turboquant/mod.rs b/vortex-tensor/src/encodings/turboquant/mod.rs
@@ -86,13 +86,17 @@
 //! use vortex_array::arrays::ExtensionArray;
 //! use vortex_array::arrays::FixedSizeListArray;
 //! use vortex_array::arrays::PrimitiveArray;
+//! use vortex_array::arrays::Extension;
+//! use vortex_array::arrays::scalar_fn::ScalarFnArrayExt;
 //! use vortex_array::dtype::extension::ExtDType;
 //! use vortex_array::extension::EmptyMetadata;
 //! use vortex_array::validity::Validity;
 //! use vortex_buffer::BufferMut;
 //! use vortex_array::session::ArraySession;
 //! use vortex_session::VortexSession;
-//! use vortex_tensor::encodings::turboquant::{TurboQuantConfig, turboquant_encode};
+//! use vortex_tensor::encodings::turboquant::{TurboQuantConfig, turboquant_encode_unchecked};
+//! use vortex_tensor::scalar_fns::ApproxOptions;
+//! use vortex_tensor::scalar_fns::l2_denorm::normalize_as_l2_denorm;
 //! use vortex_tensor::vector::Vector;
 //!
 //! // Create a Vector extension array of 100 random 128-d vectors.
@@ -110,14 +114,23 @@
 //!     .unwrap().erased();
 //! let ext = ExtensionArray::new(ext_dtype, fsl.into_array());
 //!
-//! // Quantize at 2 bits per coordinate.
-//! let config = TurboQuantConfig { bit_width: 2, seed: Some(42), num_rounds: 3 };
+//! // Normalize, then quantize the normalized child at 2 bits per coordinate.
 //! let session = VortexSession::empty().with::<ArraySession>();
 //! let mut ctx = session.create_execution_ctx();
-//! let encoded = turboquant_encode(ext.as_view(), &config, &mut ctx).unwrap();
+//! let l2_denorm = normalize_as_l2_denorm(
+//!     &ApproxOptions::Exact, ext.into_array(), &mut ctx,
+//! ).unwrap();
+//! let normalized = l2_denorm.child_at(0).clone();
+//!
+//! let normalized_ext = normalized.as_opt::<Extension>().unwrap();
+//! let config = TurboQuantConfig { bit_width: 2, seed: Some(42), num_rounds: 3 };
+//! // SAFETY: We just normalized the input.
+//! let tq = unsafe {
+//!     turboquant_encode_unchecked(normalized_ext, &config, &mut ctx).unwrap()
+//! };
 //!
 //! // Verify compression: 100 vectors x 128 dims x 4 bytes = 51200 bytes input.
-//! assert!(encoded.nbytes() < 51200);
+//! assert!(tq.nbytes() < 51200);
 //! ```
 
 mod array;
@@ -137,6 +150,7 @@ mod scheme;
 pub use scheme::TurboQuantScheme;
 pub use scheme::compress::TurboQuantConfig;
 pub use scheme::compress::turboquant_encode;
+pub use scheme::compress::turboquant_encode_unchecked;
 
 #[cfg(test)]
 mod tests;
diff --git a/vortex-tensor/src/encodings/turboquant/scheme/compress.rs b/vortex-tensor/src/encodings/turboquant/scheme/compress.rs
@@ -8,6 +8,7 @@
 //! externally by [`normalize_as_l2_denorm`](crate::scalar_fns::l2_denorm::normalize_as_l2_denorm),
 //! which the [`TurboQuantScheme`](super::TurboQuantScheme) calls before invoking this function.
 
+use num_traits::ToPrimitive;
 use vortex_array::ArrayRef;
 use vortex_array::ArrayView;
 use vortex_array::ExecutionCtx;
@@ -19,6 +20,7 @@ use vortex_array::arrays::extension::ExtensionArrayExt;
 use vortex_array::arrays::fixed_size_list::FixedSizeListArrayExt;
 use vortex_array::dtype::Nullability;
 use vortex_array::dtype::PType;
+use vortex_array::match_each_float_ptype;
 use vortex_array::validity::Validity;
 use vortex_buffer::BufferMut;
 use vortex_error::VortexExpect;
@@ -33,6 +35,13 @@ use crate::encodings::turboquant::array::centroids::find_nearest_centroid;
 use crate::encodings::turboquant::array::centroids::get_centroids;
 use crate::encodings::turboquant::array::rotation::RotationMatrix;
 use crate::encodings::turboquant::vtable::TurboQuantArray;
+use crate::scalar_fns::ApproxOptions;
+use crate::scalar_fns::l2_norm::L2Norm;
+use crate::vector::AnyVector;
+
+/// Exclusive bound on `|norm - 1.0|` per row in [`turboquant_encode`]; a norm of exactly 0.0 passes.
+/// NOTE(review): 1e-10 is below f32 epsilon (~1.2e-7), so f32/f16 norms must equal 1.0 — confirm.
+const UNIT_NORM_TOLERANCE: f64 = 1e-10;
 
 /// Configuration for TurboQuant encoding.
 #[derive(Clone, Debug)]
@@ -99,8 +108,9 @@ struct QuantizationResult {
 
 /// Core quantization: rotate and quantize already-normalized rows.
 ///
-/// The input `fsl` must contain unit-norm vectors (already L2-normalized). The rotation and
-/// centroid lookup happen in f32.
+/// The input `fsl` must contain non-nullable, unit-norm vectors (already L2-normalized). Null
+/// vectors are not supported and must be zeroed out before reaching this function. The rotation
+/// and centroid lookup happen in f32.
 fn turboquant_quantize_core(
     fsl: &FixedSizeListArray,
     seed: u64,
@@ -186,7 +196,12 @@ fn build_turboquant(
 /// [`TurboQuantArray`].
 ///
 /// The input must be a non-nullable Vector extension array whose rows are already unit-norm.
-/// Normalization is handled externally (e.g. by [`normalize_as_l2_denorm`]).
+/// **Null vectors are not supported.** The caller must normalize and strip nullability before
+/// calling this function, for example via [`normalize_as_l2_denorm`].
+///
+/// This function validates that every row has L2 norm within `UNIT_NORM_TOLERANCE` of 1.0 (or is
+/// exactly 0.0). Use [`turboquant_encode_unchecked`] to skip this check when the caller has just
+/// performed normalization.
 ///
 /// The returned array is a plain [`TurboQuantArray`] that decompresses to unit-norm vectors.
 /// The caller is responsible for wrapping it in an [`L2Denorm`] ScalarFnArray if the original
@@ -200,13 +215,61 @@ pub fn turboquant_encode(
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<ArrayRef> {
     let ext_dtype = ext.dtype().clone();
-    let storage = ext.storage_array();
-    let fsl = storage.clone().execute::<FixedSizeListArray>(ctx)?;
 
     vortex_ensure!(
         !ext_dtype.is_nullable(),
         "TurboQuant input must be non-nullable (normalize first via L2Denorm), got {ext_dtype}",
     );
+
+    // Validate that all rows are unit-norm (or zero).
+    let num_rows = ext.as_ref().len();
+    if num_rows > 0 {
+        let norms_sfn =
+            L2Norm::try_new_array(&ApproxOptions::Exact, ext.as_ref().clone(), num_rows)?;
+        let norms: PrimitiveArray = norms_sfn.into_array().execute(ctx)?;
+
+        let element_ptype = ext_dtype
+            .as_extension()
+            .metadata::<AnyVector>()
+            .element_ptype();
+
+        match_each_float_ptype!(element_ptype, |T| {
+            for (i, &norm) in norms.as_slice::<T>().iter().enumerate() {
+                let norm_f64: f64 = ToPrimitive::to_f64(&norm).unwrap_or(f64::NAN);
+                vortex_ensure!(
+                    norm_f64 == 0.0 || (norm_f64 - 1.0).abs() < UNIT_NORM_TOLERANCE,
+                    "TurboQuant requires unit-norm input, but row {i} has L2 norm {norm_f64:.6} \
+                     (expected 1.0 or 0.0)",
+                );
+            }
+        });
+    }
+
+    // SAFETY: Validated above: the input is non-nullable and every row is unit-norm or zero.
+    unsafe { turboquant_encode_unchecked(ext, config, ctx) }
+}
+
+/// Encode a non-nullable, L2-normalized [`Vector`](crate::vector::Vector) extension array into a
+/// [`TurboQuantArray`], without validating the unit-norm precondition.
+///
+/// # Safety
+///
+/// The caller must ensure:
+///
+/// - The input dtype is non-nullable.
+/// - Every row is L2-normalized (unit norm) or is a zero vector.
+///
+/// Passing non-unit-norm vectors will not cause memory unsafety, but will produce silently
+/// incorrect quantization results.
+pub unsafe fn turboquant_encode_unchecked(
+    ext: ArrayView<Extension>,
+    config: &TurboQuantConfig,
+    ctx: &mut ExecutionCtx,
+) -> VortexResult<ArrayRef> {
+    let ext_dtype = ext.dtype().clone();
+    let storage = ext.storage_array();
+    let fsl = storage.clone().execute::<FixedSizeListArray>(ctx)?;
+
     vortex_ensure!(
         config.bit_width >= 1 && config.bit_width <= TurboQuant::MAX_BIT_WIDTH,
         "bit_width must be 1-{}, got {}",
diff --git a/vortex-tensor/src/encodings/turboquant/scheme/mod.rs b/vortex-tensor/src/encodings/turboquant/scheme/mod.rs
@@ -27,7 +27,7 @@ use vortex_error::VortexResult;
 
 use crate::encodings::turboquant::TurboQuant;
 use crate::encodings::turboquant::TurboQuantConfig;
-use crate::encodings::turboquant::turboquant_encode;
+use crate::encodings::turboquant::turboquant_encode_unchecked;
 use crate::scalar_fns::ApproxOptions;
 use crate::scalar_fns::l2_denorm::L2Denorm;
 use crate::scalar_fns::l2_denorm::normalize_as_l2_denorm;
@@ -112,7 +112,9 @@ impl Scheme for TurboQuantScheme {
             .as_opt::<Extension>()
             .vortex_expect("normalized child should be an Extension array");
         let config = TurboQuantConfig::default();
-        let tq = turboquant_encode(normalized_ext, &config, &mut ctx)?;
+        // SAFETY: We just normalized the input via `normalize_as_l2_denorm`, so all rows are
+        // guaranteed to be unit-norm (or zero for originally-null rows).
+        let tq = unsafe { turboquant_encode_unchecked(normalized_ext, &config, &mut ctx)? };
 
         // Reassemble L2Denorm(TurboQuant, norms).
         Ok(L2Denorm::try_new_array(&ApproxOptions::Exact, tq, norms, num_rows)?.into_array())
diff --git a/vortex-tensor/src/encodings/turboquant/tests.rs b/vortex-tensor/src/encodings/turboquant/tests.rs
@@ -11,6 +11,7 @@ use rstest::rstest;
 use vortex_array::ArrayRef;
 use vortex_array::IntoArray;
 use vortex_array::VortexSessionExecute;
+use vortex_array::arrays::Extension;
 use vortex_array::arrays::ExtensionArray;
 use vortex_array::arrays::FixedSizeListArray;
 use vortex_array::arrays::PrimitiveArray;
@@ -24,6 +25,7 @@ use vortex_array::extension::EmptyMetadata;
 use vortex_array::session::ArraySession;
 use vortex_array::validity::Validity;
 use vortex_buffer::BufferMut;
+use vortex_error::VortexExpect;
 use vortex_error::VortexResult;
 use vortex_session::VortexSession;
 
@@ -32,6 +34,7 @@ use crate::encodings::turboquant::TurboQuantArrayExt;
 use crate::encodings::turboquant::TurboQuantConfig;
 use crate::encodings::turboquant::array::rotation::RotationMatrix;
 use crate::encodings::turboquant::turboquant_encode;
+use crate::encodings::turboquant::turboquant_encode_unchecked;
 use crate::scalar_fns::ApproxOptions;
 use crate::scalar_fns::l2_denorm::L2Denorm;
 use crate::scalar_fns::l2_denorm::normalize_as_l2_denorm;
@@ -100,7 +103,7 @@ fn make_vector_ext(fsl: &FixedSizeListArray) -> ExtensionArray {
 /// Full encode pipeline: normalize, then TQ-encode, then wrap in L2Denorm.
 ///
 /// This mirrors what `TurboQuantScheme::compress()` does: normalize via `normalize_as_l2_denorm`,
-/// then quantize the normalized child via `turboquant_encode`, then reassemble.
+/// then quantize the normalized child via `turboquant_encode_unchecked`, then reassemble.
 fn normalize_and_encode(
     ext: &ExtensionArray,
     config: &TurboQuantConfig,
@@ -112,9 +115,10 @@ fn normalize_and_encode(
     let num_rows = l2_denorm.len();
 
     let normalized_ext = normalized
-        .as_opt::<vortex_array::arrays::Extension>()
-        .expect("normalized child should be an Extension array");
-    let tq = turboquant_encode(normalized_ext, config, ctx)?;
+        .as_opt::<Extension>()
+        .vortex_expect("normalized child should be an Extension array");
+    // SAFETY: We just normalized the input via `normalize_as_l2_denorm`.
+    let tq = unsafe { turboquant_encode_unchecked(normalized_ext, config, ctx)? };
 
     Ok(L2Denorm::try_new_array(&ApproxOptions::Exact, tq, norms, num_rows)?.into_array())
 }
diff --git a/vortex-tensor/src/encodings/turboquant/vtable.rs b/vortex-tensor/src/encodings/turboquant/vtable.rs
@@ -26,6 +26,7 @@ use vortex_array::serde::ArrayChildren;
 use vortex_array::validity::Validity;
 use vortex_array::vtable::VTable;
 use vortex_array::vtable::ValidityVTable;
+use vortex_error::VortexExpect;
 use vortex_error::VortexResult;
 use vortex_error::vortex_ensure;
 use vortex_error::vortex_ensure_eq;
@@ -89,7 +90,8 @@ impl TurboQuant {
     /// Nullability is handled externally by the [`L2Denorm`](crate::scalar_fns::l2_denorm::L2Denorm)
     /// ScalarFnArray wrapper.
     ///
-    /// Internally calls [`TurboQuantData::validate`] and [`TurboQuantData::try_new`].
+    /// Internally calls [`TurboQuantData::validate`] and [`TurboQuantData::try_new`], then
+    /// delegates to [`new_array_unchecked`](Self::new_array_unchecked).
     pub fn try_new_array(
         dtype: DType,
         codes: ArrayRef,
@@ -98,25 +100,66 @@ impl TurboQuant {
     ) -> VortexResult<TurboQuantArray> {
         TurboQuantData::validate(&dtype, &codes, &centroids, &rotation_signs)?;
 
+        Ok(unsafe { Self::new_array_unchecked(dtype, codes, centroids, rotation_signs) })
+    }
+
+    /// Creates a new [`TurboQuantArray`] without validation.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure all invariants required by [`TurboQuantData::validate`] hold:
+    ///
+    /// - `dtype` is a non-nullable [`Vector`](crate::vector::Vector) extension type with
+    ///   dimension >= [`MIN_DIMENSION`](Self::MIN_DIMENSION).
+    /// - `codes` is a non-nullable `FixedSizeList<u8>` with `list_size == padded_dim`.
+    /// - `centroids` is a non-nullable `Primitive<f32>` with a power-of-2 length in
+    ///   `[2, MAX_CENTROIDS]` (or empty for degenerate arrays).
+    /// - `rotation_signs` is a non-nullable `FixedSizeList<u8>` with `list_size == padded_dim`.
+    ///
+    /// Violating these invariants may produce incorrect results during decompression or panics
+    /// during array access.
+    pub unsafe fn new_array_unchecked(
+        dtype: DType,
+        codes: ArrayRef,
+        centroids: ArrayRef,
+        rotation_signs: ArrayRef,
+    ) -> TurboQuantArray {
+        #[cfg(debug_assertions)]
+        TurboQuantData::validate(&dtype, &codes, &centroids, &rotation_signs)
+            .vortex_expect("[DEBUG ASSERTION]: TurboQuantData arrays are invalid");
+
         let len = codes.len();
-        let vector_metadata = TurboQuant::validate_dtype(&dtype)?;
+
+        let dimension = dtype
+            .as_extension_opt()
+            .and_then(|ext| ext.metadata_opt::<AnyVector>())
+            .map(|m| m.dimensions())
+            .unwrap_or(0);
 
         let bit_width = if centroids.is_empty() {
             0
         } else {
-            u8::try_from(centroids.len().trailing_zeros())
-                .map_err(|_| vortex_err!("centroids bit_width does not fit in u8"))?
+            #[expect(
+                clippy::cast_possible_truncation,
+                reason = "bit_width is guaranteed <= 8"
+            )]
+            (centroids.len().trailing_zeros() as u8)
         };
 
-        // Derive num_rounds from the FSL rotation_signs length (0 for degenerate arrays).
-        let num_rounds = u8::try_from(rotation_signs.len())
-            .map_err(|_| vortex_err!("rotation_signs num_rounds does not fit in u8"))?;
+        #[expect(
+            clippy::cast_possible_truncation,
+            reason = "num_rounds fits in u8 by the caller's invariants"
+        )]
+        let num_rounds = rotation_signs.len() as u8;
 
-        let data = TurboQuantData::try_new(vector_metadata.dimensions(), bit_width, num_rounds)?;
+        // SAFETY: The caller guarantees that dimension, bit_width, and num_rounds satisfy the
+        // invariants documented on `TurboQuantData::new_unchecked`.
+        let data = unsafe { TurboQuantData::new_unchecked(dimension, bit_width, num_rounds) };
         let parts = ArrayParts::new(TurboQuant, dtype, len, data)
             .with_slots(TurboQuantData::make_slots(codes, centroids, rotation_signs));
 
-        Array::try_from_parts(parts)
+        // SAFETY: The caller guarantees the parts are logically consistent.
+        unsafe { Array::from_parts_unchecked(parts) }
     }
 }
 
diff --git a/vortex-tensor/src/scalar_fns/l2_denorm.rs b/vortex-tensor/src/scalar_fns/l2_denorm.rs