fix cosine similarity and dot product

connortsui20 · connortsui20 · commit 849d6a5f534e · 2026-04-03T18:54:02.000-04:00
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
diff --git a/vortex-tensor/src/encodings/turboquant/compute/cosine_similarity.rs b/vortex-tensor/src/encodings/turboquant/compute/cosine_similarity.rs
@@ -27,60 +27,56 @@
 //! distortion: at 4 bits the error is typically < 0.1, at 8 bits < 0.001.
 //!
 //! For approximate nearest neighbor (ANN) search, biased-but-accurate ranking is
-//! usually sufficient — the relative ordering of cosine similarities is preserved
+//! usually sufficient -- the relative ordering of cosine similarities is preserved
 //! even if the absolute values have bounded error.
 
+use num_traits::FromPrimitive;
+use num_traits::Zero;
 use vortex_array::ArrayRef;
 use vortex_array::ArrayView;
 use vortex_array::ExecutionCtx;
 use vortex_array::IntoArray;
 use vortex_array::arrays::FixedSizeListArray;
 use vortex_array::arrays::PrimitiveArray;
+use vortex_array::match_each_float_ptype;
 use vortex_array::validity::Validity;
 use vortex_buffer::BufferMut;
 use vortex_error::VortexResult;
-use vortex_error::vortex_ensure;
+use vortex_error::vortex_ensure_eq;
 
 use crate::encodings::turboquant::TurboQuant;
+use crate::utils::extension_element_ptype;
 
-/// Shared helper: read codes, norms, and centroids from two TurboQuant arrays,
-/// then compute per-row quantized unit-norm dot products.
+/// Convert an f32 value to `T`, returning `T::zero()` if the conversion fails.
 ///
-/// Both arrays must have the same dimension (vector length) and row count.
-/// They may have different codebooks (e.g., different bit widths), in which
-/// case each array's own centroids are used for its code lookups.
+/// This helper exists because `half::f16` has an inherent `from_f32` method that shadows
+/// the [`FromPrimitive`] trait method, causing compilation errors when used inside
+/// [`match_each_float_ptype!`].
+#[inline]
+fn f32_to_t<T: FromPrimitive + Zero>(v: f32) -> T {
+    FromPrimitive::from_f32(v).unwrap_or_else(T::zero)
+}
+
+/// Compute the per-row unit-norm dot products in f32 (centroids are always f32).
 ///
-/// Returns `(norms_a, norms_b, unit_dots)` where `unit_dots[i]` is the dot product
-/// of the unit-norm quantized vectors for row i.
-fn quantized_unit_dots(
-    lhs: ArrayView<TurboQuant>,
-    rhs: ArrayView<TurboQuant>,
+/// Returns a `Vec<f32>` of length `num_rows`.
+fn compute_unit_dots(
+    lhs: &ArrayView<TurboQuant>,
+    rhs: &ArrayView<TurboQuant>,
     ctx: &mut ExecutionCtx,
-) -> VortexResult<(Vec<f32>, Vec<f32>, Vec<f32>)> {
-    vortex_ensure!(
-        lhs.dimension() == rhs.dimension(),
-        "TurboQuant quantized dot product requires matching dimensions, got {} and {}",
-        lhs.dimension(),
-        rhs.dimension()
-    );
-
+) -> VortexResult<Vec<f32>> {
     let pd = lhs.padded_dim() as usize;
     let num_rows = lhs.norms().len();
 
-    let lhs_norms: PrimitiveArray = lhs.norms().clone().execute(ctx)?;
-    let rhs_norms: PrimitiveArray = rhs.norms().clone().execute(ctx)?;
-    let na = lhs_norms.as_slice::<f32>();
-    let nb = rhs_norms.as_slice::<f32>();
-
     let lhs_codes_fsl: FixedSizeListArray = lhs.codes().clone().execute(ctx)?;
     let rhs_codes_fsl: FixedSizeListArray = rhs.codes().clone().execute(ctx)?;
     let lhs_codes = lhs_codes_fsl.elements().to_canonical()?.into_primitive();
     let rhs_codes = rhs_codes_fsl.elements().to_canonical()?.into_primitive();
     let ca = lhs_codes.as_slice::<u8>();
     let cb = rhs_codes.as_slice::<u8>();
 
-    // Read centroids from both arrays — they may have different codebooks
-    // (e.g., different bit widths).
+    // Read centroids from both arrays. They may have different codebooks (e.g., different bit
+    // widths).
     let lhs_centroids: PrimitiveArray = lhs.centroids().clone().execute(ctx)?;
     let rhs_centroids: PrimitiveArray = rhs.centroids().clone().execute(ctx)?;
     let cl = lhs_centroids.as_slice::<f32>();
@@ -98,49 +94,75 @@ fn quantized_unit_dots(
         dots.push(dot);
     }
 
-    Ok((na.to_vec(), nb.to_vec(), dots))
+    Ok(dots)
 }
 
 /// Compute approximate cosine similarity for all rows between two TurboQuant
 /// arrays (same rotation matrix and codebook) without full decompression.
+///
+/// Since TurboQuant stores unit-normalized rotated vectors, the dot product of the quantized
+/// codes directly approximates cosine similarity without needing the stored norms.
+///
+/// The output dtype matches the Vector's element type (f16, f32, or f64).
 pub fn cosine_similarity_quantized_column(
     lhs: ArrayView<TurboQuant>,
     rhs: ArrayView<TurboQuant>,
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<ArrayRef> {
-    let num_rows = lhs.norms().len();
-    let (na, nb, dots) = quantized_unit_dots(lhs, rhs, ctx)?;
+    vortex_ensure_eq!(
+        lhs.dimension(),
+        rhs.dimension(),
+        "TurboQuant quantized dot product requires matching dimensions",
+    );
 
-    let mut result = BufferMut::<f32>::with_capacity(num_rows);
-    for row in 0..num_rows {
-        if na[row] == 0.0 || nb[row] == 0.0 {
-            result.push(0.0);
-        } else {
-            // Unit-norm dot product IS the cosine similarity.
-            result.push(dots[row]);
-        }
-    }
+    let element_ptype = extension_element_ptype(lhs.dtype().as_extension())?;
+    let dots = compute_unit_dots(&lhs, &rhs, ctx)?;
 
-    Ok(PrimitiveArray::new::<f32>(result.freeze(), Validity::NonNullable).into_array())
+    // The unit-norm dot product IS the cosine similarity. Cast from f32 to the native type.
+    match_each_float_ptype!(element_ptype, |T| {
+        let mut result = BufferMut::<T>::with_capacity(dots.len());
+        for &dot in &dots {
+            result.push(f32_to_t(dot));
+        }
+        Ok(PrimitiveArray::new::<T>(result.freeze(), Validity::NonNullable).into_array())
+    })
 }
 
 /// Compute approximate dot product for all rows between two TurboQuant
 /// arrays (same rotation matrix and codebook) without full decompression.
 ///
-/// `dot_product(a, b) ≈ ||a|| * ||b|| * sum(c[code_a[j]] * c[code_b[j]])`
+/// `dot_product(a, b) ~= ||a|| * ||b|| * sum(c[code_a[j]] * c[code_b[j]])`
+///
+/// The output dtype matches the Vector's element type (f16, f32, or f64).
 pub fn dot_product_quantized_column(
     lhs: ArrayView<TurboQuant>,
     rhs: ArrayView<TurboQuant>,
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<ArrayRef> {
+    vortex_ensure_eq!(
+        lhs.dimension(),
+        rhs.dimension(),
+        "TurboQuant quantized dot product requires matching dimensions",
+    );
+
+    let element_ptype = extension_element_ptype(lhs.dtype().as_extension())?;
+    let dots = compute_unit_dots(&lhs, &rhs, ctx)?;
     let num_rows = lhs.norms().len();
-    let (na, nb, dots) = quantized_unit_dots(lhs, rhs, ctx)?;
 
-    let mut result = BufferMut::<f32>::with_capacity(num_rows);
-    for row in 0..num_rows {
-        // Scale the unit-norm dot product by both norms to get the actual dot product.
-        result.push(na[row] * nb[row] * dots[row]);
-    }
+    let lhs_norms: PrimitiveArray = lhs.norms().clone().execute(ctx)?;
+    let rhs_norms: PrimitiveArray = rhs.norms().clone().execute(ctx)?;
+
+    // Scale the f32 unit-norm dot product by native-precision norms.
+    match_each_float_ptype!(element_ptype, |T| {
+        let na = lhs_norms.as_slice::<T>();
+        let nb = rhs_norms.as_slice::<T>();
+
+        let mut result = BufferMut::<T>::with_capacity(num_rows);
+        for row in 0..num_rows {
+            let dot_t: T = f32_to_t(dots[row]);
+            result.push(na[row] * nb[row] * dot_t);
+        }
 
-    Ok(PrimitiveArray::new::<f32>(result.freeze(), Validity::NonNullable).into_array())
+        Ok(PrimitiveArray::new::<T>(result.freeze(), Validity::NonNullable).into_array())
+    })
 }
diff --git a/vortex-tensor/src/encodings/turboquant/mod.rs b/vortex-tensor/src/encodings/turboquant/mod.rs
@@ -90,14 +90,6 @@
 //! assert!(encoded.nbytes() < 51200);
 //! ```
 
-use vortex_array::session::ArraySessionExt;
-use vortex_session::VortexSession;
-
-/// Initialize the TurboQuant encoding in the given session.
-pub fn initialize(session: &mut VortexSession) {
-    session.arrays().register(TurboQuant);
-}
-
 mod array;
 pub use array::data::TurboQuantData;
 pub use array::scheme::TurboQuantScheme;
diff --git a/vortex-tensor/src/encodings/turboquant/tests.rs b/vortex-tensor/src/encodings/turboquant/tests.rs
@@ -798,3 +798,114 @@ fn nullable_slice_preserves_validity() -> VortexResult<()> {
     }
     Ok(())
 }
+
+// -----------------------------------------------------------------------
+// Serde roundtrip tests
+// -----------------------------------------------------------------------
+
+/// Verify that a TurboQuant array survives serialize/deserialize.
+#[test]
+fn serde_roundtrip() -> VortexResult<()> {
+    use vortex_array::ArrayContext;
+    use vortex_array::ArrayEq;
+    use vortex_array::Precision;
+    use vortex_array::serde::SerializeOptions;
+    use vortex_array::serde::SerializedArray;
+    use vortex_array::session::ArraySessionExt;
+    use vortex_buffer::ByteBufferMut;
+    use vortex_fastlanes::BitPacked;
+    use vortex_session::registry::ReadContext;
+
+    let fsl = make_fsl(20, 128, 42);
+    let ext = make_vector_ext(&fsl);
+    let config = TurboQuantConfig {
+        bit_width: 3,
+        seed: Some(123),
+    };
+    let mut ctx = SESSION.create_execution_ctx();
+    let encoded = turboquant_encode(&ext, &config, &mut ctx)?;
+
+    let dtype = encoded.dtype().clone();
+    let len = encoded.len();
+
+    // Serialize.
+    let array_ctx = ArrayContext::empty();
+    let serialized = encoded.serialize(&array_ctx, &SerializeOptions::default())?;
+
+    let mut concat = ByteBufferMut::empty();
+    for buf in serialized {
+        concat.extend_from_slice(buf.as_ref());
+    }
+
+    // Deserialize. The session needs TurboQuant and BitPacked (for rotation signs) registered.
+    let serde_session = VortexSession::empty().with::<ArraySession>();
+    serde_session.arrays().register(TurboQuant);
+    serde_session.arrays().register(BitPacked);
+
+    let parts = SerializedArray::try_from(concat.freeze())?;
+    let decoded = parts.decode(
+        &dtype,
+        len,
+        &ReadContext::new(array_ctx.to_ids()),
+        &serde_session,
+    )?;
+
+    assert!(
+        decoded.array_eq(&encoded, Precision::Value),
+        "serde roundtrip did not preserve array equality"
+    );
+    Ok(())
+}
+
+/// Verify that a degenerate (empty) TurboQuant array survives serialize/deserialize.
+#[test]
+fn serde_roundtrip_empty() -> VortexResult<()> {
+    use vortex_array::ArrayContext;
+    use vortex_array::ArrayEq;
+    use vortex_array::Precision;
+    use vortex_array::serde::SerializeOptions;
+    use vortex_array::serde::SerializedArray;
+    use vortex_array::session::ArraySessionExt;
+    use vortex_buffer::ByteBufferMut;
+    use vortex_fastlanes::BitPacked;
+    use vortex_session::registry::ReadContext;
+
+    let fsl = make_fsl(0, 128, 42);
+    let ext = make_vector_ext(&fsl);
+    let config = TurboQuantConfig {
+        bit_width: 2,
+        seed: Some(123),
+    };
+    let mut ctx = SESSION.create_execution_ctx();
+    let encoded = turboquant_encode(&ext, &config, &mut ctx)?;
+    assert_eq!(encoded.len(), 0);
+
+    let dtype = encoded.dtype().clone();
+    let len = encoded.len();
+
+    let array_ctx = ArrayContext::empty();
+    let serialized = encoded.serialize(&array_ctx, &SerializeOptions::default())?;
+
+    let mut concat = ByteBufferMut::empty();
+    for buf in serialized {
+        concat.extend_from_slice(buf.as_ref());
+    }
+
+    let serde_session = VortexSession::empty().with::<ArraySession>();
+    serde_session.arrays().register(TurboQuant);
+    serde_session.arrays().register(BitPacked);
+
+    let parts = SerializedArray::try_from(concat.freeze())?;
+    let decoded = parts.decode(
+        &dtype,
+        len,
+        &ReadContext::new(array_ctx.to_ids()),
+        &serde_session,
+    )?;
+
+    assert!(
+        decoded.array_eq(&encoded, Precision::Value),
+        "serde roundtrip did not preserve array equality"
+    );
+    Ok(())
+}
diff --git a/vortex-tensor/src/encodings/turboquant/vtable.rs b/vortex-tensor/src/encodings/turboquant/vtable.rs
@@ -74,7 +74,7 @@ impl TurboQuant {
 
     /// Creates a new [`TurboQuantArray`].
     ///
-    /// Internallay calls [`TurboQuantData::try_new`].
+    /// Internally calls [`TurboQuantData::try_new`].
     pub fn try_new_array(
         dtype: DType,
         codes: ArrayRef,
@@ -101,7 +101,7 @@ impl VTable for TurboQuant {
         Self::ID
     }
 
-    fn validate(&self, data: &Self::ArrayData, dtype: &DType, _len: usize) -> VortexResult<()> {
+    fn validate(&self, data: &Self::ArrayData, dtype: &DType, len: usize) -> VortexResult<()> {
         let ext = dtype
             .as_extension_opt()
             .filter(|e| e.is::<Vector>())
@@ -117,8 +117,15 @@ impl VTable for TurboQuant {
 
         vortex_ensure_eq!(data.dimension(), dimension);
 
-        // TODO(connor): In the future, we will not need to validate `len` on the array data because
+        // TODO(connor): In the future, we may not need to validate `len` on the array data because
         // the child arrays will be located somewhere else.
+        // bit_width == 0 is only valid for degenerate (empty) arrays. A non-empty array with
+        // bit_width == 0 would have zero centroids while codes reference centroid indices.
+        vortex_ensure!(
+            data.bit_width > 0 || len == 0,
+            "bit_width == 0 is only valid for empty arrays, got len={len}"
+        );
+
         Ok(())
     }
 
@@ -187,6 +194,13 @@ impl VTable for TurboQuant {
 
         let bit_width = metadata[0];
 
+        // bit_width == 0 is only valid for degenerate (empty) arrays. A non-empty array with
+        // bit_width == 0 would have zero centroids while codes reference centroid indices.
+        vortex_ensure!(
+            bit_width > 0 || len == 0,
+            "bit_width == 0 is only valid for empty arrays, got len={len}"
+        );
+
         // Validate and derive dimension and element ptype from the Vector extension dtype.
         let ext = TurboQuant::validate_dtype(dtype)?;
         let dimension = extension_list_size(ext)?;