change defaults and constraints and tests

connortsui20 · connortsui20 · commit 5285a1331d3b · 2026-04-04T13:08:51.000-04:00
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
diff --git a/vortex-tensor/public-api.lock b/vortex-tensor/public-api.lock
@@ -10,6 +10,8 @@ impl vortex_tensor::encodings::turboquant::TurboQuant
 
 pub const vortex_tensor::encodings::turboquant::TurboQuant::ID: vortex_array::array::ArrayId
 
+pub const vortex_tensor::encodings::turboquant::TurboQuant::MIN_DIMENSION: u32
+
 pub fn vortex_tensor::encodings::turboquant::TurboQuant::try_new_array(dtype: vortex_array::dtype::DType, codes: vortex_array::array::erased::ArrayRef, norms: vortex_array::array::erased::ArrayRef, centroids: vortex_array::array::erased::ArrayRef, rotation_signs: vortex_array::array::erased::ArrayRef) -> vortex_error::VortexResult<vortex_tensor::encodings::turboquant::TurboQuantArray>
 
 pub fn vortex_tensor::encodings::turboquant::TurboQuant::validate_dtype(dtype: &vortex_array::dtype::DType) -> vortex_error::VortexResult<&vortex_array::dtype::extension::erased::ExtDTypeRef>
diff --git a/vortex-tensor/src/encodings/turboquant/array/centroids.rs b/vortex-tensor/src/encodings/turboquant/array/centroids.rs
@@ -15,6 +15,8 @@ use vortex_error::VortexResult;
 use vortex_error::vortex_bail;
 use vortex_utils::aliases::dash_map::DashMap;
 
+use crate::encodings::turboquant::TurboQuant;
+
 /// Number of numerical integration points for computing conditional expectations.
 const INTEGRATION_POINTS: usize = 1000;
 
@@ -36,8 +38,11 @@ pub fn get_centroids(dimension: u32, bit_width: u8) -> VortexResult<Vec<f32>> {
     if !(1..=8).contains(&bit_width) {
         vortex_bail!("TurboQuant bit_width must be 1-8, got {bit_width}");
     }
-    if dimension < 3 {
-        vortex_bail!("TurboQuant dimension must be >= 3, got {dimension}");
+    if dimension < TurboQuant::MIN_DIMENSION {
+        vortex_bail!(
+            "TurboQuant dimension must be >= {}, got {dimension}",
+            TurboQuant::MIN_DIMENSION
+        );
     }
 
     if let Some(centroids) = CENTROID_CACHE.get(&(dimension, bit_width)) {
@@ -306,6 +311,6 @@ mod tests {
         assert!(get_centroids(128, 0).is_err());
         assert!(get_centroids(128, 9).is_err());
         assert!(get_centroids(1, 2).is_err());
-        assert!(get_centroids(2, 2).is_err());
+        assert!(get_centroids(127, 2).is_err());
     }
 }
diff --git a/vortex-tensor/src/encodings/turboquant/array/data.rs b/vortex-tensor/src/encodings/turboquant/array/data.rs
@@ -92,7 +92,7 @@ impl TurboQuantData {
     /// The caller must ensure:
     ///
     /// - `dtype` is a [`Vector`](crate::vector::Vector) extension type whose storage list size
-    ///   is >= 3.
+    ///   is >= [`MIN_DIMENSION`](crate::encodings::turboquant::TurboQuant::MIN_DIMENSION).
     /// - `codes` is a non-nullable `FixedSizeListArray<u8>` with `list_size == padded_dim` and
     ///   `codes.len() == norms.len()`. Null vectors are represented by all-zero codes.
     /// - `norms` is a primitive array whose ptype matches the element type of the Vector's storage
diff --git a/vortex-tensor/src/encodings/turboquant/array/scheme.rs b/vortex-tensor/src/encodings/turboquant/array/scheme.rs
@@ -114,12 +114,12 @@ mod tests {
     /// f32 input at 768-d (padded to 1024) with 1000 vectors should give ~4-6x.
     /// f32 input at 1024-d (no padding) should give higher ratio since no waste.
     #[rstest]
-    #[case::f32_768d(32, 768, 1000, 3.5, 8.0)]
-    #[case::f32_1024d(32, 1024, 1000, 5.0, 9.0)]
-    #[case::f32_1536d(32, 1536, 1000, 3.0, 8.0)]
-    #[case::f32_128d(32, 128, 1000, 4.0, 8.0)]
-    #[case::f64_768d(64, 768, 1000, 7.0, 16.0)]
-    #[case::f16_768d(16, 768, 1000, 1.5, 4.5)]
+    #[case::f32_768d(32, 768, 1000, 2.5, 4.0)]
+    #[case::f32_1024d(32, 1024, 1000, 3.5, 5.0)]
+    #[case::f32_1536d(32, 1536, 1000, 2.5, 4.0)]
+    #[case::f32_128d(32, 128, 1000, 3.0, 5.0)]
+    #[case::f64_768d(64, 768, 1000, 5.0, 7.0)]
+    #[case::f16_768d(16, 768, 1000, 1.2, 2.0)]
     fn compression_ratio_in_expected_range(
         #[case] bits_per_element: usize,
         #[case] dim: u32,
diff --git a/vortex-tensor/src/encodings/turboquant/compress.rs b/vortex-tensor/src/encodings/turboquant/compress.rs
@@ -43,7 +43,7 @@ pub struct TurboQuantConfig {
 impl Default for TurboQuantConfig {
     fn default() -> Self {
         Self {
-            bit_width: 4,
+            bit_width: 8,
             seed: Some(42),
         }
     }
@@ -226,8 +226,9 @@ pub fn turboquant_encode(
     );
     let dimension = fsl.list_size();
     vortex_ensure!(
-        dimension >= 3,
-        "TurboQuant requires dimension >= 3, got {dimension}"
+        dimension >= TurboQuant::MIN_DIMENSION,
+        "TurboQuant requires dimension >= {}, got {dimension}",
+        TurboQuant::MIN_DIMENSION
     );
 
     if fsl.is_empty() {
diff --git a/vortex-tensor/src/encodings/turboquant/compute/cosine_similarity.rs b/vortex-tensor/src/encodings/turboquant/compute/cosine_similarity.rs
@@ -4,9 +4,9 @@
 //! Approximate cosine similarity in the quantized domain.
 //!
 //! Since the SRHT is orthogonal, inner products are preserved in the rotated
-//! domain. For two vectors from the same TurboQuant column (same rotation and
-//! centroids), we can compute the dot product of their quantized representations
-//! without full decompression:
+//! domain. For two TurboQuant arrays that share the same SRHT rotation (i.e.,
+//! encoded from the same column), we can compute the dot product of their
+//! quantized representations without full decompression:
 //!
 //! ```text
 //! cos_approx(a, b) = sum(centroids[code_a[j]] × centroids[code_b[j]])
@@ -85,8 +85,12 @@ fn compute_unit_dots(
     Ok(dots)
 }
 
-/// Compute approximate cosine similarity for all rows between two TurboQuant
-/// arrays (same rotation matrix and codebook) without full decompression.
+/// Compute approximate cosine similarity for all rows between two TurboQuant arrays without
+/// full decompression.
+///
+/// Both arrays must share the same SRHT rotation (i.e., were encoded from the same TurboQuant
+/// column). For this function, results are meaningless if the rotations differ (there are other
+/// methods that can allow this, but that is future work).
 ///
 /// Since TurboQuant stores unit-normalized rotated vectors, the dot product of the quantized
 /// codes directly approximates cosine similarity without needing the stored norms.
@@ -120,8 +124,12 @@ pub fn cosine_similarity_quantized_column(
     })
 }
 
-/// Compute approximate dot product for all rows between two TurboQuant
-/// arrays (same rotation matrix and codebook) without full decompression.
+/// Compute approximate dot product for all rows between two TurboQuant arrays without
+/// full decompression.
+///
+/// Both arrays must share the same SRHT rotation (i.e., were encoded from the same TurboQuant
+/// column). For this function, results are meaningless if the rotations differ (there are other
+/// methods that can allow this, but that is future work).
 ///
 /// `dot_product(a, b) = ||a|| * ||b|| * sum(c[code_a[j]] * c[code_b[j]])`
 ///
diff --git a/vortex-tensor/src/encodings/turboquant/tests.rs b/vortex-tensor/src/encodings/turboquant/tests.rs
@@ -152,11 +152,9 @@ fn encode_decode(
 // -----------------------------------------------------------------------
 
 #[rstest]
-#[case(32, 1)]
-#[case(32, 2)]
-#[case(32, 3)]
-#[case(32, 4)]
+#[case(128, 1)]
 #[case(128, 2)]
+#[case(128, 3)]
 #[case(128, 4)]
 #[case(128, 6)]
 #[case(128, 8)]
@@ -280,8 +278,9 @@ fn roundtrip_edge_cases(#[case] num_rows: usize) -> VortexResult<()> {
 
 #[rstest]
 #[case(1)]
-#[case(2)]
-fn rejects_dimension_below_3(#[case] dim: usize) {
+#[case(64)]
+#[case(127)]
+fn rejects_dimension_below_128(#[case] dim: usize) {
     let fsl = make_fsl_small(dim);
     let ext = make_vector_ext(&fsl);
     let config = TurboQuantConfig {
@@ -340,7 +339,7 @@ fn all_zero_vectors_roundtrip() -> VortexResult<()> {
 #[test]
 fn f64_input_encodes_successfully() -> VortexResult<()> {
     let num_rows = 10;
-    let dim = 64;
+    let dim = 128;
     let mut rng = StdRng::seed_from_u64(99);
     let normal = Normal::new(0.0f64, 1.0).unwrap();
 
@@ -371,6 +370,48 @@ fn f64_input_encodes_successfully() -> VortexResult<()> {
     Ok(())
 }
 
+/// Verify that f16 input is accepted and encoded (upcast to f32 internally).
+#[test]
+fn f16_input_encodes_successfully() -> VortexResult<()> {
+    let num_rows = 10;
+    let dim = 128;
+    let mut rng = StdRng::seed_from_u64(99);
+    let normal = Normal::new(0.0f32, 1.0).unwrap();
+
+    let mut buf = BufferMut::<half::f16>::with_capacity(num_rows * dim);
+    for _ in 0..(num_rows * dim) {
+        buf.push(half::f16::from_f32(normal.sample(&mut rng)));
+    }
+    let elements = PrimitiveArray::new::<half::f16>(buf.freeze(), Validity::NonNullable);
+    let fsl = FixedSizeListArray::try_new(
+        elements.into_array(),
+        dim.try_into()
+            .expect("somehow got dimension greater than u32::MAX"),
+        Validity::NonNullable,
+        num_rows,
+    )?;
+
+    let ext = make_vector_ext(&fsl);
+    let config = TurboQuantConfig {
+        bit_width: 3,
+        seed: Some(42),
+    };
+    let mut ctx = SESSION.create_execution_ctx();
+    let encoded = turboquant_encode(&ext, &config, &mut ctx)?;
+    let tq = encoded.as_opt::<TurboQuant>().unwrap();
+    assert_eq!(tq.norms().len(), num_rows);
+    assert_eq!(tq.dimension() as usize, dim);
+
+    // Verify roundtrip: decode and check reconstruction is reasonable.
+    let decoded_ext = encoded.execute::<ExtensionArray>(&mut ctx)?;
+    let decoded_fsl = decoded_ext
+        .storage_array()
+        .to_canonical()?
+        .into_fixed_size_list();
+    assert_eq!(decoded_fsl.len(), num_rows);
+    Ok(())
+}
+
 // -----------------------------------------------------------------------
 // Verification tests for stored metadata
 // -----------------------------------------------------------------------
@@ -494,7 +535,7 @@ fn slice_preserves_data() -> VortexResult<()> {
 
 #[test]
 fn scalar_at_matches_decompress() -> VortexResult<()> {
-    let fsl = make_fsl(10, 64, 42);
+    let fsl = make_fsl(10, 128, 42);
     let ext = make_vector_ext(&fsl);
     let config = TurboQuantConfig {
         bit_width: 3,
@@ -593,7 +634,9 @@ fn cosine_similarity_quantized_accuracy() -> VortexResult<()> {
                 .sum::<f32>()
         };
 
-        // 4-bit quantization: expect reasonable accuracy.
+        // At 4-bit, the theoretical normalized MSE bound per vector is ~0.0106 (Theorem 1). For
+        // cosine similarity (bounded [-1, 1]), the error is bounded roughly by 2*sqrt(MSE) ~ 0.2.
+        // We use 0.15 as a tighter empirical bound.
         let error = (exact_cos - approx_cos).abs();
         assert!(
             error < 0.15,
@@ -604,6 +647,105 @@ fn cosine_similarity_quantized_accuracy() -> VortexResult<()> {
     Ok(())
 }
 
+/// Verify approximate dot product in the quantized domain.
+///
+/// NOTE: The MSE quantizer (TurboQuant_mse) has inherent **multiplicative bias** for inner
+/// products — the quantized dot product systematically over- or under-estimates the true value.
+/// This is a fundamental property: the paper's `TurboQuant_prod` variant adds QJL specifically
+/// to debias inner products, but we only implement the MSE-only variant.
+///
+/// Even at 8-bit (near-lossless reconstruction, MSE ~4e-5), the quantized-domain dot product
+/// can have ~10-15% relative error due to this bias. This tolerance is therefore intentionally
+/// loose — we're testing that the approximation is in the right ballpark, not that it's precise.
+///
+/// TODO(connor): Revisit these tolerances when we have TurboQuant_prod (QJL debiasing).
+#[test]
+fn dot_product_quantized_accuracy() -> VortexResult<()> {
+    let fsl = make_fsl(20, 128, 42);
+    let ext = make_vector_ext(&fsl);
+    let config = TurboQuantConfig {
+        bit_width: 8,
+        seed: Some(123),
+    };
+    let mut ctx = SESSION.create_execution_ctx();
+    let encoded = turboquant_encode(&ext, &config, &mut ctx)?;
+    let tq = encoded.as_opt::<TurboQuant>().unwrap();
+
+    let input_prim = fsl.elements().to_canonical()?.into_primitive();
+    let input_f32 = input_prim.as_slice::<f32>();
+
+    let mut ctx = SESSION.create_execution_ctx();
+    let pd = tq.padded_dim() as usize;
+    let norms_prim = tq.norms().clone().execute::<PrimitiveArray>(&mut ctx)?;
+    let norms = norms_prim.as_slice::<f32>();
+    let codes_fsl = tq.codes().clone().execute::<FixedSizeListArray>(&mut ctx)?;
+    let codes_prim = codes_fsl.elements().to_canonical()?.into_primitive();
+    let all_codes = codes_prim.as_slice::<u8>();
+    let centroids_prim = tq.centroids().clone().execute::<PrimitiveArray>(&mut ctx)?;
+    let centroid_vals = centroids_prim.as_slice::<f32>();
+
+    for (row_a, row_b) in [(0, 1), (5, 10), (0, 19)] {
+        let vec_a = &input_f32[row_a * 128..(row_a + 1) * 128];
+        let vec_b = &input_f32[row_b * 128..(row_b + 1) * 128];
+
+        let exact_dot: f32 = vec_a.iter().zip(vec_b.iter()).map(|(&x, &y)| x * y).sum();
+
+        let codes_a = &all_codes[row_a * pd..(row_a + 1) * pd];
+        let codes_b = &all_codes[row_b * pd..(row_b + 1) * pd];
+        let unit_dot: f32 = codes_a
+            .iter()
+            .zip(codes_b.iter())
+            .map(|(&ca, &cb)| centroid_vals[ca as usize] * centroid_vals[cb as usize])
+            .sum();
+        let approx_dot = norms[row_a] * norms[row_b] * unit_dot;
+
+        // See doc comment above: 15% relative error is expected due to MSE quantizer bias.
+        let scale = exact_dot.abs().max(1.0);
+        let rel_error = (exact_dot - approx_dot).abs() / scale;
+        assert!(
+            rel_error < 0.15,
+            "dot product error too large for ({row_a}, {row_b}): \
+                 exact={exact_dot:.4}, approx={approx_dot:.4}, rel_error={rel_error:.4}"
+        );
+    }
+    Ok(())
+}
+
+/// Roundtrip at large embedding dimensions to validate padding and SRHT at common sizes.
+///
+/// NOTE: The theoretical MSE bound (Theorem 1) is proved for Haar-distributed random orthogonal
+/// matrices, not SRHT. The SRHT is a practical O(d log d) approximation that doesn't exactly
+/// satisfy the Haar assumption, so empirical MSE can slightly exceed the theoretical bound. We
+/// use a 2x multiplier to account for this gap.
+///
+/// The 1024-d case uses 5-bit instead of 4-bit because at 4-bit the SRHT approximation error
+/// at d=1024 pushes MSE ~20% above the 1x theoretical bound (0.0127 vs bound 0.0106).
+///
+/// TODO(connor): Revisit after Stage 2 block decomposition — at d=768 with block_size=256,
+/// the per-block SRHT will be lower-dimensional and may have different error characteristics.
+#[rstest]
+#[case(768, 4)]
+#[case(1024, 5)]
+fn large_dimension_roundtrip(#[case] dim: usize, #[case] bit_width: u8) -> VortexResult<()> {
+    let num_rows = 10;
+    let fsl = make_fsl(num_rows, dim, 42);
+    let config = TurboQuantConfig {
+        bit_width,
+        seed: Some(123),
+    };
+    let (original, decoded) = encode_decode(&fsl, &config)?;
+    assert_eq!(decoded.len(), original.len());
+
+    let normalized_mse = per_vector_normalized_mse(&original, &decoded, dim, num_rows);
+    // 2x slack for the SRHT-vs-Haar gap (see doc comment above).
+    let bound = 2.0 * theoretical_mse_bound(bit_width);
+    assert!(
+        normalized_mse < bound,
+        "Normalized MSE {normalized_mse:.6} exceeds 2x bound {bound:.6} for dim={dim}, bits={bit_width}",
+    );
+    Ok(())
+}
+
 /// Verify that the encoded array's dtype is a Vector extension type.
 #[test]
 fn encoded_dtype_is_vector_extension() -> VortexResult<()> {
@@ -702,7 +844,7 @@ fn nullable_vectors_roundtrip() -> VortexResult<()> {
 #[test]
 fn nullable_norms_match_validity() -> VortexResult<()> {
     let validity = Validity::from_iter([true, false, true, false, true]);
-    let fsl = make_fsl_with_validity(5, 64, 42, validity);
+    let fsl = make_fsl_with_validity(5, 128, 42, validity);
     let ext = make_vector_ext(&fsl);
 
     let config = TurboQuantConfig {
@@ -729,7 +871,7 @@ fn nullable_norms_match_validity() -> VortexResult<()> {
 #[test]
 fn nullable_l2_norm_readthrough() -> VortexResult<()> {
     let validity = Validity::from_iter([true, false, true, false, true]);
-    let fsl = make_fsl_with_validity(5, 64, 42, validity);
+    let fsl = make_fsl_with_validity(5, 128, 42, validity);
     let ext = make_vector_ext(&fsl);
 
     let config = TurboQuantConfig {
@@ -749,7 +891,7 @@ fn nullable_l2_norm_readthrough() -> VortexResult<()> {
     for row in 0..5 {
         if row % 2 == 0 {
             assert!(norms.is_valid(row)?, "row {row} should be valid");
-            let expected: f32 = orig_f32[row * 64..(row + 1) * 64]
+            let expected: f32 = orig_f32[row * 128..(row + 1) * 128]
                 .iter()
                 .map(|&v| v * v)
                 .sum::<f32>()
@@ -773,7 +915,7 @@ fn nullable_slice_preserves_validity() -> VortexResult<()> {
     let validity = Validity::from_iter([
         true, true, false, true, true, false, true, false, true, true,
     ]);
-    let fsl = make_fsl_with_validity(10, 64, 42, validity);
+    let fsl = make_fsl_with_validity(10, 128, 42, validity);
     let ext = make_vector_ext(&fsl);
 
     let config = TurboQuantConfig {
diff --git a/vortex-tensor/src/encodings/turboquant/vtable.rs b/vortex-tensor/src/encodings/turboquant/vtable.rs