clean up some stuff

connortsui20 · connortsui20 · commit 0ea2b2b7eddf · 2026-04-07T11:34:47.000-04:00
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
diff --git a/vortex-tensor/public-api.lock b/vortex-tensor/public-api.lock
@@ -182,7 +182,7 @@ pub fn T::padded_dim(&self) -> u32
 
 pub fn T::rotation_signs(&self) -> &vortex_array::array::erased::ArrayRef
 
-pub fn vortex_tensor::encodings::turboquant::turboquant_encode(ext: &vortex_array::arrays::extension::vtable::ExtensionArray, config: &vortex_tensor::encodings::turboquant::TurboQuantConfig, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
+pub fn vortex_tensor::encodings::turboquant::turboquant_encode(ext: vortex_array::array::view::ArrayView<'_, vortex_array::arrays::extension::vtable::Extension>, config: &vortex_tensor::encodings::turboquant::TurboQuantConfig, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
 
 pub type vortex_tensor::encodings::turboquant::TurboQuantArray = vortex_array::array::typed::Array<vortex_tensor::encodings::turboquant::TurboQuant>
 
diff --git a/vortex-tensor/src/encodings/turboquant/array/centroids.rs b/vortex-tensor/src/encodings/turboquant/array/centroids.rs
@@ -12,53 +12,58 @@
 use std::sync::LazyLock;
 
 use vortex_error::VortexResult;
-use vortex_error::vortex_bail;
+use vortex_error::vortex_ensure;
 use vortex_utils::aliases::dash_map::DashMap;
 
 use crate::encodings::turboquant::TurboQuant;
 
-/// Number of numerical integration points for computing conditional expectations.
-const INTEGRATION_POINTS: usize = 1000;
+/// The maximum number of iterations for the Max-Lloyd algorithm when computing centroids.
+const MAX_ITERATIONS: usize = 200;
 
-/// Max-Lloyd convergence threshold.
+/// The Max-Lloyd convergence threshold for stopping early when computing centroids.
 const CONVERGENCE_EPSILON: f64 = 1e-12;
 
-/// Maximum iterations for Max-Lloyd algorithm.
-const MAX_ITERATIONS: usize = 200;
+/// Number of numerical integration points for computing conditional expectations.
+const INTEGRATION_POINTS: usize = 1000;
 
+// TODO(connor): Maybe we should just store an `ArrayRef` here?
 /// Global centroid cache keyed by (dimension, bit_width).
 static CENTROID_CACHE: LazyLock<DashMap<(u32, u8), Vec<f32>>> = LazyLock::new(DashMap::default);
 
 /// Get or compute cached centroids for the given dimension and bit width.
 ///
-/// Returns `2^bit_width` centroids sorted in ascending order, representing
-/// optimal scalar quantization levels for the coordinate distribution after
-/// random rotation in `dimension`-dimensional space.
+/// Returns `2^bit_width` centroids sorted in ascending order, representing optimal scalar
+/// quantization levels for the coordinate distribution after random rotation in
+/// `dimension`-dimensional space.
 pub fn get_centroids(dimension: u32, bit_width: u8) -> VortexResult<Vec<f32>> {
-    if !(1..=8).contains(&bit_width) {
-        vortex_bail!("TurboQuant bit_width must be 1-8, got {bit_width}");
-    }
-    if dimension < TurboQuant::MIN_DIMENSION {
-        vortex_bail!(
-            "TurboQuant dimension must be >= {}, got {dimension}",
-            TurboQuant::MIN_DIMENSION
-        );
-    }
+    vortex_ensure!(
+        (1..=8).contains(&bit_width),
+        "TurboQuant bit_width must be 1-8, got {bit_width}"
+    );
+    vortex_ensure!(
+        dimension >= TurboQuant::MIN_DIMENSION,
+        "TurboQuant dimension must be >= {}, got {dimension}",
+        TurboQuant::MIN_DIMENSION
+    );
 
     if let Some(centroids) = CENTROID_CACHE.get(&(dimension, bit_width)) {
         return Ok(centroids.clone());
     }
 
     let centroids = max_lloyd_centroids(dimension, bit_width);
     CENTROID_CACHE.insert((dimension, bit_width), centroids.clone());
+
     Ok(centroids)
 }
 
+// TODO(connor): It would potentially be more performant if this was modelled as const generic
+// parameters to functions.
 /// Half-integer exponent: represents `int_part + (if has_half { 0.5 } else { 0.0 })`.
 ///
-/// The marginal distribution exponent `(d-3)/2` is always an integer (when `d` is odd)
-/// or a half-integer (when `d` is even). This type makes that invariant explicit and
-/// avoids floating-point comparison in the hot path.
+/// The marginal distribution exponent `(d-3)/2` is always an integer (when `d` is odd) or a
+/// half-integer (when `d` is even).
+///
+/// This type makes that invariant explicit and avoids floating-point comparison in the hot path.
 #[derive(Clone, Copy, Debug)]
 struct HalfIntExponent {
     int_part: i32,
@@ -70,12 +75,7 @@ impl HalfIntExponent {
     ///
     /// `numerator` is `d - 3` where `d` is the dimension (>= 2), so it can be negative.
     fn from_numerator(numerator: i32) -> Self {
-        // Integer division truncates toward zero; for negative odd numerators
-        // (e.g., d=2 → num=-1) this gives int_part=0, has_half=true,
-        // representing -0.5 = 0 + (-0.5). The sign is handled by adjusting
-        // int_part: -1/2 = 0 with has_half, but we need the floor division.
-        // Rust's `/` truncates toward zero, so -1/2 = 0. We want floor: -1.
-        // Use divmod that rounds toward negative infinity.
+        // Use Euclidean division to get floor division toward negative infinity.
         let int_part = numerator.div_euclid(2);
         let has_half = numerator.rem_euclid(2) != 0;
         Self { int_part, has_half }
@@ -84,12 +84,14 @@ impl HalfIntExponent {
 
 /// Compute optimal centroids via the Max-Lloyd (Lloyd-Max) algorithm.
 ///
-/// Operates on the marginal distribution of a single coordinate of a randomly
-/// rotated unit vector in d dimensions. The PDF is:
+/// Operates on the marginal distribution of a single coordinate of a randomly rotated unit vector
+/// in d dimensions.
+///
+/// The probability density function is:
 ///   `f(x) = C_d * (1 - x^2)^((d-3)/2)` on `[-1, 1]`
 /// where `C_d` is the normalizing constant.
-#[allow(clippy::cast_possible_truncation)] // f64→f32 centroid values are intentional
 fn max_lloyd_centroids(dimension: u32, bit_width: u8) -> Vec<f32> {
+    debug_assert!((1..=8).contains(&bit_width));
     let num_centroids = 1usize << bit_width;
 
     // For the marginal distribution on [-1, 1], we use the exponent (d-3)/2.
@@ -114,7 +116,7 @@ fn max_lloyd_centroids(dimension: u32, bit_width: u8) -> Vec<f32> {
         for idx in 0..num_centroids {
             let lo = boundaries[idx];
             let hi = boundaries[idx + 1];
-            let new_centroid = conditional_mean(lo, hi, exponent);
+            let new_centroid = mean_between_centroids(lo, hi, exponent);
             max_change = max_change.max((new_centroid - centroids[idx]).abs());
             centroids[idx] = new_centroid;
         }
@@ -124,14 +126,19 @@ fn max_lloyd_centroids(dimension: u32, bit_width: u8) -> Vec<f32> {
         }
     }
 
+    #[expect(
+        clippy::cast_possible_truncation,
+        reason = "all values are in [-1, 1] so this just loses precision"
+    )]
     centroids.into_iter().map(|val| val as f32).collect()
 }
 
 /// Compute the conditional mean of the coordinate distribution on interval [lo, hi].
 ///
-/// Returns `E[X | lo <= X <= hi]` where X has PDF proportional to `(1 - x^2)^exponent`
-/// on [-1, 1].
-fn conditional_mean(lo: f64, hi: f64, exponent: HalfIntExponent) -> f64 {
+/// Returns `E[X | lo <= X <= hi]` where X has PDF proportional to `(1 - x^2)^exponent` on [-1, 1].
+///
+/// Since there is no closed form for the integrals, we compute this numerically.
+fn mean_between_centroids(lo: f64, hi: f64, exponent: HalfIntExponent) -> f64 {
     if (hi - lo).abs() < 1e-15 {
         return (lo + hi) / 2.0;
     }
@@ -164,9 +171,9 @@ fn conditional_mean(lo: f64, hi: f64, exponent: HalfIntExponent) -> f64 {
 
 /// Unnormalized PDF of the coordinate distribution: `(1 - x^2)^exponent`.
 ///
-/// Uses `powi` + `sqrt` instead of `powf` for the half-integer exponents
-/// that arise from `(d-3)/2`. This is significantly faster than the general
-/// `powf` which goes through `exp(exponent * ln(base))`.
+/// Uses `powi` + `sqrt` instead of `powf` for the half-integer exponents that arise from `(d-3)/2`.
+/// This is significantly faster than the general `powf` which goes through
+/// `exp(exponent * ln(base))`.
 #[inline]
 fn pdf_unnormalized(x_val: f64, exponent: HalfIntExponent) -> f64 {
     let base = (1.0 - x_val * x_val).max(0.0);
@@ -182,10 +189,10 @@ fn pdf_unnormalized(x_val: f64, exponent: HalfIntExponent) -> f64 {
 
 /// Precompute decision boundaries (midpoints between adjacent centroids).
 ///
-/// For `k` centroids, returns `k-1` boundaries. A value below `boundaries[0]` maps
-/// to centroid 0, a value in `[boundaries[i-1], boundaries[i])` maps to centroid `i`,
-/// and a value >= `boundaries[k-2]` maps to centroid `k-1`.
-pub fn compute_boundaries(centroids: &[f32]) -> Vec<f32> {
+/// For `k` centroids, returns `k-1` boundaries. A value below `boundaries[0]` maps to centroid 0, a
+/// value in `[boundaries[i-1], boundaries[i])` maps to centroid `i`, and a
+/// value `>= boundaries[k-2]` maps to centroid `k-1`.
+pub fn compute_centroid_boundaries(centroids: &[f32]) -> Vec<f32> {
     centroids.windows(2).map(|w| (w[0] + w[1]) * 0.5).collect()
 }
 
@@ -195,14 +202,21 @@ pub fn compute_boundaries(centroids: &[f32]) -> Vec<f32> {
 /// centroids. Uses binary search on the midpoints, avoiding distance comparisons
 /// in the inner loop.
 #[inline]
-#[allow(clippy::cast_possible_truncation)] // bounded by num_centroids <= 256
 pub fn find_nearest_centroid(value: f32, boundaries: &[f32]) -> u8 {
     debug_assert!(
         boundaries.windows(2).all(|w| w[0] <= w[1]),
         "boundaries must be sorted"
     );
+    debug_assert!(
+        boundaries.len() <= 255, // at most 256 centroids (1 << 8) yield at most 255 midpoints
+        "at most 255 boundaries are supported so that the centroid index fits in a u8"
+    );
 
-    boundaries.partition_point(|&b| b < value) as u8
+    #[expect(
+        clippy::cast_possible_truncation,
+        reason = "num_centroids <= 256 and partition_point will return at most 255"
+    )]
+    (boundaries.partition_point(|&b| b < value) as u8)
 }
 
 #[cfg(test)]
@@ -294,7 +308,7 @@ mod tests {
     #[test]
     fn find_nearest_basic() -> VortexResult<()> {
         let centroids = get_centroids(128, 2)?;
-        let boundaries = compute_boundaries(&centroids);
+        let boundaries = compute_centroid_boundaries(&centroids);
         assert_eq!(find_nearest_centroid(-1.0, &boundaries), 0);
 
         let last_idx = (centroids.len() - 1) as u8;
diff --git a/vortex-tensor/src/encodings/turboquant/array/data.rs b/vortex-tensor/src/encodings/turboquant/array/data.rs
@@ -32,7 +32,7 @@ pub struct TurboQuantData {
     /// Stored as a convenience field to avoid repeatedly extracting it from `dtype`.
     pub(crate) dimension: u32,
 
-    /// The number of bits per coordinate (1-8), derived from `log2(centroids.len())`.
+    /// The number of bits per coordinate (0-8), derived from `log2(centroids.len())`.
     ///
     /// This is 0 for degenerate empty arrays.
     pub(crate) bit_width: u8,
@@ -56,6 +56,7 @@ impl TurboQuantData {
             bit_width <= 8,
             "bit_width is expected to be between 0 and 8, got {bit_width}"
         );
+
         Ok(Self {
             dimension,
             bit_width,
diff --git a/vortex-tensor/src/encodings/turboquant/array/mod.rs b/vortex-tensor/src/encodings/turboquant/array/mod.rs
@@ -11,15 +11,3 @@ pub(crate) mod centroids;
 pub(crate) mod rotation;
 
 pub(crate) mod scheme;
-
-use num_traits::Float;
-use num_traits::FromPrimitive;
-use vortex_error::VortexExpect;
-
-/// Convert an f32 value to a float type `T`.
-///
-/// `FromPrimitive::from_f32` is infallible for all Vortex float types: f16 saturates via the
-/// inherent `f16::from_f32()`, f32 is identity, f64 is lossless widening.
-pub(crate) fn float_from_f32<T: Float + FromPrimitive>(v: f32) -> T {
-    FromPrimitive::from_f32(v).vortex_expect("f32-to-float conversion is infallible")
-}
diff --git a/vortex-tensor/src/encodings/turboquant/array/scheme.rs b/vortex-tensor/src/encodings/turboquant/array/scheme.rs
@@ -5,6 +5,7 @@
 
 use vortex_array::ArrayRef;
 use vortex_array::Canonical;
+use vortex_array::arrays::Extension;
 use vortex_compressor::CascadingCompressor;
 use vortex_compressor::ctx::CompressorContext;
 use vortex_compressor::scheme::Scheme;
@@ -75,11 +76,13 @@ impl Scheme for TurboQuantScheme {
         data: &mut ArrayAndStats,
         _ctx: CompressorContext,
     ) -> VortexResult<ArrayRef> {
-        // TODO(connor): Fix this once we ensure that the data array is always canonical.
-        let ext_array = data.array().to_canonical()?.into_extension();
+        let ext_array = data
+            .array()
+            .as_opt::<Extension>()
+            .vortex_expect("expected an extension array");
 
         let config = TurboQuantConfig::default();
-        turboquant_encode(&ext_array, &config, &mut compressor.execution_ctx())
+        turboquant_encode(ext_array, &config, &mut compressor.execution_ctx())
     }
 }
 
diff --git a/vortex-tensor/src/encodings/turboquant/compress.rs b/vortex-tensor/src/encodings/turboquant/compress.rs
@@ -5,9 +5,10 @@
 
 use num_traits::ToPrimitive;
 use vortex_array::ArrayRef;
+use vortex_array::ArrayView;
 use vortex_array::ExecutionCtx;
 use vortex_array::IntoArray;
-use vortex_array::arrays::ExtensionArray;
+use vortex_array::arrays::Extension;
 use vortex_array::arrays::FixedSizeListArray;
 use vortex_array::arrays::PrimitiveArray;
 use vortex_array::arrays::extension::ExtensionArrayExt;
@@ -25,7 +26,7 @@ use vortex_error::vortex_ensure;
 use vortex_fastlanes::bitpack_compress::bitpack_encode;
 
 use crate::encodings::turboquant::TurboQuant;
-use crate::encodings::turboquant::array::centroids::compute_boundaries;
+use crate::encodings::turboquant::array::centroids::compute_centroid_boundaries;
 use crate::encodings::turboquant::array::centroids::find_nearest_centroid;
 use crate::encodings::turboquant::array::centroids::get_centroids;
 use crate::encodings::turboquant::array::rotation::RotationMatrix;
@@ -95,7 +96,7 @@ struct QuantizationResult {
 /// all-zero codes.
 #[allow(clippy::cast_possible_truncation)]
 fn turboquant_quantize_core(
-    ext: &ExtensionArray,
+    ext: ArrayView<Extension>,
     fsl: &FixedSizeListArray,
     seed: u64,
     bit_width: u8,
@@ -130,7 +131,7 @@ fn turboquant_quantize_core(
     let f32_elements = extract_f32_elements(fsl)?;
 
     let centroids = get_centroids(padded_dim as u32, bit_width)?;
-    let boundaries = compute_boundaries(&centroids);
+    let boundaries = compute_centroid_boundaries(&centroids);
 
     let mut all_indices = BufferMut::<u8>::with_capacity(num_rows * padded_dim);
     let mut padded = vec![0.0f32; padded_dim];
@@ -213,7 +214,7 @@ fn build_turboquant(
 /// Nullable inputs are supported: null vectors get all-zero codes and null norms. The validity
 /// of the resulting TurboQuant array is carried by the norms child.
 pub fn turboquant_encode(
-    ext: &ExtensionArray,
+    ext: ArrayView<Extension>,
     config: &TurboQuantConfig,
     ctx: &mut ExecutionCtx,
 ) -> VortexResult<ArrayRef> {
diff --git a/vortex-tensor/src/encodings/turboquant/compute/cosine_similarity.rs b/vortex-tensor/src/encodings/turboquant/compute/cosine_similarity.rs
@@ -44,7 +44,7 @@ use vortex_error::vortex_ensure_eq;
 
 use crate::encodings::turboquant::TurboQuant;
 use crate::encodings::turboquant::TurboQuantArrayExt;
-use crate::encodings::turboquant::array::float_from_f32;
+use crate::encodings::turboquant::compute::float_from_f32;
 use crate::vector::AnyVector;
 
 /// Compute the per-row unit-norm dot products in f32 (centroids are always f32).
diff --git a/vortex-tensor/src/encodings/turboquant/compute/mod.rs b/vortex-tensor/src/encodings/turboquant/compute/mod.rs
@@ -4,7 +4,21 @@
 //! Compute pushdown implementations for TurboQuant.
 
 pub(crate) mod cosine_similarity;
+
 mod ops;
-pub(crate) mod rules;
 mod slice;
 mod take;
+
+pub(crate) mod rules;
+
+use num_traits::Float;
+use num_traits::FromPrimitive;
+use vortex_error::VortexExpect;
+
+/// Convert an f32 value to a float type `T`.
+///
+/// `FromPrimitive::from_f32` is infallible for all Vortex float types: f16 saturates via the
+/// inherent `f16::from_f32()`, f32 is identity, f64 is lossless widening.
+pub(crate) fn float_from_f32<T: Float + FromPrimitive>(v: f32) -> T {
+    FromPrimitive::from_f32(v).vortex_expect("f32-to-float conversion is infallible")
+}
diff --git a/vortex-tensor/src/encodings/turboquant/decompress.rs b/vortex-tensor/src/encodings/turboquant/decompress.rs
@@ -22,8 +22,8 @@ use vortex_error::VortexResult;
 
 use crate::encodings::turboquant::TurboQuant;
 use crate::encodings::turboquant::TurboQuantArrayExt;
-use crate::encodings::turboquant::array::float_from_f32;
 use crate::encodings::turboquant::array::rotation::RotationMatrix;
+use crate::encodings::turboquant::compute::float_from_f32;
 use crate::vector::AnyVector;
 
 /// Decompress a `TurboQuantArray` into a [`Vector`] extension array.
diff --git a/vortex-tensor/src/encodings/turboquant/metadata.rs b/vortex-tensor/src/encodings/turboquant/metadata.rs
@@ -11,7 +11,7 @@ use vortex_error::vortex_err;
 /// Serialized metadata for TurboQuant arrays.
 #[derive(Clone, PartialEq, Message)]
 pub(super) struct TurboQuantMetadata {
-    /// The number of bits per coordinate.
+    /// The number of bits per coordinate, which must be <= 8.
     #[prost(uint32, required, tag = "1")]
     bit_width: u32,
 }
diff --git a/vortex-tensor/src/encodings/turboquant/tests.rs b/vortex-tensor/src/encodings/turboquant/tests.rs
diff --git a/vortex-tensor/src/encodings/turboquant/vtable.rs b/vortex-tensor/src/encodings/turboquant/vtable.rs
diff --git a/vortex/benches/single_encoding_throughput.rs b/vortex/benches/single_encoding_throughput.rs

Original file line number	Diff line number	Diff line change
`@@ -11,7 +11,7 @@ use vortex_error::vortex_err;`
`11`	`11`	`/// Serialized metadata for TurboQuant arrays.`
`12`	`12`	`#[derive(Clone, PartialEq, Message)]`
`13`	`13`	`pub(super) struct TurboQuantMetadata {`
`14`		`- /// The number of bits per coordinate.`
	`14`	`+ /// The number of bits per coordinate, which must be <= 8.`
`15`	`15`	`#[prost(uint32, required, tag = "1")]`
`16`	`16`	`bit_width: u32,`
`17`	`17`	`}`