use 'transform' instead of 'rotate'

connortsui20 · connortsui20 · commit f8c6551d6bea · 2026-05-07T17:23:10.000-04:00
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
diff --git a/vortex-turboquant/src/centroids.rs b/vortex-turboquant/src/centroids.rs
@@ -4,20 +4,21 @@
 //! Max-Lloyd centroid computation for TurboQuant scalar quantizers.
 //!
 //! Pre-computes and caches optimal scalar quantizer centroids for the marginal distribution of
-//! coordinates after random rotation of a unit-norm vector.
+//! coordinates after a random orthogonal transform of a unit-norm vector.
 //!
-//! In high dimensions, each coordinate of a randomly rotated unit vector follows a distribution
-//! proportional to `(1 - x^2)^((d-3)/2)` on `[-1, 1]`, which converges to `N(0, 1/d)`.
+//! In high dimensions, each coordinate of a randomly transformed unit vector follows a
+//! distribution proportional to `(1 - x^2)^((d-3)/2)` on `[-1, 1]`, which converges to
+//! `N(0, 1/d)`.
 //!
 //! The Max-Lloyd algorithm finds optimal quantization centroids that minimize MSE for this
 //! distribution.
 //!
 //! Centroids are not stored in TurboQuant arrays. They are deterministically derived from
 //! `(padded_dim, bit_width)` and cached process-locally.
 //!
-//! The centroid model follows the random-rotation marginal used by the TurboQuant paper. This
-//! encoder applies a SORF-style structured rotation instead of a dense random Gaussian or
-//! orthogonal matrix, so paper-level error bounds should not be treated as verified for this
+//! The centroid model follows the random orthogonal transform marginal used by the TurboQuant
+//! paper. This encoder applies a SORF-style structured transform instead of a dense random Gaussian
+//! or orthogonal matrix, so paper-level error bounds should not be treated as verified for this
 //! implementation without separate empirical validation.
 
 use std::sync::LazyLock;
@@ -47,7 +48,7 @@ static CENTROID_CACHE: LazyLock<DashMap<(u32, u8), Buffer<f32>>> = LazyLock::new
 /// Get or compute cached centroids for the given dimension and bit width.
 ///
 /// Returns `2^bit_width` centroids sorted in ascending order, representing optimal scalar
-/// quantization levels for the coordinate distribution after random rotation in
+/// quantization levels for the coordinate distribution after a random orthogonal transform in
 /// `dimension`-dimensional space.
 pub(crate) fn compute_or_get_centroids(dimension: u32, bit_width: u8) -> VortexResult<Buffer<f32>> {
     vortex_ensure!(
@@ -99,8 +100,8 @@ impl HalfIntExponent {
 
 /// Compute optimal centroids via the Max-Lloyd (Lloyd-Max) algorithm.
 ///
-/// Operates on the marginal distribution of a single coordinate of a randomly rotated unit vector
-/// in d dimensions.
+/// Operates on the marginal distribution of a single coordinate of a randomly transformed unit
+/// vector in d dimensions.
 ///
 /// The probability distribution function is:
 ///   `f(x) = C_d * (1 - x^2)^((d-3)/2)` on `[-1, 1]`
diff --git a/vortex-turboquant/src/config.rs b/vortex-turboquant/src/config.rs
@@ -49,12 +49,12 @@ impl TurboQuantConfig {
         self.bit_width
     }
 
-    /// Seed used to derive the deterministic SORF rotation.
+    /// Seed used to derive the deterministic SORF transform.
     pub fn seed(&self) -> u64 {
         self.seed
     }
 
-    /// Number of sign-diagonal plus Walsh-Hadamard rounds in the SORF rotation.
+    /// Number of sign-diagonal plus Walsh-Hadamard rounds in the SORF transform.
     pub fn num_rounds(&self) -> u8 {
         self.num_rounds
     }
diff --git a/vortex-turboquant/src/lib.rs b/vortex-turboquant/src/lib.rs
@@ -6,20 +6,20 @@
 //! Implements a Stage 1 TurboQuant encoding ([arXiv:2504.19874], [RFC 0033]) for lossy compression
 //! of high-dimensional vector data. The extension operates on
 //! [`Vector`](vortex_tensor::vector::Vector) extension arrays, packing their `FixedSizeList`
-//! storage into quantized codes after a structured orthogonal surrogate rotation.
+//! storage into quantized codes after a structured orthogonal surrogate transform.
 //!
 //! [arXiv:2504.19874]: https://arxiv.org/abs/2504.19874
 //! [RFC 0033]: https://vortex-data.github.io/rfcs/rfc/0033.html
 //!
 //! # Overview
 //!
 //! TurboQuant minimizes mean-squared reconstruction error (1-8 bits per coordinate)
-//! using MSE-optimal scalar quantization on coordinates of a rotated unit vector.
+//! using MSE-optimal scalar quantization on coordinates of a transformed unit vector.
 //!
 //! The [`turboquant_pack()`] path first computes and stores the original L2 norm for each vector
-//! row, then normalizes each valid nonzero row internally before SORF rotation and scalar
+//! row, then normalizes each valid nonzero row internally before SORF transform and scalar
 //! quantization. The [`turboquant_unpack()`] path dequantizes through deterministic centroids,
-//! applies the inverse SORF rotation, truncates back to the original dimension, and re-applies the
+//! applies the inverse SORF transform, truncates back to the original dimension, and re-applies the
 //! stored norm.
 //!
 //! [`turboquant_pack()`]: crate::turboquant_pack
@@ -37,7 +37,7 @@
 //! ```
 //!
 //! Stored norms are authoritative for future TurboQuant-aware scalar functions. Decoded quantized
-//! directions are not guaranteed to have unit norm after scalar quantization and inverse rotation.
+//! directions are not guaranteed to have unit norm after scalar quantization and inverse transform.
 //!
 //! # Source map
 //!
@@ -47,10 +47,10 @@
 //!   validity for null vectors.
 //! - `vector/normalize.rs`: TurboQuant-local normalization and how it differs from the tensor
 //!   crate's null-row zeroing helper.
-//! - `vector/quantize.rs`: SORF rotation, centroid lookup, and why invalid rows are skipped rather
+//! - `vector/quantize.rs`: SORF transform, centroid lookup, and why invalid rows are skipped rather
 //!   than quantized.
 //! - `centroids.rs`: deterministic Max-Lloyd centroid computation and process-local caching.
-//! - `sorf/`: the Walsh-Hadamard-based structured rotation and the stable SplitMix64 sign stream.
+//! - `sorf/`: the Walsh-Hadamard-based structured transform and the stable SplitMix64 sign stream.
 //!
 //! The current encoding is intentionally MSE-only. It does not yet implement the paper's QJL
 //! residual correction for unbiased inner-product estimation, and it still uses internal
diff --git a/vortex-turboquant/src/sorf/transform.rs b/vortex-turboquant/src/sorf/transform.rs
@@ -28,7 +28,7 @@
 //! This makes SORF sign generation stable as an extension format contract even if external RNG
 //! implementations change.
 //!
-//! This transform is the crate's practical structured-rotation choice for TurboQuant. It is not
+//! This transform is the crate's practical structured transform choice for TurboQuant. It is not
 //! the dense random Gaussian or orthogonal matrix used by some theoretical analyses, so theoretical
 //! bounds from those models need separate validation before being presented as implementation
 //! guarantees.
@@ -118,7 +118,7 @@ impl SorfMatrix {
 
     /// Returns the padded dimension (next power of 2 >= dim).
     ///
-    /// All `rotate`/`inverse_rotate` buffers must be this length.
+    /// All `transform`/`inverse_transform` buffers must be this length.
     pub(crate) fn padded_dim(&self) -> usize {
         self.padded_dim
     }
@@ -127,7 +127,7 @@ impl SorfMatrix {
     ///
     /// Both `input` and `output` must have length [`padded_dim()`](Self::padded_dim). The caller is
     /// responsible for zero-padding input beyond `dim` positions.
-    pub(crate) fn rotate(&self, input: &[f32], output: &mut [f32]) {
+    pub(crate) fn transform(&self, input: &[f32], output: &mut [f32]) {
         debug_assert_eq!(input.len(), self.padded_dim);
         debug_assert_eq!(output.len(), self.padded_dim);
 
@@ -138,7 +138,7 @@ impl SorfMatrix {
     /// Apply the inverse orthogonal transform: `output = R⁻¹(input)`.
     ///
     /// Both `input` and `output` must have length `padded_dim()`.
-    pub(crate) fn inverse_rotate(&self, input: &[f32], output: &mut [f32]) {
+    pub(crate) fn inverse_transform(&self, input: &[f32], output: &mut [f32]) {
         debug_assert_eq!(input.len(), self.padded_dim);
         debug_assert_eq!(output.len(), self.padded_dim);
 
@@ -355,9 +355,9 @@ mod tests {
         let padded_dim = dim_to_usize(64u32);
         let num_rounds = rounds_to_usize(3u8);
         let seed = 42u64;
-        let r1 = SorfMatrix::try_new(padded_dim, num_rounds, seed)?;
-        let r2 = SorfMatrix::try_new(padded_dim, num_rounds, seed)?;
-        let pd = r1.padded_dim();
+        let transform1 = SorfMatrix::try_new(padded_dim, num_rounds, seed)?;
+        let transform2 = SorfMatrix::try_new(padded_dim, num_rounds, seed)?;
+        let pd = transform1.padded_dim();
 
         let mut input = vec![0.0f32; pd];
         for i in 0..padded_dim {
@@ -366,8 +366,8 @@ mod tests {
         let mut out1 = vec![0.0f32; pd];
         let mut out2 = vec![0.0f32; pd];
 
-        r1.rotate(&input, &mut out1);
-        r2.rotate(&input, &mut out2);
+        transform1.transform(&input, &mut out1);
+        transform2.transform(&input, &mut out2);
 
         assert_eq!(out1, out2);
         Ok(())
@@ -378,8 +378,8 @@ mod tests {
         let padded_dim = dim_to_usize(64u32);
         let num_rounds = rounds_to_usize(2u8);
         let seed = 42u64;
-        let rot = SorfMatrix::try_new(padded_dim, num_rounds, seed)?;
-        let actual = rot.export_inverse_signs_u8();
+        let transform = SorfMatrix::try_new(padded_dim, num_rounds, seed)?;
+        let actual = transform.export_inverse_signs_u8();
         let mut rng = SplitMix64::new(seed);
         let round0_word = rng.next_u64();
         let round1_word = rng.next_u64();
@@ -446,18 +446,18 @@ mod tests {
     fn roundtrip_exact(#[case] dim: u32, #[case] num_rounds: u8) -> VortexResult<()> {
         let dim = dim_to_usize(dim);
         let num_rounds = rounds_to_usize(num_rounds);
-        let rot = SorfMatrix::try_new(dim.next_power_of_two(), num_rounds, 42u64)?;
-        let padded_dim = rot.padded_dim();
+        let transform = SorfMatrix::try_new(dim.next_power_of_two(), num_rounds, 42u64)?;
+        let padded_dim = transform.padded_dim();
 
         let mut input = vec![0.0f32; padded_dim];
         for i in 0..dim {
             input[i] = (i as f32 + 1.0) * 0.01;
         }
-        let mut rotated = vec![0.0f32; padded_dim];
+        let mut transformed = vec![0.0f32; padded_dim];
         let mut recovered = vec![0.0f32; padded_dim];
 
-        rot.rotate(&input, &mut rotated);
-        rot.inverse_rotate(&rotated, &mut recovered);
+        transform.transform(&input, &mut transformed);
+        transform.inverse_transform(&transformed, &mut recovered);
 
         let max_err: f32 = input
             .iter()
@@ -484,25 +484,25 @@ mod tests {
     fn preserves_norm(#[case] dim: u32, #[case] num_rounds: u8) -> VortexResult<()> {
         let dim = dim_to_usize(dim);
         let num_rounds = rounds_to_usize(num_rounds);
-        let rot = SorfMatrix::try_new(dim.next_power_of_two(), num_rounds, 7u64)?;
-        let padded_dim = rot.padded_dim();
+        let transform = SorfMatrix::try_new(dim.next_power_of_two(), num_rounds, 7u64)?;
+        let padded_dim = transform.padded_dim();
 
         let mut input = vec![0.0f32; padded_dim];
         for i in 0..dim {
             input[i] = (i as f32) * 0.01;
         }
         let input_norm: f32 = input.iter().map(|x| x * x).sum::<f32>().sqrt();
 
-        let mut rotated = vec![0.0f32; padded_dim];
-        rot.rotate(&input, &mut rotated);
-        let rotated_norm: f32 = rotated.iter().map(|x| x * x).sum::<f32>().sqrt();
+        let mut transformed = vec![0.0f32; padded_dim];
+        transform.transform(&input, &mut transformed);
+        let transformed_norm: f32 = transformed.iter().map(|x| x * x).sum::<f32>().sqrt();
 
         assert!(
-            (input_norm - rotated_norm).abs() / input_norm < 1e-5,
+            (input_norm - transformed_norm).abs() / input_norm < 1e-5,
             "norm not preserved for dim={dim}: {} vs {} (rel err: {:.2e})",
             input_norm,
-            rotated_norm,
-            (input_norm - rotated_norm).abs() / input_norm
+            transformed_norm,
+            (input_norm - transformed_norm).abs() / input_norm
         );
         Ok(())
     }
@@ -517,11 +517,11 @@ mod tests {
     fn sign_export_import_roundtrip(#[case] dim: u32, #[case] num_rounds: u8) -> VortexResult<()> {
         let dim = dim_to_usize(dim);
         let num_rounds = rounds_to_usize(num_rounds);
-        let rot = SorfMatrix::try_new(dim.next_power_of_two(), num_rounds, 42u64)?;
-        let padded_dim = rot.padded_dim();
+        let transform = SorfMatrix::try_new(dim.next_power_of_two(), num_rounds, 42u64)?;
+        let padded_dim = transform.padded_dim();
 
-        let signs_u8 = rot.export_inverse_signs_u8();
-        let rot2 = SorfMatrix::from_u8_slice(&signs_u8, dim, num_rounds)?;
+        let signs_u8 = transform.export_inverse_signs_u8();
+        let transform2 = SorfMatrix::from_u8_slice(&signs_u8, dim, num_rounds)?;
 
         let mut input = vec![0.0f32; padded_dim];
         for i in 0..dim {
@@ -530,13 +530,13 @@ mod tests {
 
         let mut out1 = vec![0.0f32; padded_dim];
         let mut out2 = vec![0.0f32; padded_dim];
-        rot.rotate(&input, &mut out1);
-        rot2.rotate(&input, &mut out2);
+        transform.transform(&input, &mut out1);
+        transform2.transform(&input, &mut out2);
         assert_eq!(out1, out2, "Forward transform mismatch after export/import");
 
-        rot.inverse_rotate(&out1, &mut out2);
+        transform.inverse_transform(&out1, &mut out2);
         let mut out3 = vec![0.0f32; padded_dim];
-        rot2.inverse_rotate(&out1, &mut out3);
+        transform2.inverse_transform(&out1, &mut out3);
         assert_eq!(out2, out3, "Inverse transform mismatch after export/import");
 
         Ok(())
diff --git a/vortex-turboquant/src/vector/pack.rs b/vortex-turboquant/src/vector/pack.rs
@@ -34,7 +34,7 @@ use crate::vtable::TurboQuantMetadata;
 
 /// Lossily pack a `Vector` extension array into a `TurboQuant` extension array.
 ///
-/// Valid rows are normalized internally before SORF rotation and scalar quantization. The original
+/// Valid rows are normalized internally before SORF transform and scalar quantization. The original
 /// row norms are stored explicitly, and original vector nulls are preserved on the storage struct
 /// and both row-aligned child arrays.
 pub fn turboquant_pack(
diff --git a/vortex-turboquant/src/vector/quantize.rs b/vortex-turboquant/src/vector/quantize.rs
@@ -3,11 +3,11 @@
 
 //! Core TurboQuant quantization helpers.
 //!
-//! Quantization consumes the TurboQuant-local normalized `Vector` child. Valid rows are rotated and
-//! mapped to scalar centroid indices. Invalid rows remain in the full-length output but are
+//! Quantization consumes the TurboQuant-local normalized `Vector` child. Valid rows are transformed
+//! and mapped to scalar centroid indices. Invalid rows remain in the full-length output but are
 //! skipped: their physical code bytes are placeholders guarded by the `codes` row validity.
 //!
-//! This matters because TurboQuant's scalar codebook is optimized for coordinates of rotated
+//! This matters because TurboQuant's scalar codebook is optimized for coordinates of transformed
 //! unit-norm vectors. The codebook does not generally contain an exact zero centroid, and a
 //! physical code byte of `0` means "centroid 0", not "zero coordinate". Null vectors therefore
 //! should not be converted to zero vectors and fed through the quantizer.
@@ -44,13 +44,13 @@ pub(crate) fn empty_quantization(padded_dim: usize) -> QuantizationResult {
     }
 }
 
-/// Core quantization: rotate and quantize already-normalized rows.
+/// Core quantization: transform and quantize already-normalized rows.
 ///
 /// # Safety
 ///
 /// The input `fsl` must contain unit-norm vectors (already L2-normalized) for every valid row.
-/// Invalid rows are left row-aligned in the output but are not rotated or quantized. The rotation
-/// and centroid lookup happen in f32.
+/// Invalid rows are left row-aligned in the output but are not transformed or quantized. The
+/// transform and centroid lookup happen in f32.
 pub(crate) unsafe fn turboquant_quantize_core(
     fsl: &FixedSizeListArray,
     config: &TurboQuantConfig,
@@ -60,8 +60,9 @@ pub(crate) unsafe fn turboquant_quantize_core(
     let num_vectors = fsl.len();
     let padded_dim = tq_padded_dim(dimension)?;
 
-    let rotation = SorfMatrix::try_new(padded_dim, config.num_rounds() as usize, config.seed())?;
-    debug_assert_eq!(rotation.padded_dim(), padded_dim);
+    let sorf_transform =
+        SorfMatrix::try_new(padded_dim, config.num_rounds() as usize, config.seed())?;
+    debug_assert_eq!(sorf_transform.padded_dim(), padded_dim);
     let padded_dim_u32 = u32::try_from(padded_dim)
         .map_err(|_| vortex_err!("TurboQuant padded dimension does not fit u32"))?;
 
@@ -78,7 +79,7 @@ pub(crate) unsafe fn turboquant_quantize_core(
         .ok_or_else(|| vortex_err!("TurboQuant codes length overflow"))?;
     let mut all_indices = BufferMut::<u8>::with_capacity(codes_len);
     let mut padded = vec![0.0f32; padded_dim];
-    let mut rotated = vec![0.0f32; padded_dim];
+    let mut transformed = vec![0.0f32; padded_dim];
 
     let f32_slice = f32_elements.as_slice();
     let dimension = dimension as usize;
@@ -100,11 +101,11 @@ pub(crate) unsafe fn turboquant_quantize_core(
         padded[..dimension].copy_from_slice(x);
         padded[dimension..].fill(0.0);
 
-        rotation.rotate(&padded, &mut rotated);
+        sorf_transform.transform(&padded, &mut transformed);
 
         // SAFETY: `all_indices` was allocated with capacity `codes_len`, and the loop appends
         // exactly `padded_dim` codes for each of `num_vectors` iterations.
-        for &value in rotated.iter() {
+        for &value in transformed.iter() {
             unsafe { all_indices.push_unchecked(find_nearest_centroid(value, &boundaries)) };
         }
     }
diff --git a/vortex-turboquant/src/vector/unpack.rs b/vortex-turboquant/src/vector/unpack.rs
diff --git a/vortex-turboquant/src/vtable.rs b/vortex-turboquant/src/vtable.rs

Original file line number	Diff line number	Diff line change
`@@ -49,12 +49,12 @@ impl TurboQuantConfig {`
`49`	`49`	`self.bit_width`
`50`	`50`	`}`
`51`	`51`
`52`		`- /// Seed used to derive the deterministic SORF rotation.`
	`52`	`+ /// Seed used to derive the deterministic SORF transform.`
`53`	`53`	`pub fn seed(&self) -> u64 {`
`54`	`54`	`self.seed`
`55`	`55`	`}`
`56`	`56`
`57`		`- /// Number of sign-diagonal plus Walsh-Hadamard rounds in the SORF rotation.`
	`57`	`+ /// Number of sign-diagonal plus Walsh-Hadamard rounds in the SORF transform.`
`58`	`58`	`pub fn num_rounds(&self) -> u8 {`
`59`	`59`	`self.num_rounds`
`60`	`60`	`}`