-
Notifications
You must be signed in to change notification settings - Fork 160
Expand file tree
/
Copy pathmod.rs
More file actions
127 lines (119 loc) · 5.2 KB
/
mod.rs
File metadata and controls
127 lines (119 loc) · 5.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
//! TurboQuant vector quantization encoding for Vortex.
//!
//! Implements a Stage 1 TurboQuant encoding ([arXiv:2504.19874], [RFC 0033]) for lossy
//! compression of high-dimensional vector data. The encoding operates on [`Vector`] extension
//! arrays, compressing their `FixedSizeList` storage into quantized codes after a structured
//! orthogonal surrogate rotation.
//!
//! [arXiv:2504.19874]: https://arxiv.org/abs/2504.19874
//! [RFC 0033]: https://vortex-data.github.io/rfcs/rfc/0033.html
//! [`Vector`]: crate::vector::Vector
//!
//! # Overview
//!
//! TurboQuant minimizes mean-squared reconstruction error (1-8 bits per coordinate)
//! using MSE-optimal scalar quantization on coordinates of a rotated unit vector.
//!
//! The TurboQuant paper analyzes a full random orthogonal rotation. The current Vortex
//! implementation instead uses a fixed 3-round Walsh-Hadamard-based structured transform with
//! random sign diagonals. This is a practical approximation chosen for encode/decode efficiency,
//! and should be understood as an implementation choice rather than the exact construction used in
//! the paper's proofs.
//!
//! The current encoding is also intentionally MSE-only. It does not yet implement the paper's QJL
//! residual correction for unbiased inner-product estimation, and it still uses internal
//! power-of-2 padding rather than the block decomposition proposed in RFC 0033.
//!
//! # Theoretical error bounds
//!
//! For unit-norm vectors quantized at `b` bits per coordinate, the paper's Theorem 1
//! guarantees normalized MSE distortion:
//!
//! > `E[||x - x_hat||^2 / ||x||^2] <= (sqrt(3) * pi / 2) / 4^b`
//!
//! | Bits | MSE bound | Quality |
//! |------|------------|-------------------|
//! | 1 | 6.80e-01 | Poor |
//! | 2 | 1.70e-01 | Usable for ANN |
//! | 3 | 4.25e-02 | Good |
//! | 4 | 1.06e-02 | Very good |
//! | 5 | 2.66e-03 | Excellent |
//! | 6 | 6.64e-04 | Near-lossless |
//! | 7 | 1.66e-04 | Near-lossless |
//! | 8 | 4.15e-05 | Near-lossless |
//!
//! # Compression ratios
//!
//! Each vector is stored as `padded_dim * bit_width / 8` bytes of quantized codes plus one stored
//! norm. In the current implementation, that norm uses the vector's element float type, not a
//! separate fixed storage precision. Non-power-of-2 dimensions are padded to the next power of 2
//! for the structured rotation, which reduces the effective ratio for those dimensions.
//!
//! The table below assumes f32 input, so the stored norm is 4 bytes.
//!
//! | dim | padded | bits | f32 bytes | TQ bytes | ratio |
//! |------|--------|------|-----------|----------|--------|
//! | 768 | 1024 | 2 | 3072 | 260 | 11.8x |
//! | 1024 | 1024 | 2 | 4096 | 260 | 15.8x |
//! | 768 | 1024 | 4 | 3072 | 516 | 6.0x |
//! | 1024 | 1024 | 4 | 4096 | 516 | 7.9x |
//! | 768 | 1024 | 8 | 3072 | 1028 | 3.0x |
//! | 1024 | 1024 | 8 | 4096 | 1028 | 4.0x |
//!
//! # Example
//!
//! ```
//! use vortex_array::IntoArray;
//! use vortex_array::VortexSessionExecute;
//! use vortex_array::arrays::ExtensionArray;
//! use vortex_array::arrays::FixedSizeListArray;
//! use vortex_array::arrays::PrimitiveArray;
//! use vortex_array::dtype::extension::ExtDType;
//! use vortex_array::extension::EmptyMetadata;
//! use vortex_array::validity::Validity;
//! use vortex_buffer::BufferMut;
//! use vortex_array::session::ArraySession;
//! use vortex_session::VortexSession;
//! use vortex_tensor::encodings::turboquant::{TurboQuantConfig, turboquant_encode};
//! use vortex_tensor::vector::Vector;
//!
//! // Create a Vector extension array of 100 random 128-d vectors.
//! let num_rows = 100;
//! let dim = 128u32;
//! let mut buf = BufferMut::<f32>::with_capacity(num_rows * dim as usize);
//! for i in 0..(num_rows * dim as usize) {
//! buf.push((i as f32 * 0.001).sin());
//! }
//! let elements = PrimitiveArray::new::<f32>(buf.freeze(), Validity::NonNullable);
//! let fsl = FixedSizeListArray::try_new(
//! elements.into_array(), dim, Validity::NonNullable, num_rows,
//! ).unwrap();
//! let ext_dtype = ExtDType::<Vector>::try_new(EmptyMetadata, fsl.dtype().clone())
//! .unwrap().erased();
//! let ext = ExtensionArray::new(ext_dtype, fsl.into_array());
//!
//! // Quantize at 2 bits per coordinate.
//! let config = TurboQuantConfig { bit_width: 2, seed: Some(42), num_rounds: 3 };
//! let session = VortexSession::empty().with::<ArraySession>();
//! let mut ctx = session.create_execution_ctx();
//! let encoded = turboquant_encode(ext.as_view(), &config, &mut ctx).unwrap();
//!
//! // Verify compression: 100 vectors x 128 dims x 4 bytes = 51200 bytes input.
//! assert!(encoded.nbytes() < 51200);
//! ```
mod array;
pub use array::data::TurboQuantArrayExt;
pub use array::data::TurboQuantData;
pub(crate) mod compute;
mod metadata;
mod vtable;
pub use vtable::TurboQuant;
pub use vtable::TurboQuantArray;
mod scheme;
pub use scheme::TurboQuantScheme;
pub use scheme::compress::TurboQuantConfig;
pub use scheme::compress::turboquant_encode;
#[cfg(test)]
mod tests;