Skip to content

Commit f46567e

Browse files
lwwmanningclaude
authored and committed
TurboQuant encoding for Vectors (#7167)
Lossy quantization for vector data (e.g., embeddings) based on TurboQuant (https://arxiv.org/abs/2504.19874). Supports both MSE-optimal and inner-product-optimal (Prod with QJL correction) variants at 1-8 bits per coordinate. Key components: - Single TurboQuant array encoding with optional QJL correction fields, storing quantized codes, norms, centroids, and rotation signs as children. - Structured Random Hadamard Transform (SRHT) for O(d log d) rotation, fully self-contained with no external linear algebra library. - Max-Lloyd centroid computation on Beta(d/2, d/2) distribution. - Approximate cosine similarity and dot product compute directly on quantized arrays without full decompression. - Pluggable TurboQuantScheme for BtrBlocks, exposed via WriteStrategyBuilder::with_vector_quantization(). - Benchmarks covering common embedding dimensions (128, 768, 1024, 1536). Also refactors CompressingStrategy to a single constructor, and adds vortex_tensor::initialize() for session registration of tensor types, encodings, and scalar functions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-Authored-By: Will Manning <will@willmanning.io> Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent 02b0949 commit f46567e

File tree

29 files changed

+3655
-16
lines changed

29 files changed

+3655
-16
lines changed

Cargo.lock

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

_typos.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[default]
2-
extend-ignore-identifiers-re = ["ffor", "FFOR", "FoR", "typ", "ratatui"]
2+
extend-ignore-identifiers-re = ["ffor", "FFOR", "FoR", "typ", "ratatui", "wht", "WHT"]
33
# We support a few common special comments to tell the checker to ignore sections of code
44
extend-ignore-re = [
55
"(#|//)\\s*spellchecker:ignore-next-line\\n.*", # Ignore the next line

vortex-btrblocks/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ vortex-pco = { workspace = true, optional = true }
3535
vortex-runend = { workspace = true }
3636
vortex-sequence = { workspace = true }
3737
vortex-sparse = { workspace = true }
38+
vortex-tensor = { workspace = true, optional = true }
3839
vortex-utils = { workspace = true }
3940
vortex-zigzag = { workspace = true }
4041
vortex-zstd = { workspace = true, optional = true }
@@ -47,7 +48,7 @@ vortex-array = { workspace = true, features = ["_test-harness"] }
4748

4849
[features]
4950
# This feature enables unstable encodings for which we don't guarantee stability.
50-
unstable_encodings = ["vortex-zstd?/unstable_encodings"]
51+
unstable_encodings = ["dep:vortex-tensor", "vortex-zstd?/unstable_encodings"]
5152
pco = ["dep:pco", "dep:vortex-pco"]
5253
zstd = ["dep:vortex-zstd"]
5354

vortex-btrblocks/src/builder.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ impl BtrBlocksCompressorBuilder {
120120
/// Adds compact encoding schemes (Zstd for strings, Pco for numerics).
121121
///
122122
/// This provides better compression ratios than the default, especially for floating-point
123-
/// heavy datasets. Requires the `zstd` feature. When the `pco` feature is also enabled,
123+
/// heavy datasets. Requires the `zstd` feature. When the `pco` feature is also enabled,
124124
/// Pco schemes for integers and floats are included.
125125
///
126126
/// # Panics
@@ -138,6 +138,23 @@ impl BtrBlocksCompressorBuilder {
138138
builder
139139
}
140140

141+
/// Adds the TurboQuant lossy vector quantization scheme.
142+
///
143+
/// When enabled, [`Vector`] extension arrays are compressed using the TurboQuant algorithm with
144+
/// QJL correction for unbiased inner product estimation.
145+
///
146+
/// # Panics
147+
///
148+
/// Panics if the TurboQuant scheme is already present.
149+
///
150+
/// [`Vector`]: vortex_tensor::vector::Vector
151+
/// [`FixedShapeTensor`]: vortex_tensor::fixed_shape::FixedShapeTensor
152+
#[cfg(feature = "unstable_encodings")]
153+
pub fn with_turboquant(self) -> Self {
154+
use vortex_tensor::encodings::turboquant::scheme::TURBOQUANT_SCHEME;
155+
self.with_new_scheme(&TURBOQUANT_SCHEME)
156+
}
157+
141158
/// Excludes schemes without CUDA kernel support and adds Zstd for string compression.
142159
///
143160
/// With the `unstable_encodings` feature, buffer-level Zstd compression is used which

vortex-file/src/strategy.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ use vortex_pco::Pco;
5656
use vortex_runend::RunEnd;
5757
use vortex_sequence::Sequence;
5858
use vortex_sparse::Sparse;
59+
#[cfg(feature = "unstable_encodings")]
60+
use vortex_tensor::encodings::turboquant::TurboQuant;
5961
use vortex_utils::aliases::hash_map::HashMap;
6062
use vortex_zigzag::ZigZag;
6163
#[cfg(feature = "zstd")]
@@ -104,6 +106,8 @@ pub static ALLOWED_ENCODINGS: LazyLock<ArrayRegistry> = LazyLock::new(|| {
104106
session.register(RunEnd);
105107
session.register(Sequence);
106108
session.register(Sparse);
109+
#[cfg(feature = "unstable_encodings")]
110+
session.register(TurboQuant);
107111
session.register(ZigZag);
108112

109113
#[cfg(feature = "zstd")]

vortex-layout/src/layouts/table.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,13 +86,14 @@ impl TableStrategy {
8686
/// ```ignore
8787
/// # use std::sync::Arc;
8888
/// # use vortex_array::dtype::{field_path, Field, FieldPath};
89+
/// # use vortex_btrblocks::BtrBlocksCompressor;
8990
/// # use vortex_layout::layouts::compressed::CompressingStrategy;
9091
/// # use vortex_layout::layouts::flat::writer::FlatLayoutStrategy;
9192
/// # use vortex_layout::layouts::table::TableStrategy;
9293
///
93-
/// # use vortex_btrblocks::BtrBlocksCompressor;
9494
/// // A strategy for compressing data using the balanced BtrBlocks compressor.
95-
/// let compress = CompressingStrategy::new(FlatLayoutStrategy::default(), BtrBlocksCompressor::default());
95+
/// let compress =
96+
/// CompressingStrategy::new(FlatLayoutStrategy::default(), BtrBlocksCompressor::default());
9697
///
9798
/// // Our combined strategy uses no compression for validity buffers, BtrBlocks compression
9899
/// // for most columns, and stores a nested binary column uncompressed (flat) because it

vortex-tensor/Cargo.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,18 @@ workspace = true
1919
[dependencies]
2020
vortex-array = { workspace = true }
2121
vortex-buffer = { workspace = true }
22+
vortex-compressor = { workspace = true }
2223
vortex-error = { workspace = true }
24+
vortex-fastlanes = { workspace = true }
2325
vortex-session = { workspace = true }
26+
vortex-utils = { workspace = true }
2427

28+
half = { workspace = true }
2529
itertools = { workspace = true }
2630
num-traits = { workspace = true }
2731
prost = { workspace = true }
32+
rand = { workspace = true }
2833

2934
[dev-dependencies]
35+
rand_distr = { workspace = true }
3036
rstest = { workspace = true }
31-
vortex-buffer = { workspace = true }

0 commit comments

Comments
 (0)