From ab15db25017d1612bdb9fa4bdc0d727618365944 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Mon, 16 Mar 2026 11:46:29 -0400 Subject: [PATCH] add high-level docs to vortex-btrblocks Signed-off-by: Connor Tsui --- vortex-btrblocks/src/compressor/mod.rs | 14 +++++++++++++- vortex-btrblocks/src/lib.rs | 16 ++++++++++++++++ vortex-btrblocks/src/scheme.rs | 19 ++++++++++++++++++- 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/vortex-btrblocks/src/compressor/mod.rs b/vortex-btrblocks/src/compressor/mod.rs index af59ad41bda..5c3a31271cd 100644 --- a/vortex-btrblocks/src/compressor/mod.rs +++ b/vortex-btrblocks/src/compressor/mod.rs @@ -1,7 +1,19 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Compressor traits for type-specific compression. +//! Type-specific compressor traits that drive scheme selection and compression. +//! +//! [`Compressor`] defines the interface: generate statistics for an array via +//! [`Compressor::gen_stats`], and provide available [`Scheme`]s via [`Compressor::schemes`]. +//! +//! [`CompressorExt`] is blanket-implemented for all `Compressor`s and adds the core logic: +//! +//! - [`CompressorExt::choose_scheme`] iterates over all schemes, skips excluded ones, and calls +//! [`Scheme::expected_compression_ratio`] on each. It returns the scheme with the highest ratio +//! above 1.0, or falls back to the default. See the [`scheme`](crate::scheme) module for how +//! ratio estimation works. +//! - [`CompressorExt::compress`] generates stats, calls `choose_scheme()`, and applies the +//! result. If compression does not shrink the array, the original is returned. use vortex_array::ArrayRef; use vortex_array::IntoArray; diff --git a/vortex-btrblocks/src/lib.rs b/vortex-btrblocks/src/lib.rs index 19130b47e32..28e4eeb8dfa 100644 --- a/vortex-btrblocks/src/lib.rs +++ b/vortex-btrblocks/src/lib.rs @@ -18,6 +18,22 @@ //! 
- **Statistical Analysis**: Uses data sampling and statistics to predict compression ratios //! - **Recursive Structure Handling**: Compresses nested structures like structs and lists //! +//! # How It Works +//! +//! [`BtrBlocksCompressor::compress()`] takes an `&ArrayRef` and returns an `ArrayRef` that may +//! use a different encoding. It first canonicalizes the input, then dispatches by type. +//! Primitive and string arrays go to a type-specific `Compressor` (integer, float, or string). +//! Compound types like structs and lists recurse into their fields and elements. +//! +//! Each type-specific compressor holds a static list of `Scheme` implementations (e.g. +//! BitPacking, ALP, Dict). There is no dynamic registry. The compressor evaluates each scheme by +//! compressing a ~1% sample and measuring the ratio, then picks the best. See `SchemeExt` for +//! details on how sampling works. +//! +//! Schemes can produce arrays that are themselves further compressed (e.g. FoR then BitPacking), +//! up to `MAX_CASCADE` (3) layers deep. An `Excludes` set prevents the same scheme from being +//! applied twice in a chain. +//! //! # Example //! //! ```rust diff --git a/vortex-btrblocks/src/scheme.rs b/vortex-btrblocks/src/scheme.rs index 63cb4bad28a..1b12a5930e5 100644 --- a/vortex-btrblocks/src/scheme.rs +++ b/vortex-btrblocks/src/scheme.rs @@ -1,7 +1,24 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Compression scheme traits. +//! Compression scheme traits. This is the interface each encoding implements to participate in +//! compression. +//! +//! [`Scheme`] is the core trait. Each encoding (e.g. BitPacking, ALP, Dict) implements it with +//! two key methods: [`Scheme::expected_compression_ratio`] to estimate how well it compresses +//! the data, and [`Scheme::compress`] to apply the encoding. Type-specific sub-traits +//! ([`IntegerScheme`], [`FloatScheme`], [`StringScheme`]) bind schemes to the appropriate stats +//! 
and code types. +//! +//! [`SchemeExt`] provides the default ratio estimation strategy. It samples ~1% of the array +//! (minimum [`SAMPLE_SIZE`] values), compresses the sample, and returns the before/after byte +//! ratio. Schemes can override [`Scheme::expected_compression_ratio`] if they have a cheaper +//! heuristic. +//! +//! [`IntegerScheme`]: crate::compressor::integer::IntegerScheme +//! [`FloatScheme`]: crate::compressor::float::FloatScheme +//! [`StringScheme`]: crate::compressor::string::StringScheme +//! [`SAMPLE_SIZE`]: crate::stats::SAMPLE_SIZE use std::fmt::Debug; use std::hash::Hash;