Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions encodings/fastlanes/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,8 @@ impl vortex_array::hash::ArrayHash for vortex_fastlanes::RLEData

pub fn vortex_fastlanes::RLEData::array_hash<H: core::hash::Hasher>(&self, state: &mut H, _precision: vortex_array::hash::Precision)

pub static vortex_fastlanes::USE_EXPERIMENTAL_PATCHES: std::sync::lazy_lock::LazyLock<bool>

pub trait vortex_fastlanes::BitPackedArrayExt: vortex_array::array::typed::TypedArrayRef<vortex_fastlanes::BitPacked>

pub fn vortex_fastlanes::BitPackedArrayExt::bit_width(&self) -> u8
Expand Down
3 changes: 3 additions & 0 deletions encodings/fastlanes/src/bitpacking/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ pub use array::unpack_iter;

pub(crate) mod compute;

mod plugin;
mod vtable;

pub(crate) use plugin::BitPackedPatchedPlugin;
pub use vtable::BitPacked;
pub use vtable::BitPackedArray;
76 changes: 76 additions & 0 deletions encodings/fastlanes/src/bitpacking/plugin.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! A custom [`ArrayPlugin`] that lets you load in and deserialize a `BitPacked` array with interior
//! patches as a `PatchedArray` that wraps a patchless `BitPacked` array.
//!
//! This enables zero-cost backward compatibility with previously written datasets.

Comment on lines +4 to +8
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This introduces a new deserialization pathway that conditionally rewrites BitPacked (with interior patches) into a Patched wrapper based on runtime configuration, but there’s no test exercising the behavior. Adding a unit/integration test that round-trips a BitPacked with patches through serialization/deserialization and asserts the resulting encoding (BitPacked vs Patched) would help prevent regressions.

Copilot uses AI. Check for mistakes.
use vortex_array::ArrayId;
use vortex_array::ArrayPlugin;
use vortex_array::ArrayRef;
use vortex_array::IntoArray;
use vortex_array::VortexSessionExecute;
use vortex_array::arrays::Patched;
use vortex_array::buffer::BufferHandle;
use vortex_array::dtype::DType;
use vortex_array::serde::ArrayChildren;
use vortex_error::VortexResult;
use vortex_error::vortex_err;
use vortex_session::VortexSession;

use crate::BitPacked;
use crate::BitPackedArrayExt;

/// Custom deserialization plugin that converts a `BitPacked` array with interior
/// patches into a `Patched` array wrapping a patchless `BitPacked` array.
///
/// The plugin reports `BitPacked::ID` as its own ID, so registering it takes over the
/// deserialization pathway of the regular `BitPacked` encoding.
#[derive(Debug, Clone)]
pub(crate) struct BitPackedPatchedPlugin;

impl ArrayPlugin for BitPackedPatchedPlugin {
fn id(&self) -> ArrayId {
// We reuse the existing `BitPacked` ID so that we can take over its
// deserialization pathway.
BitPacked::ID
}

fn deserialize(
&self,
dtype: &DType,
len: usize,
metadata: &[u8],
buffers: &[BufferHandle],
children: &dyn ArrayChildren,
session: &VortexSession,
) -> VortexResult<ArrayRef> {
let bitpacked = BitPacked
.deserialize(dtype, len, metadata, buffers, children, session)?
.try_downcast::<BitPacked>()
.map_err(|_| {
vortex_err!("BitPacked plugin should only deserialize vortex.bitpacked")
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error message mentions vortex.bitpacked, but this encoding’s ID is fastlanes.bitpacked (see BitPacked::ID). Using the correct ID (or formatting the BitPacked::ID value) will make failures easier to diagnose.

Suggested change
vortex_err!("BitPacked plugin should only deserialize vortex.bitpacked")
vortex_err!("BitPacked plugin should only deserialize {}", BitPacked::ID)

Copilot uses AI. Check for mistakes.
})?;

// Create a new BitPackedArray without the interior patches installed.
let Some(patches) = bitpacked.patches() else {
return Ok(bitpacked.into_array());
};

let packed = bitpacked.packed().clone();
let ptype = bitpacked.dtype().as_ptype();
let validity = bitpacked.validity()?;
let bw = bitpacked.bit_width;
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This accesses bitpacked.bit_width directly even though BitPackedArrayExt is already in scope and provides bit_width(). Using the accessor avoids relying on internal struct fields and reduces the chance of breakage if the BitPacked data representation changes.

Suggested change
let bw = bitpacked.bit_width;
let bw = bitpacked.bit_width();

Copilot uses AI. Check for mistakes.
let len = bitpacked.len();
let offset = bitpacked.offset();

let bitpacked_without_patches =
BitPacked::try_new(packed, ptype, validity, None, bw, len, offset)?.into_array();

let patched = Patched::from_array_and_patches(
bitpacked_without_patches,
&patches,
&mut session.create_execution_ctx(),
)?;

Ok(patched.into_array())
}
}
21 changes: 20 additions & 1 deletion encodings/fastlanes/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

#![allow(clippy::cast_possible_truncation)]

use std::env;
use std::sync::LazyLock;

pub use bitpacking::*;
pub use delta::*;
pub use r#for::*;
Expand Down Expand Up @@ -31,9 +34,25 @@ use vortex_array::aggregate_fn::session::AggregateFnSessionExt;
use vortex_array::session::ArraySessionExt;
use vortex_session::VortexSession;

/// Flag indicating if experimental patched array support is enabled.
///
/// This is set using the environment variable `VORTEX_EXPERIMENTAL_PATCHED_ARRAY`. Only the
/// presence of the variable is checked: any value, including an empty or non-Unicode one,
/// enables the flag. The environment is read once, on first access.
///
/// When this is true, any BitPacked array with interior patches will be read as a `Patched`
/// array, and the builtin compressor will use Patched array with BitPacked instead of
/// BitPacked array with interior patches.
pub static USE_EXPERIMENTAL_PATCHES: LazyLock<bool> =
    LazyLock::new(|| env::var_os("VORTEX_EXPERIMENTAL_PATCHED_ARRAY").is_some());

/// Initialize fastlanes encodings in the given session.
pub fn initialize(session: &VortexSession) {
session.arrays().register(BitPacked);
// If we're using the experimental Patched encoding, register a shim that
// decodes BitPacked arrays with interior patches as Patched arrays.
if *USE_EXPERIMENTAL_PATCHES {
session.arrays().register(BitPackedPatchedPlugin);
} else {
session.arrays().register(BitPacked);
}
session.arrays().register(Delta);
session.arrays().register(FoR);
session.arrays().register(RLE);
Expand Down
64 changes: 48 additions & 16 deletions vortex-btrblocks/src/schemes/integer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
use vortex_array::ArrayRef;
use vortex_array::Canonical;
use vortex_array::IntoArray;
use vortex_array::LEGACY_SESSION;
use vortex_array::ToCanonical;
use vortex_array::VortexSessionExecute;
use vortex_array::arrays::ConstantArray;
use vortex_array::arrays::Patched;
use vortex_array::arrays::primitive::PrimitiveArrayExt;
use vortex_array::scalar::Scalar;
use vortex_compressor::builtins::FloatDictScheme;
Expand All @@ -20,9 +23,10 @@ use vortex_error::VortexExpect;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_err;
use vortex_fastlanes::BitPackedArrayExt;
use vortex_fastlanes::BitPacked;
use vortex_fastlanes::FoR;
use vortex_fastlanes::FoRArrayExt;
use vortex_fastlanes::USE_EXPERIMENTAL_PATCHES;
use vortex_fastlanes::bitpack_compress::bit_width_histogram;
use vortex_fastlanes::bitpack_compress::bitpack_encode;
use vortex_fastlanes::bitpack_compress::find_best_bit_width;
Expand Down Expand Up @@ -324,21 +328,49 @@ impl Scheme for BitPackingScheme {

let packed_stats = packed.statistics().to_owned();
let ptype = packed.dtype().as_ptype();
let patches = packed.patches().map(compress_patches).transpose()?;
let mut parts = vortex_fastlanes::BitPacked::into_parts(packed);
parts.patches = patches;

Ok(vortex_fastlanes::BitPacked::try_new(
parts.packed,
ptype,
parts.validity,
parts.patches,
parts.bit_width,
parts.len,
parts.offset,
)?
.with_stats_set(packed_stats)
.into_array())
let mut parts = BitPacked::into_parts(packed);

let array = if *USE_EXPERIMENTAL_PATCHES {
let patches = parts.patches.take();
// Transpose patches into G-ALP style PatchedArray, wrapping an inner BitPackedArray.
let array = BitPacked::try_new(
parts.packed,
ptype,
parts.validity,
None,
parts.bit_width,
parts.len,
parts.offset,
)?
.into_array();

match patches {
None => array,
Some(p) => Patched::from_array_and_patches(
array,
&p,
&mut LEGACY_SESSION.create_execution_ctx(),
)?
.into_array(),
}
} else {
Comment on lines +333 to +356
Copy link

Copilot AI Apr 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When USE_EXPERIMENTAL_PATCHES is enabled, this branch no longer reapplies packed_stats to the returned array (neither the inner patchless BitPacked nor the outer Patched). This drops statistics that were previously preserved, which can regress downstream optimizations/decisions that rely on stats. Consider propagating packed_stats onto the final returned array in both branches (e.g., set stats on the BitPacked before wrapping, and/or on the resulting Patched array).

Copilot uses AI. Check for mistakes.
// Compress patches and place back into BitPackedArray.
let patches = parts.patches.take().map(compress_patches).transpose()?;
parts.patches = patches;
BitPacked::try_new(
parts.packed,
ptype,
parts.validity,
parts.patches,
parts.bit_width,
parts.len,
parts.offset,
)?
.with_stats_set(packed_stats)
.into_array()
};

Ok(array)
}
}

Expand Down
7 changes: 5 additions & 2 deletions vortex-file/src/strategy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ use std::sync::Arc;
use std::sync::LazyLock;

use vortex_alp::ALP;
// Compressed encodings from encoding crates
// Canonical array encodings from vortex-array
use vortex_alp::ALPRD;
use vortex_array::ArrayId;
use vortex_array::VTable;
Expand All @@ -23,6 +21,7 @@ use vortex_array::arrays::List;
use vortex_array::arrays::ListView;
use vortex_array::arrays::Masked;
use vortex_array::arrays::Null;
use vortex_array::arrays::Patched;
use vortex_array::arrays::Primitive;
use vortex_array::arrays::Struct;
use vortex_array::arrays::VarBin;
Expand Down Expand Up @@ -92,6 +91,10 @@ pub static ALLOWED_ENCODINGS: LazyLock<HashSet<ArrayId>> = LazyLock::new(|| {
allowed.insert(Masked.id());
allowed.insert(Dict.id());

if *vortex_fastlanes::USE_EXPERIMENTAL_PATCHES {
allowed.insert(Patched.id());
}

// Compressed encodings from encoding crates
allowed.insert(ALP.id());
allowed.insert(ALPRD.id());
Expand Down
Loading