-
Notifications
You must be signed in to change notification settings - Fork 144
add env flag for Patched array #7314
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
abb955a
dd5b0c5
7e4fc74
87a6a95
5d03f60
b03e192
07a7c5a
9d5ebe1
3cd0c48
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,76 @@ | ||||||
| // SPDX-License-Identifier: Apache-2.0 | ||||||
| // SPDX-FileCopyrightText: Copyright the Vortex contributors | ||||||
|
|
||||||
| //! A custom [`ArrayPlugin`] that lets you load in and deserialize a `BitPacked` array with interior | ||||||
| //! patches as a `PatchedArray` that wraps a patchless `BitPacked` array. | ||||||
| //! | ||||||
| //! This enables zero-cost backward compatibility with previously written datasets. | ||||||
|
|
||||||
| use vortex_array::ArrayId; | ||||||
| use vortex_array::ArrayPlugin; | ||||||
| use vortex_array::ArrayRef; | ||||||
| use vortex_array::IntoArray; | ||||||
| use vortex_array::VortexSessionExecute; | ||||||
| use vortex_array::arrays::Patched; | ||||||
| use vortex_array::buffer::BufferHandle; | ||||||
| use vortex_array::dtype::DType; | ||||||
| use vortex_array::serde::ArrayChildren; | ||||||
| use vortex_error::VortexResult; | ||||||
| use vortex_error::vortex_err; | ||||||
| use vortex_session::VortexSession; | ||||||
|
|
||||||
| use crate::BitPacked; | ||||||
| use crate::BitPackedArrayExt; | ||||||
|
|
||||||
| /// Custom deserialization plugin that converts a BitPacked array with interior | ||||||
| /// Patches into a PatchedArray holding a BitPacked array. | ||||||
| #[derive(Debug, Clone)] | ||||||
| pub(crate) struct BitPackedPatchedPlugin; | ||||||
|
|
||||||
| impl ArrayPlugin for BitPackedPatchedPlugin { | ||||||
| fn id(&self) -> ArrayId { | ||||||
| // We reuse the existing `BitPacked` ID so that we can take over its | ||||||
| // deserialization pathway. | ||||||
| BitPacked::ID | ||||||
| } | ||||||
|
|
||||||
| fn deserialize( | ||||||
| &self, | ||||||
| dtype: &DType, | ||||||
| len: usize, | ||||||
| metadata: &[u8], | ||||||
| buffers: &[BufferHandle], | ||||||
| children: &dyn ArrayChildren, | ||||||
| session: &VortexSession, | ||||||
| ) -> VortexResult<ArrayRef> { | ||||||
| let bitpacked = BitPacked | ||||||
| .deserialize(dtype, len, metadata, buffers, children, session)? | ||||||
| .try_downcast::<BitPacked>() | ||||||
| .map_err(|_| { | ||||||
| vortex_err!("BitPacked plugin should only deserialize vortex.bitpacked") | ||||||
|
||||||
| vortex_err!("BitPacked plugin should only deserialize vortex.bitpacked") | |
| vortex_err!("BitPacked plugin should only deserialize {}", BitPacked::ID) |
Copilot
AI
Apr 8, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This accesses bitpacked.bit_width directly even though BitPackedArrayExt is already in scope and provides bit_width(). Using the accessor avoids relying on internal struct fields and reduces the chance of breakage if the BitPacked data representation changes.
| let bw = bitpacked.bit_width; | |
| let bw = bitpacked.bit_width(); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,8 +6,11 @@ | |
| use vortex_array::ArrayRef; | ||
| use vortex_array::Canonical; | ||
| use vortex_array::IntoArray; | ||
| use vortex_array::LEGACY_SESSION; | ||
| use vortex_array::ToCanonical; | ||
| use vortex_array::VortexSessionExecute; | ||
| use vortex_array::arrays::ConstantArray; | ||
| use vortex_array::arrays::Patched; | ||
| use vortex_array::arrays::primitive::PrimitiveArrayExt; | ||
| use vortex_array::scalar::Scalar; | ||
| use vortex_compressor::builtins::FloatDictScheme; | ||
|
|
@@ -20,9 +23,10 @@ use vortex_error::VortexExpect; | |
| use vortex_error::VortexResult; | ||
| use vortex_error::vortex_bail; | ||
| use vortex_error::vortex_err; | ||
| use vortex_fastlanes::BitPackedArrayExt; | ||
| use vortex_fastlanes::BitPacked; | ||
| use vortex_fastlanes::FoR; | ||
| use vortex_fastlanes::FoRArrayExt; | ||
| use vortex_fastlanes::USE_EXPERIMENTAL_PATCHES; | ||
| use vortex_fastlanes::bitpack_compress::bit_width_histogram; | ||
| use vortex_fastlanes::bitpack_compress::bitpack_encode; | ||
| use vortex_fastlanes::bitpack_compress::find_best_bit_width; | ||
|
|
@@ -324,21 +328,49 @@ impl Scheme for BitPackingScheme { | |
|
|
||
| let packed_stats = packed.statistics().to_owned(); | ||
| let ptype = packed.dtype().as_ptype(); | ||
| let patches = packed.patches().map(compress_patches).transpose()?; | ||
| let mut parts = vortex_fastlanes::BitPacked::into_parts(packed); | ||
| parts.patches = patches; | ||
|
|
||
| Ok(vortex_fastlanes::BitPacked::try_new( | ||
| parts.packed, | ||
| ptype, | ||
| parts.validity, | ||
| parts.patches, | ||
| parts.bit_width, | ||
| parts.len, | ||
| parts.offset, | ||
| )? | ||
| .with_stats_set(packed_stats) | ||
| .into_array()) | ||
| let mut parts = BitPacked::into_parts(packed); | ||
|
|
||
| let array = if *USE_EXPERIMENTAL_PATCHES { | ||
| let patches = parts.patches.take(); | ||
| // Transpose patches into G-ALP style PatchedArray, wrapping an inner BitPackedArray. | ||
| let array = BitPacked::try_new( | ||
| parts.packed, | ||
| ptype, | ||
| parts.validity, | ||
| None, | ||
| parts.bit_width, | ||
| parts.len, | ||
| parts.offset, | ||
| )? | ||
| .into_array(); | ||
|
|
||
| match patches { | ||
| None => array, | ||
| Some(p) => Patched::from_array_and_patches( | ||
| array, | ||
| &p, | ||
| &mut LEGACY_SESSION.create_execution_ctx(), | ||
| )? | ||
| .into_array(), | ||
| } | ||
| } else { | ||
|
Comment on lines
+333
to
+356
|
||
| // Compress patches and place back into BitPackedArray. | ||
| let patches = parts.patches.take().map(compress_patches).transpose()?; | ||
| parts.patches = patches; | ||
| BitPacked::try_new( | ||
| parts.packed, | ||
| ptype, | ||
| parts.validity, | ||
| parts.patches, | ||
| parts.bit_width, | ||
| parts.len, | ||
| parts.offset, | ||
| )? | ||
| .with_stats_set(packed_stats) | ||
| .into_array() | ||
| }; | ||
|
|
||
| Ok(array) | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This introduces a new deserialization pathway that conditionally rewrites
BitPacked(with interior patches) into aPatchedwrapper based on runtime configuration, but there’s no test exercising the behavior. Adding a unit/integration test that round-trips aBitPackedwith patches through serialization/deserialization and asserts the resulting encoding (BitPackedvsPatched) would help prevent regressions.