Skip to content

Commit bd68641

Browse files
committed
saev
Signed-off-by: Andrew Duffy <andrew@a10y.dev>
1 parent 2e4b405 commit bd68641

File tree

3 files changed

+94
-2
lines changed

3 files changed

+94
-2
lines changed

encodings/fastlanes/src/bitpacking/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ pub use array::unpack_iter;
1111

1212
pub(crate) mod compute;
1313

14+
mod plugin;
1415
mod vtable;
16+
17+
pub use plugin::BitPackedPatchedPlugin;
1518
pub use vtable::BitPacked;
1619
pub use vtable::BitPackedArray;
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! A custom [`ArrayPlugin`] that lets you load in and deserialize a `BitPacked` array with interior
5+
//! patches as a `PatchedArray` that wraps a patchless `BitPacked` array.
6+
//!
7+
//! This enables zero-cost backward compatibility with previously written datasets.
8+
9+
use vortex_array::ArrayId;
10+
use vortex_array::ArrayPlugin;
11+
use vortex_array::ArrayRef;
12+
use vortex_array::IntoArray;
13+
use vortex_array::VortexSessionExecute;
14+
use vortex_array::arrays::Patched;
15+
use vortex_array::buffer::BufferHandle;
16+
use vortex_array::dtype::DType;
17+
use vortex_array::serde::ArrayChildren;
18+
use vortex_error::VortexResult;
19+
use vortex_error::vortex_err;
20+
use vortex_session::VortexSession;
21+
22+
use crate::BitPacked;
23+
use crate::BitPackedArrayExt;
24+
25+
/// Custom deserialization plugin that converts a BitPacked array with interior
26+
/// Patches into a PatchedArray holding a BitPacked array.
27+
#[derive(Debug, Clone)]
28+
pub struct BitPackedPatchedPlugin;
29+
30+
impl ArrayPlugin for BitPackedPatchedPlugin {
31+
fn id(&self) -> ArrayId {
32+
// We reuse the existing `BitPacked` ID so that we can take over its
33+
// deserialization pathway.
34+
BitPacked::ID
35+
}
36+
37+
fn deserialize(
38+
&self,
39+
dtype: &DType,
40+
len: usize,
41+
metadata: &[u8],
42+
buffers: &[BufferHandle],
43+
children: &dyn ArrayChildren,
44+
session: &VortexSession,
45+
) -> VortexResult<ArrayRef> {
46+
let bitpacked = BitPacked
47+
.deserialize(dtype, len, metadata, buffers, children, session)?
48+
.try_downcast::<BitPacked>()
49+
.map_err(|_| {
50+
vortex_err!("BitPacked plugin should only deserialize vortex.bitpacked")
51+
})?;
52+
53+
// Create a new BitPackedArray without the interior patches installed.
54+
let Some(patches) = bitpacked.patches() else {
55+
return Ok(bitpacked.into_array());
56+
};
57+
58+
let packed = bitpacked.packed().clone();
59+
let ptype = bitpacked.dtype().as_ptype();
60+
let validity = bitpacked.validity()?;
61+
let bw = bitpacked.bit_width;
62+
let len = bitpacked.len();
63+
let offset = bitpacked.offset();
64+
65+
let bitpacked_without_patches =
66+
BitPacked::try_new(packed, ptype, validity, None, bw, len, offset)?.into_array();
67+
68+
let patched = Patched::from_array_and_patches(
69+
bitpacked_without_patches,
70+
&patches,
71+
&mut session.create_execution_ctx(),
72+
)?;
73+
74+
Ok(patched.into_array())
75+
}
76+
}

vortex-file/src/strategy.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33

44
//! This module defines the default layout strategy for a Vortex file.
55
6+
use std::env;
67
use std::sync::Arc;
78
use std::sync::LazyLock;
89

910
use vortex_alp::ALP;
1011
// Compressed encodings from encoding crates
1112
// Canonical array encodings from vortex-array
1213
use vortex_alp::ALPRD;
13-
use vortex_array::arrays::Bool;
1414
use vortex_array::arrays::Chunked;
1515
use vortex_array::arrays::Constant;
1616
use vortex_array::arrays::Decimal;
@@ -25,6 +25,7 @@ use vortex_array::arrays::Primitive;
2525
use vortex_array::arrays::Struct;
2626
use vortex_array::arrays::VarBin;
2727
use vortex_array::arrays::VarBinView;
28+
use vortex_array::arrays::{Bool, Patched};
2829
use vortex_array::dtype::FieldPath;
2930
use vortex_array::session::ArrayRegistry;
3031
use vortex_array::session::ArraySession;
@@ -35,6 +36,7 @@ use vortex_bytebool::ByteBool;
3536
use vortex_datetime_parts::DateTimeParts;
3637
use vortex_decimal_byte_parts::DecimalByteParts;
3738
use vortex_fastlanes::BitPacked;
39+
use vortex_fastlanes::BitPackedPatchedPlugin;
3840
use vortex_fastlanes::Delta;
3941
use vortex_fastlanes::FoR;
4042
use vortex_fastlanes::RLE;
@@ -67,6 +69,10 @@ use vortex_zstd::ZstdBuffers;
6769

6870
const ONE_MEG: u64 = 1 << 20;
6971

72+
/// Whether experimental patched-array deserialization is enabled.
///
/// Evaluated lazily, once, on first access: `true` when the
/// `VORTEX_EXPERIMENTAL_PATCHED_ARRAY` environment variable is present in the process
/// environment. Only presence is checked, so any value (including an empty string or
/// `0`) turns the feature on.
static USE_EXPERIMENTAL_PATCHES: LazyLock<bool> =
    // `var_os` rather than `var`: a value that is not valid Unicode still counts as set.
    LazyLock::new(|| env::var_os("VORTEX_EXPERIMENTAL_PATCHED_ARRAY").is_some());
75+
7076
/// Static registry of all allowed array encodings for file writing.
7177
///
7278
/// This includes all canonical encodings from vortex-array plus all compressed
@@ -94,7 +100,14 @@ pub static ALLOWED_ENCODINGS: LazyLock<ArrayRegistry> = LazyLock::new(|| {
94100
// Compressed encodings from encoding crates
95101
session.register(ALP);
96102
session.register(ALPRD);
97-
session.register(BitPacked);
103+
if *USE_EXPERIMENTAL_PATCHES {
104+
session.register(Patched);
105+
// Deserialize BitPacked arrays with Patches as PatchedArray of BitPacked
106+
// without the interior patches.
107+
session.register(BitPackedPatchedPlugin)
108+
} else {
109+
session.register(BitPacked);
110+
}
98111
session.register(ByteBool);
99112
session.register(DateTimeParts);
100113
session.register(DecimalByteParts);

0 commit comments

Comments
 (0)