Skip to content

Commit 09601af

Browse files
authored
avoid including experimental encoding in footer (#7569)
## Summary We have been publishing a footer reference to `vortex.patched` experimental encoding in the last few releases. The issue is that we should not be including the Patched encoding in the default array context This removes Patched from the default array context, instead registering it conditionally where we register the other unstable encodings. I've verified that after this change writing a file with default settings no longer includes Patched encoding <img width="1356" height="185" alt="image" src="https://github.com/user-attachments/assets/56a89186-68ff-4588-8058-3ee6dc7bb767" /> --------- Signed-off-by: Andrew Duffy <andrew@a10y.dev>
1 parent 76ccbdf commit 09601af

8 files changed

Lines changed: 25 additions & 11 deletions

File tree

vortex-array/src/arrays/patched/vtable/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,7 @@ mod tests {
373373
use crate::patches::Patches;
374374
use crate::serde::SerializeOptions;
375375
use crate::serde::SerializedArray;
376+
use crate::session::ArraySessionExt;
376377
use crate::validity::Validity;
377378

378379
#[test]
@@ -588,7 +589,9 @@ mod tests {
588589
let dtype = array.dtype().clone();
589590
let len = array.len();
590591

591-
let ctx = ArrayContext::empty();
592+
LEGACY_SESSION.arrays().register(Patched);
593+
594+
let ctx = ArrayContext::empty().with_registry(LEGACY_SESSION.arrays().registry().clone());
592595
let serialized = array
593596
.serialize(&ctx, &LEGACY_SESSION, &SerializeOptions::default())
594597
.unwrap();

vortex-array/src/session/mod.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ use crate::arrays::List;
2323
use crate::arrays::ListView;
2424
use crate::arrays::Masked;
2525
use crate::arrays::Null;
26-
use crate::arrays::Patched;
2726
use crate::arrays::Primitive;
2827
use crate::arrays::Struct;
2928
use crate::arrays::VarBin;
@@ -80,7 +79,6 @@ impl Default for ArraySession {
8079
this.register(Dict);
8180
this.register(List);
8281
this.register(Masked);
83-
this.register(Patched);
8482
this.register(VarBin);
8583

8684
this

vortex-file/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ pub use forever_constant::*;
110110
pub use open::*;
111111
pub use strategy::*;
112112
use vortex_array::arrays::Dict;
113+
use vortex_array::arrays::Patched;
114+
use vortex_array::arrays::patched::use_experimental_patches;
113115
use vortex_array::session::ArraySessionExt;
114116
use vortex_bytebool::ByteBool;
115117
use vortex_fsst::FSST;
@@ -168,6 +170,9 @@ pub fn register_default_encodings(session: &VortexSession) {
168170
arrays.register(vortex_zstd::Zstd);
169171
#[cfg(all(feature = "zstd", feature = "unstable_encodings"))]
170172
arrays.register(vortex_zstd::ZstdBuffers);
173+
if use_experimental_patches() {
174+
arrays.register(Patched);
175+
}
171176
}
172177

173178
// Eventually all encodings crates should expose an initialize function. For now it's only

vortex-file/src/strategy.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,6 @@ pub static ALLOWED_ENCODINGS: LazyLock<HashSet<ArrayId>> = LazyLock::new(|| {
9090
allowed.insert(Masked.id());
9191
allowed.insert(Dict.id());
9292

93-
if use_experimental_patches() {
94-
allowed.insert(Patched.id());
95-
}
96-
9793
// Compressed encodings from encoding crates
9894
allowed.insert(ALP.id());
9995
allowed.insert(ALPRD.id());
@@ -111,6 +107,12 @@ pub static ALLOWED_ENCODINGS: LazyLock<HashSet<ArrayId>> = LazyLock::new(|| {
111107
allowed.insert(Sparse.id());
112108
allowed.insert(ZigZag.id());
113109

110+
// Experimental encodings
111+
112+
if use_experimental_patches() {
113+
allowed.insert(Patched.id());
114+
}
115+
114116
#[cfg(feature = "zstd")]
115117
allowed.insert(Zstd.id());
116118
#[cfg(all(feature = "zstd", feature = "unstable_encodings"))]

vortex-file/src/writer.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ use vortex_session::SessionExt;
4747
use vortex_session::VortexSession;
4848
use vortex_session::registry::ReadContext;
4949

50+
use crate::ALLOWED_ENCODINGS;
5051
use crate::Footer;
5152
use crate::MAGIC_BYTES;
5253
use crate::WriteStrategyBuilder;
@@ -147,7 +148,7 @@ impl VortexWriteOptions {
147148
// serialised array order is deterministic. The serialisation of arrays are done
148149
// parallel and with an empty context they can register their encodings to the context
149150
// in different order, changing the written bytes from run to run.
150-
let ctx = ArrayContext::new(self.session.arrays().registry().ids().sorted().collect())
151+
let ctx = ArrayContext::new(ALLOWED_ENCODINGS.iter().cloned().sorted().collect())
151152
// Configure a registry just to ensure only known encodings are interned.
152153
.with_registry(self.session.arrays().registry().clone());
153154
let dtype = stream.dtype().clone();

vortex-python/src/io.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ impl PyVortexWriteOptions {
280280
/// >>> vx.io.VortexWriteOptions.default().write(sprl, "chonky.vortex")
281281
/// >>> import os
282282
/// >>> os.path.getsize('chonky.vortex')
283-
/// 216036
283+
/// 215972
284284
/// ```
285285
///
286286
/// Wow, Vortex manages to use about two bytes per integer! So advanced. So tiny.
@@ -292,7 +292,7 @@ impl PyVortexWriteOptions {
292292
/// ```python
293293
/// >>> vx.io.VortexWriteOptions.compact().write(sprl, "tiny.vortex")
294294
/// >>> os.path.getsize('tiny.vortex')
295-
/// 55152
295+
/// 55088
296296
/// ```
297297
///
298298
/// Random numbers are not (usually) composed of random bytes!

vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ pub fn fixtures() -> Vec<Box<dyn FlatLayoutFixture>> {
4343
Box::new(dict::DictFixture),
4444
Box::new(fsst::FsstFixture),
4545
Box::new(for_::FoRFixture),
46-
Box::new(patched::PatchedFixture),
46+
// TODO(aduffy): add back once we stabilized Patched array
47+
// Box::new(patched::PatchedFixture),
4748
Box::new(pco::PcoFixture),
4849
Box::new(rle::RleFixture),
4950
Box::new(runend::RunEndFixture),

vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/patched.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ use vortex_session::VortexSession;
1414

1515
use crate::fixtures::FlatLayoutFixture;
1616

17+
#[expect(
18+
dead_code,
19+
reason = "This will be unused until we stabilize Patched array"
20+
)]
1721
pub struct PatchedFixture;
1822

1923
impl FlatLayoutFixture for PatchedFixture {

0 commit comments

Comments
 (0)