Skip to content

Commit c2dd0c8

Browse files
joseph-isaacslwwmanning
authored andcommitted
chore: have on demand validity and patches for array remove slot extraction (#7217)
This PR get rid of holding validity and patches on each ArrayData, instead we construct this on-demand. This will also a follow up to have `slots_mut -> &mut [Option<ArrayRef>]` instead of `with_slots` --------- Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk> Signed-off-by: Will Manning <will@willmanning.io>
1 parent 822bd4a commit c2dd0c8

165 files changed

Lines changed: 589 additions & 655 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

encodings/alp/public-api.lock

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ pub fn vortex_alp::ALPArray::into_parts(self) -> (vortex_array::array::ArrayRef,
118118

119119
pub fn vortex_alp::ALPArray::new(encoded: vortex_array::array::ArrayRef, exponents: vortex_alp::Exponents, patches: core::option::Option<vortex_array::patches::Patches>) -> Self
120120

121-
pub fn vortex_alp::ALPArray::patches(&self) -> core::option::Option<&vortex_array::patches::Patches>
121+
pub fn vortex_alp::ALPArray::patches(&self) -> core::option::Option<vortex_array::patches::Patches>
122122

123123
pub fn vortex_alp::ALPArray::ptype(&self) -> vortex_array::dtype::ptype::PType
124124

@@ -278,7 +278,7 @@ pub fn vortex_alp::ALPRDArray::left_parts(&self) -> &vortex_array::array::ArrayR
278278

279279
pub fn vortex_alp::ALPRDArray::left_parts_dictionary(&self) -> &vortex_buffer::buffer::Buffer<u16>
280280

281-
pub fn vortex_alp::ALPRDArray::left_parts_patches(&self) -> core::option::Option<&vortex_array::patches::Patches>
281+
pub fn vortex_alp::ALPRDArray::left_parts_patches(&self) -> core::option::Option<vortex_array::patches::Patches>
282282

283283
pub fn vortex_alp::ALPRDArray::replace_left_parts_patches(&mut self, patches: core::option::Option<vortex_array::patches::Patches>)
284284

encodings/alp/src/alp/array.rs

Lines changed: 46 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,14 @@ impl VTable for ALP {
7676
array.dtype.hash(state);
7777
array.encoded().array_hash(state, precision);
7878
array.exponents.hash(state);
79-
array.patches.array_hash(state, precision);
79+
array.patches().array_hash(state, precision);
8080
}
8181

8282
fn array_eq(array: &ALPArray, other: &ALPArray, precision: Precision) -> bool {
8383
array.dtype == other.dtype
8484
&& array.encoded().array_eq(other.encoded(), precision)
8585
&& array.exponents == other.exponents
86-
&& array.patches.array_eq(&other.patches, precision)
86+
&& array.patches().array_eq(&other.patches(), precision)
8787
}
8888

8989
fn nbuffers(_array: &ALPArray) -> usize {
@@ -114,23 +114,12 @@ impl VTable for ALP {
114114
slots.len()
115115
);
116116

117-
// Reconstruct patches from slots + existing metadata
118-
array.patches = match (&slots[PATCH_INDICES_SLOT], &slots[PATCH_VALUES_SLOT]) {
119-
(Some(indices), Some(values)) => {
120-
let old = array
121-
.patches
122-
.as_ref()
123-
.vortex_expect("ALPArray had patch slots but no patches metadata");
124-
Some(Patches::new(
125-
old.array_len(),
126-
old.offset(),
127-
indices.clone(),
128-
values.clone(),
129-
slots[PATCH_CHUNK_OFFSETS_SLOT].clone(),
130-
)?)
131-
}
132-
_ => None,
133-
};
117+
// If patch slots are being cleared, clear the metadata too
118+
if slots[PATCH_INDICES_SLOT].is_none() || slots[PATCH_VALUES_SLOT].is_none() {
119+
array.patch_offset = None;
120+
array.patch_offset_within_chunk = None;
121+
}
122+
134123
array.slots = slots;
135124
Ok(())
136125
}
@@ -240,7 +229,8 @@ pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = [
240229
#[derive(Clone, Debug)]
241230
pub struct ALPArray {
242231
slots: Vec<Option<ArrayRef>>,
243-
patches: Option<Patches>,
232+
patch_offset: Option<usize>,
233+
patch_offset_within_chunk: Option<usize>,
244234
dtype: DType,
245235
exponents: Exponents,
246236
stats_set: ArrayStats,
@@ -409,12 +399,17 @@ impl ALPArray {
409399
};
410400

411401
let slots = Self::make_slots(&encoded, &patches);
402+
let (patch_offset, patch_offset_within_chunk) = match &patches {
403+
Some(p) => (Some(p.offset()), p.offset_within_chunk()),
404+
None => (None, None),
405+
};
412406

413407
Ok(Self {
414408
dtype,
415409
slots,
416410
exponents,
417-
patches,
411+
patch_offset,
412+
patch_offset_within_chunk,
418413
stats_set: Default::default(),
419414
})
420415
}
@@ -430,12 +425,17 @@ impl ALPArray {
430425
dtype: DType,
431426
) -> Self {
432427
let slots = Self::make_slots(&encoded, &patches);
428+
let (patch_offset, patch_offset_within_chunk) = match &patches {
429+
Some(p) => (Some(p.offset()), p.offset_within_chunk()),
430+
None => (None, None),
431+
};
433432

434433
Self {
435434
dtype,
436435
slots,
437436
exponents,
438-
patches,
437+
patch_offset,
438+
patch_offset_within_chunk,
439439
stats_set: Default::default(),
440440
}
441441
}
@@ -472,17 +472,38 @@ impl ALPArray {
472472
self.exponents
473473
}
474474

475-
pub fn patches(&self) -> Option<&Patches> {
476-
self.patches.as_ref()
475+
pub fn patches(&self) -> Option<Patches> {
476+
match (
477+
&self.slots[PATCH_INDICES_SLOT],
478+
&self.slots[PATCH_VALUES_SLOT],
479+
) {
480+
(Some(indices), Some(values)) => {
481+
let patch_offset = self
482+
.patch_offset
483+
.vortex_expect("has patch slots but no patch_offset");
484+
Some(unsafe {
485+
Patches::new_unchecked(
486+
self.encoded().len(),
487+
patch_offset,
488+
indices.clone(),
489+
values.clone(),
490+
self.slots[PATCH_CHUNK_OFFSETS_SLOT].clone(),
491+
self.patch_offset_within_chunk,
492+
)
493+
})
494+
}
495+
_ => None,
496+
}
477497
}
478498

479499
/// Consumes the array and returns its parts.
480500
#[inline]
481501
pub fn into_parts(mut self) -> (ArrayRef, Exponents, Option<Patches>, DType) {
502+
let patches = self.patches();
482503
let encoded = self.slots[ENCODED_SLOT]
483504
.take()
484505
.vortex_expect("ALPArray encoded slot");
485-
(encoded, self.exponents, self.patches, self.dtype)
506+
(encoded, self.exponents, patches, self.dtype)
486507
}
487508
}
488509

@@ -506,7 +527,6 @@ mod tests {
506527
use vortex_array::arrays::PrimitiveArray;
507528
use vortex_array::assert_arrays_eq;
508529
use vortex_array::session::ArraySession;
509-
use vortex_array::vtable::ValidityHelper;
510530
use vortex_session::VortexSession;
511531

512532
use super::*;

encodings/alp/src/alp/compress.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ use vortex_array::arrays::PrimitiveArray;
88
use vortex_array::dtype::PType;
99
use vortex_array::patches::Patches;
1010
use vortex_array::validity::Validity;
11-
use vortex_array::vtable::ValidityHelper;
1211
use vortex_buffer::Buffer;
1312
use vortex_buffer::BufferMut;
1413
use vortex_error::VortexResult;
@@ -73,7 +72,7 @@ where
7372
let (exponents, encoded, exceptional_positions, exceptional_values, mut chunk_offsets) =
7473
T::encode(values_slice, exponents);
7574

76-
let encoded_array = PrimitiveArray::new(encoded, values.validity().clone()).into_array();
75+
let encoded_array = PrimitiveArray::new(encoded, values.validity()).into_array();
7776

7877
let validity = values.validity_mask()?;
7978
// exceptional_positions may contain exceptions at invalid positions (which contain garbage

encodings/alp/src/alp/compute/cast.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ impl CastReduce for ALP {
2929
.patches()
3030
.map(|p| {
3131
if p.values().dtype() == dtype {
32-
Ok(p.clone())
32+
Ok(p)
3333
} else {
3434
Patches::new(
3535
p.array_len(),

encodings/alp/src/alp/decompress.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use vortex_array::arrays::primitive::patch_chunk;
1010
use vortex_array::dtype::DType;
1111
use vortex_array::match_each_unsigned_integer_ptype;
1212
use vortex_array::patches::Patches;
13-
use vortex_array::vtable::ValidityHelper;
1413
use vortex_buffer::BufferMut;
1514
use vortex_error::VortexResult;
1615

@@ -101,7 +100,7 @@ fn decompress_chunked_core(
101100
patches: &Patches,
102101
dtype: DType,
103102
) -> PrimitiveArray {
104-
let validity = encoded.validity().clone();
103+
let validity = encoded.validity();
105104
let ptype = dtype.as_ptype();
106105
let array_len = encoded.len();
107106
let offset_within_chunk = patches.offset_within_chunk().unwrap_or(0);
@@ -151,7 +150,7 @@ fn decompress_unchunked_core(
151150
dtype: DType,
152151
ctx: &mut ExecutionCtx,
153152
) -> VortexResult<PrimitiveArray> {
154-
let validity = encoded.validity().clone();
153+
let validity = encoded.validity();
155154
let ptype = dtype.as_ptype();
156155

157156
let decoded = match_each_alp_float_ptype!(ptype, |T| {

encodings/alp/src/alp_rd/array.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -543,8 +543,8 @@ impl ALPRDArray {
543543
}
544544

545545
/// Patches of left-most bits.
546-
pub fn left_parts_patches(&self) -> Option<&Patches> {
547-
self.left_parts_patches.as_ref()
546+
pub fn left_parts_patches(&self) -> Option<Patches> {
547+
self.left_parts_patches.clone()
548548
}
549549

550550
/// The dictionary that maps the codes in `left_parts` into bit patterns.

encodings/alp/src/alp_rd/compute/cast.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ impl CastReduce for ALPRD {
3434
array.left_parts_dictionary().clone(),
3535
array.right_parts().clone(),
3636
array.right_bit_width(),
37-
array.left_parts_patches().cloned(),
37+
array.left_parts_patches(),
3838
)?
3939
.into_array(),
4040
));

encodings/alp/src/alp_rd/compute/mask.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ impl MaskReduce for ALPRD {
2626
array.left_parts_dictionary().clone(),
2727
array.right_parts().clone(),
2828
array.right_bit_width(),
29-
array.left_parts_patches().cloned(),
29+
array.left_parts_patches(),
3030
)?
3131
.into_array(),
3232
))

encodings/alp/src/alp_rd/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ use vortex_array::arrays::PrimitiveArray;
3030
use vortex_array::dtype::DType;
3131
use vortex_array::dtype::NativePType;
3232
use vortex_array::match_each_integer_ptype;
33-
use vortex_array::vtable::ValidityHelper;
3433
use vortex_buffer::Buffer;
3534
use vortex_buffer::BufferMut;
3635
use vortex_error::VortexExpect;
@@ -229,7 +228,7 @@ impl RDEncoder {
229228
}
230229

231230
// Bit-pack down the encoded left-parts array that have been dictionary encoded.
232-
let primitive_left = PrimitiveArray::new(left_parts, array.validity().clone());
231+
let primitive_left = PrimitiveArray::new(left_parts, array.validity());
233232
// SAFETY: by construction, all values in left_parts can be packed to left_bit_width.
234233
let packed_left = unsafe {
235234
bitpack_encode_unchecked(primitive_left, left_bit_width as _)

encodings/bytebool/public-api.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,4 +136,4 @@ pub fn vortex_bytebool::ByteBoolArray::into_array(self) -> vortex_array::array::
136136

137137
impl vortex_array::vtable::validity::ValidityHelper for vortex_bytebool::ByteBoolArray
138138

139-
pub fn vortex_bytebool::ByteBoolArray::validity(&self) -> &vortex_array::validity::Validity
139+
pub fn vortex_bytebool::ByteBoolArray::validity(&self) -> vortex_array::validity::Validity

0 commit comments

Comments
 (0)