Skip to content

Commit 3a45546

Browse files
committed
Convert offsets into subtraction in RunEnd and Patches
Signed-off-by: Robert Kruszewski <github@robertk.io>
1 parent 3d1553e commit 3a45546

30 files changed

Lines changed: 307 additions & 244 deletions

File tree

encodings/alp/src/alp/array.rs

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,9 @@ impl VTable for ALP {
181181
.map(|dtype| children.get(3, &dtype, usize::try_from(p.chunk_offsets_len())?))
182182
.transpose()?;
183183

184-
Patches::new(len, p.offset()?, indices, values, chunk_offsets)
184+
// The indices child already has the offset embedded (via Binary(Sub) expression),
185+
// so we pass offset 0 to avoid double-wrapping.
186+
Patches::new(len, 0, indices, values, chunk_offsets)
185187
})
186188
.transpose()?;
187189

@@ -200,10 +202,10 @@ impl VTable for ALP {
200202
let patches_info = array
201203
.patches
202204
.as_ref()
203-
.map(|p| (p.array_len(), p.offset(), p.chunk_offsets().is_some()));
205+
.map(|p| (p.array_len(), p.chunk_offsets().is_some()));
204206

205207
let expected_children = match &patches_info {
206-
Some((_, _, has_chunk_offsets)) => 1 + 2 + if *has_chunk_offsets { 1 } else { 0 },
208+
Some((_, has_chunk_offsets)) => 1 + 2 + if *has_chunk_offsets { 1 } else { 0 },
207209
None => 1,
208210
};
209211

@@ -219,7 +221,7 @@ impl VTable for ALP {
219221
.next()
220222
.ok_or_else(|| vortex_err!("Expected encoded child"))?;
221223

222-
if let Some((array_len, offset, _has_chunk_offsets)) = patches_info {
224+
if let Some((array_len, _has_chunk_offsets)) = patches_info {
223225
let indices = children_iter
224226
.next()
225227
.ok_or_else(|| vortex_err!("Expected patch indices child"))?;
@@ -228,13 +230,9 @@ impl VTable for ALP {
228230
.ok_or_else(|| vortex_err!("Expected patch values child"))?;
229231
let chunk_offsets = children_iter.next();
230232

231-
array.patches = Some(Patches::new(
232-
array_len,
233-
offset,
234-
indices,
235-
values,
236-
chunk_offsets,
237-
)?);
233+
// The indices child already has the offset embedded (via Binary(Sub) expression),
234+
// so we pass offset 0 to avoid double-wrapping.
235+
array.patches = Some(Patches::new(array_len, 0, indices, values, chunk_offsets)?);
238236
}
239237

240238
Ok(())
@@ -799,10 +797,11 @@ mod tests {
799797
);
800798

801799
// Rebuild the patches WITHOUT chunk_offsets to simulate deserialized patches.
800+
let (raw_indices, offset) = original_patches.raw_indices_and_offset();
802801
let patches_without_chunk_offsets = Patches::new(
803802
original_patches.array_len(),
804-
original_patches.offset(),
805-
original_patches.indices().clone(),
803+
offset,
804+
raw_indices.clone(),
806805
original_patches.values().clone(),
807806
None, // NO chunk_offsets - this triggers the bug!
808807
)

encodings/alp/src/alp/compute/cast.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,11 @@ impl CastReduce for ALP {
3131
if p.values().dtype() == dtype {
3232
Ok(p.clone())
3333
} else {
34+
let (raw_indices, offset) = p.raw_indices_and_offset();
3435
Patches::new(
3536
p.array_len(),
36-
p.offset(),
37-
p.indices().clone(),
37+
offset,
38+
raw_indices.clone(),
3839
p.values().cast(dtype.clone())?,
3940
p.chunk_offsets().clone(),
4041
)

encodings/alp/src/alp/decompress.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ pub fn decompress_into_array(
3434
{
3535
let prim_encoded = encoded.execute::<PrimitiveArray>(ctx)?;
3636
let patches_chunk_offsets = chunk_offsets.clone().execute::<PrimitiveArray>(ctx)?;
37-
let patches_indices = patches.indices().clone().execute::<PrimitiveArray>(ctx)?;
37+
let (raw_indices, _) = patches.raw_indices_and_offset();
38+
let patches_indices = raw_indices.clone().execute::<PrimitiveArray>(ctx)?;
3839
let patches_values = patches.values().clone().execute::<PrimitiveArray>(ctx)?;
3940
Ok(decompress_chunked_core(
4041
prim_encoded,
@@ -67,7 +68,8 @@ pub fn execute_decompress(array: ALPArray, ctx: &mut ExecutionCtx) -> VortexResu
6768
// TODO(joe): have into parts.
6869
let encoded = encoded.execute::<PrimitiveArray>(ctx)?;
6970
let patches_chunk_offsets = chunk_offsets.clone().execute::<PrimitiveArray>(ctx)?;
70-
let patches_indices = patches.indices().clone().execute::<PrimitiveArray>(ctx)?;
71+
let (raw_indices, _) = patches.raw_indices_and_offset();
72+
let patches_indices = raw_indices.clone().execute::<PrimitiveArray>(ctx)?;
7173
let patches_values = patches.values().clone().execute::<PrimitiveArray>(ctx)?;
7274
Ok(decompress_chunked_core(
7375
encoded,

encodings/alp/src/alp_rd/array.rs

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -234,12 +234,10 @@ impl VTable for ALPRD {
234234
let indices = children.get(2, &p.indices_dtype()?, p.len()?)?;
235235
let values = children.get(3, &left_parts_dtype, p.len()?)?;
236236

237+
// The indices child already has the offset embedded (via Binary(Sub) expression),
238+
// so we pass offset 0 to avoid double-wrapping.
237239
Patches::new(
238-
len,
239-
p.offset()?,
240-
indices,
241-
values,
242-
// TODO(0ax1): handle chunk offsets
240+
len, 0, indices, values, // TODO(0ax1): handle chunk offsets
243241
None,
244242
)
245243
})
@@ -262,10 +260,7 @@ impl VTable for ALPRD {
262260

263261
fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
264262
// Children: left_parts, right_parts, patches (if present): indices, values
265-
let patches_info = array
266-
.left_parts_patches
267-
.as_ref()
268-
.map(|p| (p.array_len(), p.offset()));
263+
let patches_info = array.left_parts_patches.as_ref().map(|p| p.array_len());
269264

270265
let expected_children = if patches_info.is_some() { 4 } else { 2 };
271266

@@ -284,16 +279,18 @@ impl VTable for ALPRD {
284279
.next()
285280
.ok_or_else(|| vortex_err!("Expected right_parts child"))?;
286281

287-
if let Some((array_len, offset)) = patches_info {
282+
if let Some(array_len) = patches_info {
288283
let indices = children_iter
289284
.next()
290285
.ok_or_else(|| vortex_err!("Expected patch indices child"))?;
291286
let values = children_iter
292287
.next()
293288
.ok_or_else(|| vortex_err!("Expected patch values child"))?;
294289

290+
// The indices child already has the offset embedded (via Binary(Sub) expression),
291+
// so we pass offset 0 to avoid double-wrapping.
295292
array.left_parts_patches = Some(Patches::new(
296-
array_len, offset, indices, values,
293+
array_len, 0, indices, values,
297294
None, // chunk_offsets not currently supported for ALPRD
298295
)?);
299296
}

encodings/alp/src/alp_rd/mod.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,9 +311,10 @@ pub fn alp_rd_decode<T: ALPRDFloat>(
311311

312312
// Apply any patches
313313
if let Some(patches) = left_parts_patches {
314-
let indices = patches.indices().clone().execute::<PrimitiveArray>(ctx)?;
314+
let (raw_indices, offset) = patches.raw_indices_and_offset();
315+
let indices = raw_indices.clone().execute::<PrimitiveArray>(ctx)?;
315316
let patch_values = patches.values().clone().execute::<PrimitiveArray>(ctx)?;
316-
alp_rd_apply_patches(&mut values, &indices, &patch_values, patches.offset());
317+
alp_rd_apply_patches(&mut values, &indices, &patch_values, offset);
317318
}
318319

319320
// Shift the left-parts and add in the right-parts.

encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,8 @@ pub fn apply_patches_to_uninit_range_fn<T: NativePType, F: Fn(T) -> T>(
155155
) -> VortexResult<()> {
156156
assert_eq!(patches.array_len(), dst.len());
157157

158-
let indices = patches.indices().clone().execute::<PrimitiveArray>(ctx)?;
158+
let (raw_indices, offset) = patches.raw_indices_and_offset();
159+
let indices = raw_indices.clone().execute::<PrimitiveArray>(ctx)?;
159160
let values = patches.values().clone().execute::<PrimitiveArray>(ctx)?;
160161
let validity = values.validity_mask()?;
161162
let values = values.as_slice::<T>();
@@ -166,7 +167,7 @@ pub fn apply_patches_to_uninit_range_fn<T: NativePType, F: Fn(T) -> T>(
166167
indices.as_slice::<P>(),
167168
values,
168169
validity,
169-
patches.offset(),
170+
offset,
170171
f,
171172
)
172173
});

encodings/fastlanes/src/bitpacking/compute/cast.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,11 @@ impl CastReduce for BitPacked {
2929
.patches()
3030
.map(|patches| {
3131
let new_values = patches.values().cast(dtype.clone())?;
32+
let (raw_indices, offset) = patches.raw_indices_and_offset();
3233
Patches::new(
3334
patches.array_len(),
34-
patches.offset(),
35-
patches.indices().clone(),
35+
offset,
36+
raw_indices.clone(),
3637
new_values,
3738
patches.chunk_offsets().clone(),
3839
)

encodings/fastlanes/src/bitpacking/vtable/mod.rs

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -173,12 +173,14 @@ impl VTable for BitPacked {
173173

174174
fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
175175
// Children: patches (if present): indices, values, chunk_offsets; then validity (if present)
176-
let patches_info = array
176+
let has_patches = array.patches().is_some();
177+
let has_chunk_offsets = array
177178
.patches()
178-
.map(|p| (p.offset(), p.chunk_offsets().is_some()));
179+
.map(|p| p.chunk_offsets().is_some())
180+
.unwrap_or(false);
179181

180182
let mut child_idx = 0;
181-
let patches = if let Some((patch_offset, has_chunk_offsets)) = patches_info {
183+
let patches = if has_patches {
182184
let patch_indices = children
183185
.get(child_idx)
184186
.ok_or_else(|| vortex_err!("Expected patch_indices child at index {}", child_idx))?
@@ -204,9 +206,11 @@ impl VTable for BitPacked {
204206
None
205207
};
206208

209+
// The indices child already has the offset embedded (via Binary(Sub) expression),
210+
// so we pass offset 0 to avoid double-wrapping.
207211
Some(Patches::new(
208212
array.len(),
209-
patch_offset,
213+
0,
210214
patch_indices,
211215
patch_values,
212216
patch_chunk_offsets,
@@ -317,7 +321,9 @@ impl VTable for BitPacked {
317321
.map(|dtype| children.get(2, &dtype, p.chunk_offsets_len() as usize))
318322
.transpose()?;
319323

320-
Patches::new(len, p.offset()?, indices, values, chunk_offsets)
324+
// The indices child already has the offset embedded (via Binary(Sub) expression),
325+
// so we pass offset 0 to avoid double-wrapping.
326+
Patches::new(len, 0, indices, values, chunk_offsets)
321327
})
322328
.transpose()?;
323329

encodings/runend/public-api.lock

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,10 +124,12 @@ pub fn vortex_runend::RunEndArray::into_parts(self) -> vortex_runend::RunEndArra
124124

125125
pub fn vortex_runend::RunEndArray::new(ends: vortex_array::array::ArrayRef, values: vortex_array::array::ArrayRef) -> Self
126126

127-
pub unsafe fn vortex_runend::RunEndArray::new_unchecked(ends: vortex_array::array::ArrayRef, values: vortex_array::array::ArrayRef, offset: usize, length: usize) -> Self
127+
pub unsafe fn vortex_runend::RunEndArray::new_unchecked(ends: vortex_array::array::ArrayRef, values: vortex_array::array::ArrayRef, length: usize) -> Self
128128

129129
pub fn vortex_runend::RunEndArray::offset(&self) -> usize
130130

131+
pub fn vortex_runend::RunEndArray::raw_ends_and_offset(&self) -> (&vortex_array::array::ArrayRef, usize)
132+
131133
pub fn vortex_runend::RunEndArray::try_new(ends: vortex_array::array::ArrayRef, values: vortex_array::array::ArrayRef) -> vortex_error::VortexResult<Self>
132134

133135
pub fn vortex_runend::RunEndArray::try_new_offset_length(ends: vortex_array::array::ArrayRef, values: vortex_array::array::ArrayRef, offset: usize, length: usize) -> vortex_error::VortexResult<Self>

0 commit comments

Comments
 (0)