Skip to content

Commit 72d39fb

Browse files
abnobdossAbanoub Doss
andauthored
Fix patch_chunk index OOB when slicing ALP arrays mid-chunk (#7354)
## Summary `patch_chunk` panics with index-out-of-bounds when decompressing sliced ALP arrays with patches. The slice boundary must fall mid-chunk (non-1024-aligned) to trigger it. **Root cause:** `Patches::slice()` slices `chunk_offsets` at chunk granularity but `indices`/`values` at element granularity. When a slice ends mid-chunk, `patches_end_idx` can exceed `patches_indices.len()`. The start index already used `saturating_sub` — the end index didn't. **Fix:** `.saturating_sub(offset_within_chunk).min(patches_indices.len())` on the end index computation in `patch_chunk`, `search_index_chunked`, and `search_index_chunked_batch`. **AI disclosure:** Root cause analysis, fix implementation, and test writing were done with Claude Code under my direction and review. I identified the bug during development benchmarking at 100M rows and verified the fix against my workload. ## Testing - Unit test calling `patch_chunk` with inputs that OOB without the fix. - Integration test: ALP encode 3-chunk array with patches, slice mid-chunk, decode. Tests both start-aligned and mid-chunk-start variants. - All existing tests pass (2538 vortex-array, 167 vortex-alp). --------- Signed-off-by: Abanoub Doss <abanoub.doss@gmail.com> Co-authored-by: Abanoub Doss <abanoub.doss@gmail.com>
1 parent 957eb3a commit 72d39fb

3 files changed

Lines changed: 82 additions & 8 deletions

File tree

encodings/alp/src/alp/compress.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,4 +546,44 @@ mod tests {
546546

547547
assert_arrays_eq!(decoded, array);
548548
}
549+
550+
/// Regression test for patch_chunk index-out-of-bounds when slicing a multi-chunk
551+
/// ALP array mid-chunk with patches in the trailing chunk.
552+
///
553+
/// The bug: chunk_offsets are sliced at chunk granularity (1024-row boundaries)
554+
/// but patches indices/values are sliced at element granularity. When a slice ends
555+
/// mid-chunk, patches_end_idx could exceed patches_indices.len(), causing OOB panic
556+
/// during decompression.
557+
#[test]
558+
fn test_slice_mid_chunk_with_patches_in_trailing_chunk() {
559+
// 3 chunks (3072 elements), patches scattered across all chunks.
560+
let mut values = vec![1.0f64; 3072];
561+
// Chunk 0 patches (indices 0..1024)
562+
values[100] = PI;
563+
values[500] = E;
564+
// Chunk 1 patches (indices 1024..2048)
565+
values[1100] = PI;
566+
values[1500] = E;
567+
values[1900] = PI;
568+
// Chunk 2 patches (indices 2048..3072)
569+
values[2100] = PI;
570+
values[2500] = E;
571+
values[2900] = PI;
572+
573+
let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
574+
let encoded = alp_encode(&original, None).unwrap();
575+
assert!(encoded.patches().is_some());
576+
577+
// Slice ending mid-chunk-2 (element 2500 is inside chunk 2 = 2048..3072).
578+
// This creates a mismatch: chunk_offsets includes the full chunk 2 offset,
579+
// but patches_indices only includes patches up to element 2500.
580+
let sliced_alp = encoded.slice(0..2500).unwrap();
581+
let expected = original.slice(0..2500).unwrap();
582+
assert_arrays_eq!(sliced_alp, expected);
583+
584+
// Also test slicing that starts mid-chunk (both start and end mid-chunk).
585+
let sliced_alp = encoded.slice(500..2500).unwrap();
586+
let expected = original.slice(500..2500).unwrap();
587+
assert_arrays_eq!(sliced_alp, expected);
588+
}
549589
}

vortex-array/src/arrays/primitive/array/patch.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,12 @@ pub fn patch_chunk<T, I, C>(
111111
// Use the same logic as patches slice implementation for calculating patch ranges.
112112
let patches_start_idx =
113113
(chunk_offsets_slice[chunk_idx].as_() - base_offset).saturating_sub(offset_within_chunk);
114+
// Clamp: chunk_offsets are sliced at chunk granularity but patches at element
115+
// granularity, so the next chunk offset may exceed the actual patches length.
114116
let patches_end_idx = if chunk_idx + 1 < chunk_offsets_slice.len() {
115-
chunk_offsets_slice[chunk_idx + 1].as_() - base_offset - offset_within_chunk
117+
(chunk_offsets_slice[chunk_idx + 1].as_() - base_offset)
118+
.saturating_sub(offset_within_chunk)
119+
.min(patches_indices.len())
116120
} else {
117121
patches_indices.len()
118122
};
@@ -135,6 +139,37 @@ mod tests {
135139
use crate::assert_arrays_eq;
136140
use crate::validity::Validity;
137141

142+
/// Regression: patch_chunk must not OOB when chunk_offsets (chunk granularity)
143+
/// reference more patches than patches_indices (element granularity) contains.
144+
#[test]
145+
fn patch_chunk_no_oob_on_mid_chunk_slice() {
146+
let mut decoded_values = vec![0.0f64; PATCH_CHUNK_SIZE];
147+
// 10 patches, but chunk_offsets claim 15 exist past offset adjustment.
148+
let patches_indices: Vec<u64> = (0..10)
149+
.map(|i| (PATCH_CHUNK_SIZE as u64) + i * 10)
150+
.collect();
151+
let patches_values: Vec<f64> = (0..10).map(|i| (i + 1) as f64 * 100.0).collect();
152+
// chunk_offsets [5, 12, 20]: for chunk_idx=1 with offset_within_chunk=3,
153+
// unclamped end = (20-5)-3 = 12, which exceeds patches len of 10.
154+
let chunk_offsets: Vec<u32> = vec![5, 12, 20];
155+
156+
patch_chunk(
157+
&mut decoded_values,
158+
&patches_indices,
159+
&patches_values,
160+
0,
161+
&chunk_offsets,
162+
1,
163+
3,
164+
);
165+
166+
// Spot-check: patch index 4 (first in range) should be applied.
167+
assert_ne!(
168+
decoded_values[usize::try_from(patches_indices[4]).unwrap() - PATCH_CHUNK_SIZE],
169+
0.0
170+
);
171+
}
172+
138173
#[test]
139174
fn patch_sliced() {
140175
let input = PrimitiveArray::new(buffer![2u32; 10], Validity::AllValid);

vortex-array/src/patches.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,9 @@ impl Patches {
460460
.saturating_sub(offset_within_chunk);
461461

462462
let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
463-
self.chunk_offset_at(chunk_idx + 1)? - base_offset - offset_within_chunk
463+
(self.chunk_offset_at(chunk_idx + 1)? - base_offset)
464+
.saturating_sub(offset_within_chunk)
465+
.min(self.indices.len())
464466
} else {
465467
self.indices.len()
466468
};
@@ -522,13 +524,10 @@ impl Patches {
522524
.saturating_sub(offset_within_chunk);
523525

524526
let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
525-
let base_offset_end = chunk_offsets[chunk_idx + 1];
526-
527-
let offset_within_chunk = O::from(offset_within_chunk)
528-
.ok_or_else(|| vortex_err!("offset_within_chunk failed to convert to O"))?;
529-
530-
usize::try_from(base_offset_end - chunk_offsets[0] - offset_within_chunk)
527+
usize::try_from(chunk_offsets[chunk_idx + 1] - chunk_offsets[0])
531528
.map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
529+
.saturating_sub(offset_within_chunk)
530+
.min(indices.len())
532531
} else {
533532
self.indices.len()
534533
};

0 commit comments

Comments
 (0)