Skip to content

Commit 3ac0e81

Browse files
committed
fix[vortex-array]: fix overflow on FSL element take indices
The take index types used on an FSL might be valid narrow types. This type was used as indices to take the inner elements, which could result in an overflow with long enough lists. Signed-off-by: Alfonso Subiotto Marques <alfonso.subiotto@polarsignals.com>
1 parent d0ed3fc commit 3ac0e81

2 files changed

Lines changed: 49 additions & 11 deletions

File tree

vortex-array/src/arrays/fixed_size_list/compute/take.rs

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use crate::arrays::dict::TakeExecute;
1717
use crate::dtype::IntegerPType;
1818
use crate::executor::ExecutionCtx;
1919
use crate::match_each_integer_ptype;
20+
use crate::match_smallest_offset_type;
2021
use crate::validity::Validity;
2122
use crate::vtable::ValidityHelper;
2223

@@ -26,20 +27,24 @@ use crate::vtable::ValidityHelper;
2627
/// that elements start at offset 0 and be perfectly packed without gaps. We expand list indices
2728
/// into element indices and push them down to the child elements array.
2829
impl TakeExecute for FixedSizeList {
30+
#[expect(clippy::cognitive_complexity)]
2931
fn take(
3032
array: &FixedSizeListArray,
3133
indices: &ArrayRef,
3234
ctx: &mut ExecutionCtx,
3335
) -> VortexResult<Option<ArrayRef>> {
36+
let max_element_idx = array.elements().len();
3437
match_each_integer_ptype!(indices.dtype().as_ptype(), |I| {
35-
take_with_indices::<I>(array, indices, ctx)
38+
match_smallest_offset_type!(max_element_idx, |E| {
39+
take_with_indices::<I, E>(array, indices, ctx)
40+
})
3641
})
3742
.map(Some)
3843
}
3944
}
4045

4146
/// Dispatches to the appropriate take implementation based on list size and nullability.
42-
fn take_with_indices<I: IntegerPType>(
47+
fn take_with_indices<I: IntegerPType, E: IntegerPType>(
4348
array: &FixedSizeListArray,
4449
indices: &ArrayRef,
4550
ctx: &mut ExecutionCtx,
@@ -74,15 +79,15 @@ fn take_with_indices<I: IntegerPType>(
7479
} else {
7580
// The result's nullability is the union of the input nullabilities.
7681
if array.dtype().is_nullable() || indices_array.dtype().is_nullable() {
77-
take_nullable_fsl::<I>(array, &indices_array)
82+
take_nullable_fsl::<I, E>(array, &indices_array)
7883
} else {
79-
take_non_nullable_fsl::<I>(array, &indices_array)
84+
take_non_nullable_fsl::<I, E>(array, &indices_array)
8085
}
8186
}
8287
}
8388

8489
/// Takes from an array when both the array and indices are non-nullable.
85-
fn take_non_nullable_fsl<I: IntegerPType>(
90+
fn take_non_nullable_fsl<I: IntegerPType, E: IntegerPType>(
8691
array: &FixedSizeListArray,
8792
indices_array: &PrimitiveArray,
8893
) -> VortexResult<ArrayRef> {
@@ -91,7 +96,7 @@ fn take_non_nullable_fsl<I: IntegerPType>(
9196
let new_len = indices.len();
9297

9398
// Build the element indices directly without validity tracking.
94-
let mut elements_indices = BufferMut::<I>::with_capacity(new_len * list_size);
99+
let mut elements_indices = BufferMut::<E>::with_capacity(new_len * list_size);
95100

96101
// Build the element indices for each list.
97102
for data_idx in indices {
@@ -106,7 +111,7 @@ fn take_non_nullable_fsl<I: IntegerPType>(
106111
for i in list_start..list_end {
107112
// SAFETY: We've allocated enough space for enough indices for all `new_len` lists (that each consist of `list_size = list_end - list_start` elements), so we know we have enough capacity.
108113
unsafe {
109-
elements_indices.push_unchecked(I::from_usize(i).vortex_expect("i < list_end"))
114+
elements_indices.push_unchecked(E::from_usize(i).vortex_expect("i < list_end"))
110115
};
111116
}
112117
}
@@ -131,7 +136,7 @@ fn take_non_nullable_fsl<I: IntegerPType>(
131136
}
132137

133138
/// Takes from an array when either the array or indices are nullable.
134-
fn take_nullable_fsl<I: IntegerPType>(
139+
fn take_nullable_fsl<I: IntegerPType, E: IntegerPType>(
135140
array: &FixedSizeListArray,
136141
indices_array: &PrimitiveArray,
137142
) -> VortexResult<ArrayRef> {
@@ -144,7 +149,7 @@ fn take_nullable_fsl<I: IntegerPType>(
144149

145150
// We must use placeholder zeros for null lists to maintain the array length without
146151
// propagating nullability to the element array's take operation.
147-
let mut elements_indices = BufferMut::<I>::with_capacity(new_len * list_size);
152+
let mut elements_indices = BufferMut::<E>::with_capacity(new_len * list_size);
148153
let mut new_validity_builder = BitBufferMut::with_capacity(new_len);
149154

150155
// Build the element indices while tracking which lists are null.
@@ -159,7 +164,7 @@ fn take_nullable_fsl<I: IntegerPType>(
159164
if !is_index_valid || !array_validity.value(data_idx) {
160165
// Append placeholder zeros for null lists. These will be masked by the validity array.
161166
// We cannot use append_nulls here as explained above.
162-
unsafe { elements_indices.push_n_unchecked(I::zero(), list_size) };
167+
unsafe { elements_indices.push_n_unchecked(E::zero(), list_size) };
163168
new_validity_builder.append(false);
164169
} else {
165170
// Append the actual element indices for this list.
@@ -170,7 +175,7 @@ fn take_nullable_fsl<I: IntegerPType>(
170175
for i in list_start..list_end {
171176
// SAFETY: We've allocated enough space for enough indices for all `new_len` lists (that each consist of `list_size = list_end - list_start` elements), so we know we have enough capacity.
172177
unsafe {
173-
elements_indices.push_unchecked(I::from_usize(i).vortex_expect("i < list_end"))
178+
elements_indices.push_unchecked(E::from_usize(i).vortex_expect("i < list_end"))
174179
};
175180
}
176181

vortex-array/src/arrays/fixed_size_list/tests/take.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use super::common::create_empty_fsl;
99
use super::common::create_large_fsl;
1010
use super::common::create_nullable_fsl;
1111
use super::common::create_single_element_fsl;
12+
use crate::ArrayRef;
1213
use crate::DynArray;
1314
use crate::IntoArray;
1415
use crate::arrays::FixedSizeListArray;
@@ -130,6 +131,38 @@ fn test_take_fsl_with_null_indices_preserves_elements() {
130131
assert_arrays_eq!(expected, result);
131132
}
132133

134+
// Element index overflow: with u8 indices and list_size=16, data_idx=16 produces element index
135+
// 16*16=256 which overflows u8. The take kernel must widen the element index type.
136+
#[rstest]
137+
#[case::non_nullable(
138+
FixedSizeListArray::new(
139+
buffer![0u8; 320].into_array(), 16, Validity::NonNullable, 20,
140+
),
141+
buffer![0u8, 16, 19].into_array(),
142+
FixedSizeListArray::new(
143+
buffer![0u8; 48].into_array(), 16, Validity::NonNullable, 3,
144+
),
145+
)]
146+
#[case::nullable(
147+
FixedSizeListArray::new(
148+
buffer![0u8; 320].into_array(), 16,
149+
Validity::from_iter((0..20).map(|i| i != 5)), 20,
150+
),
151+
buffer![0u8, 16, 5].into_array(),
152+
FixedSizeListArray::new(
153+
buffer![0u8; 48].into_array(), 16,
154+
Validity::from_iter([true, true, false]), 3,
155+
),
156+
)]
157+
fn test_element_index_overflow(
158+
#[case] fsl: FixedSizeListArray,
159+
#[case] indices: ArrayRef,
160+
#[case] expected: FixedSizeListArray,
161+
) {
162+
let result = fsl.take(indices.to_array()).unwrap();
163+
assert_arrays_eq!(expected, result);
164+
}
165+
133166
// Parameterized test for nullable array scenarios that are specific to FSL's implementation.
134167
#[rstest]
135168
#[case::nullable_mixed_elements(

0 commit comments

Comments
 (0)