Skip to content

Commit 4f1bbbd

Browse files
Guard array_resize target size
1 parent b17847d commit 4f1bbbd

2 files changed

Lines changed: 72 additions & 10 deletions

File tree

datafusion/functions-nested/src/resize.rs

Lines changed: 69 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -24,20 +24,20 @@ use arrow::array::{
2424
};
2525
use arrow::buffer::OffsetBuffer;
2626
use arrow::datatypes::DataType;
27-
use arrow::datatypes::{ArrowNativeType, Field};
27+
use arrow::datatypes::Field;
2828
use arrow::datatypes::{
2929
DataType::{LargeList, List},
3030
FieldRef,
3131
};
3232
use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array};
3333
use datafusion_common::utils::ListCoercion;
34-
use datafusion_common::{Result, ScalarValue, exec_err, internal_datafusion_err};
34+
use datafusion_common::{Result, ScalarValue, exec_err};
3535
use datafusion_expr::{
3636
ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Documentation,
3737
ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility,
3838
};
3939
use datafusion_macros::user_doc;
40-
use std::sync::Arc;
40+
use std::{mem::size_of, sync::Arc};
4141

4242
make_udf_expr_and_func!(
4343
ArrayResize,
@@ -206,18 +206,20 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
206206
if array.is_null(row_index) {
207207
continue;
208208
}
209-
let target_count = count_array.value(row_index).to_usize().ok_or_else(|| {
210-
internal_datafusion_err!("array_resize: failed to convert size to usize")
211-
})?;
209+
let target_count = target_count::<O>(count_array, row_index)?;
212210
output_values_len =
213211
output_values_len.checked_add(target_count).ok_or_else(|| {
214-
internal_datafusion_err!("array_resize: output size overflow")
212+
datafusion_common::DataFusionError::Execution(
213+
"array_resize: target size too large".to_string(),
214+
)
215215
})?;
216216
let current_len = (offset_window[1] - offset_window[0]).to_usize().unwrap();
217217
if target_count > current_len {
218218
max_extra = max_extra.max(target_count - current_len);
219219
}
220220
}
221+
validate_value_capacity(&data_type, output_values_len)?;
222+
validate_value_capacity(&data_type, max_extra)?;
221223

222224
// The fast path is valid when at least one row grows and every row would
223225
// use the same fill value.
@@ -315,9 +317,7 @@ where
315317
}
316318
null_builder.append_non_null();
317319

318-
let count = count_array.value(row_index).to_usize().ok_or_else(|| {
319-
internal_datafusion_err!("array_resize: failed to convert size to usize")
320-
})?;
320+
let count = target_count::<O>(count_array, row_index)?;
321321
let count = O::usize_as(count);
322322
let start = offset_window[0];
323323
if start + count > offset_window[1] {
@@ -341,3 +341,62 @@ where
341341
null_builder.finish(),
342342
)?))
343343
}
344+
345+
fn target_count<O: OffsetSizeTrait>(
346+
count_array: &Int64Array,
347+
row_index: usize,
348+
) -> Result<usize> {
349+
let count = count_array.value(row_index);
350+
if count < 0 {
351+
return exec_err!("array_resize: size must be non-negative");
352+
}
353+
354+
let count = count as usize;
355+
if O::from_usize(count).is_none() {
356+
return exec_err!("array_resize: target size too large");
357+
}
358+
359+
Ok(count)
360+
}
361+
362+
fn validate_value_capacity(data_type: &DataType, len: usize) -> Result<()> {
363+
let width = minimum_value_width(data_type);
364+
let Some(byte_len) = len.checked_mul(width) else {
365+
return exec_err!("array_resize: target size too large");
366+
};
367+
if byte_len >= isize::MAX as usize {
368+
return exec_err!("array_resize: target size too large");
369+
}
370+
371+
Ok(())
372+
}
373+
374+
fn minimum_value_width(data_type: &DataType) -> usize {
375+
match data_type {
376+
DataType::Boolean | DataType::Null => 1,
377+
DataType::Utf8 | DataType::Binary | List(_) => size_of::<i32>(),
378+
DataType::LargeUtf8 | DataType::LargeBinary | LargeList(_) => size_of::<i64>(),
379+
_ => data_type.primitive_width().unwrap_or(1).max(1),
380+
}
381+
}
382+
383+
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::ListArray;
    use arrow::datatypes::Int64Type;
    use datafusion_common::assert_contains;

    /// Resizing a one-element list to `i64::MAX` elements must be rejected
    /// with the "target size too large" error.
    #[test]
    fn array_resize_rejects_target_count_overflow() {
        // A single-row list column: [[1]].
        let list: ArrayRef = Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(
            vec![Some(vec![Some(1)])],
        ));
        // Requested size for that row, followed by the fill value.
        let count: ArrayRef = Arc::new(Int64Array::from(vec![i64::MAX]));
        let fill: ArrayRef = Arc::new(Int64Array::from(vec![0]));

        let message = array_resize_inner(&[list, count, fill])
            .unwrap_err()
            .to_string();

        assert_contains!(message, "array_resize: target size too large");
    }
}

datafusion/sqllogictest/test_files/array/array_resize.slt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,9 @@ select array_resize(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 5, 4);
6464
query error
6565
select array_resize(make_array(1, 2, 3), -5, 2);
6666

67+
query error DataFusion error: Execution error: array_resize: target size too large
68+
select array_resize(make_array(1), 9223372036854775807, 0);
69+
6770
# array_resize scalar function #5
6871
query ?
6972
select array_resize(make_array(1.1, 2.2, 3.3), 10, 9.9);

0 commit comments

Comments
 (0)