@@ -24,20 +24,20 @@ use arrow::array::{
2424} ;
2525use arrow:: buffer:: OffsetBuffer ;
2626use arrow:: datatypes:: DataType ;
27- use arrow:: datatypes:: { ArrowNativeType , Field } ;
27+ use arrow:: datatypes:: Field ;
2828use arrow:: datatypes:: {
2929 DataType :: { LargeList , List } ,
3030 FieldRef ,
3131} ;
3232use datafusion_common:: cast:: { as_int64_array, as_large_list_array, as_list_array} ;
3333use datafusion_common:: utils:: ListCoercion ;
34- use datafusion_common:: { Result , ScalarValue , exec_err, internal_datafusion_err } ;
34+ use datafusion_common:: { Result , ScalarValue , exec_err} ;
3535use datafusion_expr:: {
3636 ArrayFunctionArgument , ArrayFunctionSignature , ColumnarValue , Documentation ,
3737 ScalarFunctionArgs , ScalarUDFImpl , Signature , TypeSignature , Volatility ,
3838} ;
3939use datafusion_macros:: user_doc;
40- use std:: sync:: Arc ;
40+ use std:: { mem :: size_of , sync:: Arc } ;
4141
4242make_udf_expr_and_func ! (
4343 ArrayResize ,
@@ -206,18 +206,20 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
206206 if array. is_null ( row_index) {
207207 continue ;
208208 }
209- let target_count = count_array. value ( row_index) . to_usize ( ) . ok_or_else ( || {
210- internal_datafusion_err ! ( "array_resize: failed to convert size to usize" )
211- } ) ?;
209+ let target_count = target_count :: < O > ( count_array, row_index) ?;
212210 output_values_len =
213211 output_values_len. checked_add ( target_count) . ok_or_else ( || {
214- internal_datafusion_err ! ( "array_resize: output size overflow" )
212+ datafusion_common:: DataFusionError :: Execution (
213+ "array_resize: target size too large" . to_string ( ) ,
214+ )
215215 } ) ?;
216216 let current_len = ( offset_window[ 1 ] - offset_window[ 0 ] ) . to_usize ( ) . unwrap ( ) ;
217217 if target_count > current_len {
218218 max_extra = max_extra. max ( target_count - current_len) ;
219219 }
220220 }
221+ validate_value_capacity ( & data_type, output_values_len) ?;
222+ validate_value_capacity ( & data_type, max_extra) ?;
221223
222224 // The fast path is valid when at least one row grows and every row would
223225 // use the same fill value.
@@ -315,9 +317,7 @@ where
315317 }
316318 null_builder. append_non_null ( ) ;
317319
318- let count = count_array. value ( row_index) . to_usize ( ) . ok_or_else ( || {
319- internal_datafusion_err ! ( "array_resize: failed to convert size to usize" )
320- } ) ?;
320+ let count = target_count :: < O > ( count_array, row_index) ?;
321321 let count = O :: usize_as ( count) ;
322322 let start = offset_window[ 0 ] ;
323323 if start + count > offset_window[ 1 ] {
@@ -341,3 +341,62 @@ where
341341 null_builder. finish ( ) ,
342342 ) ?) )
343343}
344+
345+ fn target_count < O : OffsetSizeTrait > (
346+ count_array : & Int64Array ,
347+ row_index : usize ,
348+ ) -> Result < usize > {
349+ let count = count_array. value ( row_index) ;
350+ if count < 0 {
351+ return exec_err ! ( "array_resize: size must be non-negative" ) ;
352+ }
353+
354+ let count = count as usize ;
355+ if O :: from_usize ( count) . is_none ( ) {
356+ return exec_err ! ( "array_resize: target size too large" ) ;
357+ }
358+
359+ Ok ( count)
360+ }
361+
362+ fn validate_value_capacity ( data_type : & DataType , len : usize ) -> Result < ( ) > {
363+ let width = minimum_value_width ( data_type) ;
364+ let Some ( byte_len) = len. checked_mul ( width) else {
365+ return exec_err ! ( "array_resize: target size too large" ) ;
366+ } ;
367+ if byte_len >= isize:: MAX as usize {
368+ return exec_err ! ( "array_resize: target size too large" ) ;
369+ }
370+
371+ Ok ( ( ) )
372+ }
373+
374+ fn minimum_value_width ( data_type : & DataType ) -> usize {
375+ match data_type {
376+ DataType :: Boolean | DataType :: Null => 1 ,
377+ DataType :: Utf8 | DataType :: Binary | List ( _) => size_of :: < i32 > ( ) ,
378+ DataType :: LargeUtf8 | DataType :: LargeBinary | LargeList ( _) => size_of :: < i64 > ( ) ,
379+ _ => data_type. primitive_width ( ) . unwrap_or ( 1 ) . max ( 1 ) ,
380+ }
381+ }
382+
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::ListArray;
    use arrow::datatypes::Int64Type;
    use datafusion_common::assert_contains;

    /// Requesting `i64::MAX` elements for a one-row list must fail with the
    /// "target size too large" error instead of succeeding.
    #[test]
    fn array_resize_rejects_target_count_overflow() {
        let input_list: ArrayRef =
            Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
                Some(vec![Some(1)]),
            ]));
        let requested_size: ArrayRef = Arc::new(Int64Array::from(vec![i64::MAX]));
        let fill_value: ArrayRef = Arc::new(Int64Array::from(vec![0]));

        let failure =
            array_resize_inner(&[input_list, requested_size, fill_value]).unwrap_err();

        assert_contains!(failure.to_string(), "array_resize: target size too large");
    }
}
0 commit comments