diff --git a/datafusion/functions-nested/src/position.rs b/datafusion/functions-nested/src/position.rs
index d65620ede38e6..3ea2dc1f90928 100644
--- a/datafusion/functions-nested/src/position.rs
+++ b/datafusion/functions-nested/src/position.rs
@@ -207,17 +207,23 @@ fn resolve_start_from(
     match third_arg {
         None => Ok(vec![0i64; num_rows]),
         Some(ColumnarValue::Scalar(ScalarValue::Int64(Some(v)))) => {
-            Ok(vec![v - 1; num_rows])
+            Ok(vec![normalize_start_from(*v)?; num_rows])
         }
         Some(ColumnarValue::Scalar(s)) => {
             exec_err!("array_position expected Int64 for start_from, got {s}")
         }
-        Some(ColumnarValue::Array(a)) => {
-            Ok(as_int64_array(a)?.values().iter().map(|&x| x - 1).collect())
-        }
+        Some(ColumnarValue::Array(a)) => as_int64_array(a)?
+            .values()
+            .iter()
+            .map(|&x| normalize_start_from(x))
+            .collect(),
     }
 }
 
+fn normalize_start_from(start_from: i64) -> Result<i64> {
+    Ok(start_from.saturating_sub(1))
+}
+
 /// Fast path for `array_position` when the needle is scalar.
 ///
 /// Performs a single bulk `not_distinct` comparison of the needle against the
@@ -309,8 +315,8 @@ fn general_position_dispatch(args: &[ArrayRef]) -> Result>()
+            .map(|&x| normalize_start_from(x))
+            .collect::<Result<Vec<_>>>()?
     } else {
         vec![0; haystack.len()]
     };
@@ -592,9 +598,29 @@ fn array_positions_scalar(
 mod tests {
     use super::*;
     use arrow::array::AsArray;
-    use arrow::datatypes::Int32Type;
+    use arrow::array::Int64Array;
+    use arrow::datatypes::{Int32Type, Int64Type};
     use datafusion_common::config::ConfigOptions;
 
+    #[test]
+    fn test_array_position_start_from_min_value() -> Result<()> {
+        let haystack = Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
+            Some(vec![Some(1)]),
+        ])) as ArrayRef;
+        let needle = Arc::new(Int64Array::from(vec![1])) as ArrayRef;
+        let start_from = Arc::new(Int64Array::from(vec![i64::MIN])) as ArrayRef;
+
+        let err = array_position_inner(&[haystack, needle, start_from])
+            .unwrap_err()
+            .to_string();
+        assert!(
+            err.contains("start_from out of bounds: -9223372036854775807"),
+            "unexpected error: {err}"
+        );
+
+        Ok(())
+    }
+
     #[test]
     fn test_array_position_sliced_list() -> Result<()> {
         // [[10, 20], [30, 40], [50, 60], [70, 80]] → slice(1,2) → [[30, 40], [50, 60]]
diff --git a/datafusion/sqllogictest/test_files/array/array_position.slt b/datafusion/sqllogictest/test_files/array/array_position.slt
index 07e3d3143592c..b591c12048d76 100644
--- a/datafusion/sqllogictest/test_files/array/array_position.slt
+++ b/datafusion/sqllogictest/test_files/array/array_position.slt
@@ -282,6 +282,9 @@ select array_position([1, 2, 3], 3, 4), array_position([1], 1, 2);
 ----
 NULL NULL
 
+query error DataFusion error: Execution error: start_from out of bounds: -9223372036854775807
+SELECT array_position([1], 1, -9223372036854775808);
+
 # array_position with empty array in various contexts
 query II
 select array_position(arrow_cast(make_array(), 'List(Int64)'), 1), array_position(arrow_cast(make_array(), 'LargeList(Int64)'), 1);