Skip to content

Commit ccc67e9

Browse files
authored
feat: fix windows frame positive/neg overflows (#22140)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> - Closes #22137 . ## Rationale for this change `RANGE` window frames with a value offset (e.g. `RANGE BETWEEN 4 PRECEDING AND 4 FOLLOWING`) panicked with `attempt to add/subtract with overflow` whenever the boundary target (`value ± delta`) wrapped past the type's representable range. Affected inputs include values close to `i64::MAX`/`i64::MIN`, `u64::MAX`, and any analogous boundary for other integer/decimal/timestamp types. Two `// TODO: Handle ... overflows.` markers in `WindowFrameStateRange::calculate_index_of_row` had been left for this case; the unchecked `ScalarValue::add` / `sub` silently wrapped the target, after which `search_in_slice` was handed a nonsensical (wrapped) value and downstream code tripped a debug-assert subtraction in `functions-window/src/nth_value.rs`. Semantically, an overflowed boundary is *unbounded* with respect to the data in the partition — every real value lies strictly inside the wrapped sentinel — so the correct behavior is to collapse the search to the appropriate partition edge rather than to search with a wrapped target. ## What changes are included in this PR? `datafusion/expr/src/window_state.rs` - Replace `ScalarValue::add` / `sub` with their `*_checked` counterparts in the boundary computation. - On overflow, short-circuit to the correct partition edge: `search_start` for `PRECEDING`-direction searches, `length` for `FOLLOWING`-direction searches. The collapse direction depends only on the const-generic `SEARCH_SIDE` (the add branch and sub branch both reduce to `!SEARCH_SIDE` once you expand the `SEARCH_SIDE == is_descending` invariant that selects each arithmetic branch). - The pre-existing `value.is_unsigned() && value < delta` clamp-to-zero path for unsigned subtraction is preserved — it produces a valid polymorphic zero, not an overflow sentinel. - No behavior change on the non-overflow path. `datafusion/sqllogictest/test_files/window.slt` Regression coverage for positive and negative overflow, across: - `ASC` + `FOLLOWING` / `ASC` + `PRECEDING` / `DESC` + `PRECEDING` / `DESC` + `FOLLOWING` (each overflow direction occurs on both sort orders depending on which arithmetic branch is taken) - Symmetric `N PRECEDING AND N FOLLOWING` frames where only one side overflows - Signed (`i64`) and unsigned (`u64`) ordering columns - `first_value` and `last_value` both exercised to verify both frame edges - `ROWS` frame regression guard to document that the pre-existing `saturating_sub` / `min(length)` saturation behavior is unchanged.
1 parent 74c4c64 commit ccc67e9

2 files changed

Lines changed: 254 additions & 23 deletions

File tree

datafusion/expr/src/window_state.rs

Lines changed: 34 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,11 @@ impl WindowFrameStateRange {
396396
length: usize,
397397
) -> Result<usize> {
398398
let current_row_values = get_row_at_idx(range_columns, idx)?;
399+
let search_start = if SIDE {
400+
last_range.start
401+
} else {
402+
last_range.end
403+
};
399404
let end_range = if let Some(delta) = delta {
400405
let is_descending: bool = self
401406
.sort_options
@@ -407,34 +412,40 @@ impl WindowFrameStateRange {
407412
})?
408413
.descending;
409414

410-
current_row_values
411-
.iter()
412-
.map(|value| {
413-
if value.is_null() {
414-
return Ok(value.clone());
415+
// On overflow the boundary exceeds the type's range and is
416+
// effectively unbounded within the partition. Collapse to the
417+
// partition edge rather than feeding `search_in_slice` a
418+
// wrapped-around target: PRECEDING searches reach `search_start`,
419+
// FOLLOWING searches reach `length`.
420+
let unbounded_edge = if SEARCH_SIDE { search_start } else { length };
421+
let mut targets = Vec::with_capacity(current_row_values.len());
422+
for value in &current_row_values {
423+
if value.is_null() {
424+
targets.push(value.clone());
425+
continue;
426+
}
427+
let target = if SEARCH_SIDE == is_descending {
428+
match value.add_checked(delta) {
429+
Ok(v) => v,
430+
Err(_) => return Ok(unbounded_edge),
415431
}
416-
if SEARCH_SIDE == is_descending {
417-
// TODO: Handle positive overflows.
418-
value.add(delta)
419-
} else if value.is_unsigned() && value < delta {
420-
// NOTE: This gets a polymorphic zero without having long coercion code for ScalarValue.
421-
// If we decide to implement a "default" construction mechanism for ScalarValue,
422-
// change the following statement to use that.
423-
value.sub(value)
424-
} else {
425-
// TODO: Handle negative overflows.
426-
value.sub(delta)
432+
} else if value.is_unsigned() && value < delta {
433+
// NOTE: This gets a polymorphic zero without having long coercion code for ScalarValue.
434+
// If we decide to implement a "default" construction mechanism for ScalarValue,
435+
// change the following statement to use that.
436+
value.sub(value)?
437+
} else {
438+
match value.sub_checked(delta) {
439+
Ok(v) => v,
440+
Err(_) => return Ok(unbounded_edge),
427441
}
428-
})
429-
.collect::<Result<Vec<ScalarValue>>>()?
442+
};
443+
targets.push(target);
444+
}
445+
targets
430446
} else {
431447
current_row_values
432448
};
433-
let search_start = if SIDE {
434-
last_range.start
435-
} else {
436-
last_range.end
437-
};
438449
let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
439450
let cmp = compare_rows(current, target, &self.sort_options)?;
440451
Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })

datafusion/sqllogictest/test_files/window.slt

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6236,6 +6236,226 @@ INNER JOIN issue_20194_t2 t2
62366236
----
62376237
6774502793 10040029 1
62386238

6239+
# Regression tests for RANGE window frames whose value-offset boundary
6240+
# computation overflows the type's representable range. Previously these
6241+
# queries panicked in functions-window/src/nth_value.rs with
6242+
# "attempt to subtract with overflow" because the wrapped-around target
6243+
# produced a frame range where `end < start`. Both positive overflows
6244+
# (target above type MAX) and negative overflows (target below type MIN)
6245+
# must be treated as unbounded within the partition.
6246+
6247+
############################################################################
6248+
# Positive overflow: value + delta exceeds type MAX
6249+
############################################################################
6250+
6251+
# ASC + FOLLOWING: end bound wraps past i64::MAX.
6252+
query II
6253+
SELECT a, last_value(a) OVER (ORDER BY a RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING)
6254+
FROM (
6255+
SELECT 9223372036854775804 AS a
6256+
UNION ALL SELECT 9223372036854775805
6257+
UNION ALL SELECT 9223372036854775806
6258+
);
6259+
----
6260+
9223372036854775804 9223372036854775806
6261+
9223372036854775805 9223372036854775806
6262+
9223372036854775806 9223372036854775806
6263+
6264+
query II
6265+
SELECT a, first_value(a) OVER (ORDER BY a RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING)
6266+
FROM (
6267+
SELECT 9223372036854775804 AS a
6268+
UNION ALL SELECT 9223372036854775805
6269+
UNION ALL SELECT 9223372036854775806
6270+
);
6271+
----
6272+
9223372036854775804 9223372036854775804
6273+
9223372036854775805 9223372036854775805
6274+
9223372036854775806 9223372036854775806
6275+
6276+
# Symmetric PRECEDING/FOLLOWING where the FOLLOWING side overflows past MAX.
6277+
query II
6278+
SELECT a, last_value(a) OVER (ORDER BY a RANGE BETWEEN 4 PRECEDING AND 4 FOLLOWING)
6279+
FROM (
6280+
SELECT 9223372036854775804 AS a
6281+
UNION ALL SELECT 9223372036854775805
6282+
UNION ALL SELECT 9223372036854775806
6283+
);
6284+
----
6285+
9223372036854775804 9223372036854775806
6286+
9223372036854775805 9223372036854775806
6287+
9223372036854775806 9223372036854775806
6288+
6289+
# DESC + PRECEDING: "PRECEDING" walks toward larger values in DESC order,
6290+
# so offsetting past i64::MAX exercises the ADD-overflow path.
6291+
query II
6292+
SELECT a, first_value(a) OVER (ORDER BY a DESC RANGE BETWEEN 4 PRECEDING AND CURRENT ROW)
6293+
FROM (
6294+
SELECT 9223372036854775804 AS a
6295+
UNION ALL SELECT 9223372036854775805
6296+
UNION ALL SELECT 9223372036854775806
6297+
);
6298+
----
6299+
9223372036854775806 9223372036854775806
6300+
9223372036854775805 9223372036854775806
6301+
9223372036854775804 9223372036854775806
6302+
6303+
query II
6304+
SELECT a, last_value(a) OVER (ORDER BY a DESC RANGE BETWEEN 4 PRECEDING AND CURRENT ROW)
6305+
FROM (
6306+
SELECT 9223372036854775804 AS a
6307+
UNION ALL SELECT 9223372036854775805
6308+
UNION ALL SELECT 9223372036854775806
6309+
);
6310+
----
6311+
9223372036854775806 9223372036854775806
6312+
9223372036854775805 9223372036854775805
6313+
9223372036854775804 9223372036854775804
6314+
6315+
# Unsigned ordering column: add past u64::MAX must not wrap.
6316+
query II
6317+
SELECT a, last_value(a) OVER (ORDER BY a RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING)
6318+
FROM (
6319+
SELECT arrow_cast(18446744073709551612, 'UInt64') AS a
6320+
UNION ALL SELECT arrow_cast(18446744073709551613, 'UInt64')
6321+
UNION ALL SELECT arrow_cast(18446744073709551614, 'UInt64')
6322+
);
6323+
----
6324+
18446744073709551612 18446744073709551614
6325+
18446744073709551613 18446744073709551614
6326+
18446744073709551614 18446744073709551614
6327+
6328+
query II
6329+
SELECT a, first_value(a) OVER (ORDER BY a RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING)
6330+
FROM (
6331+
SELECT arrow_cast(18446744073709551612, 'UInt64') AS a
6332+
UNION ALL SELECT arrow_cast(18446744073709551613, 'UInt64')
6333+
UNION ALL SELECT arrow_cast(18446744073709551614, 'UInt64')
6334+
);
6335+
----
6336+
18446744073709551612 18446744073709551612
6337+
18446744073709551613 18446744073709551613
6338+
18446744073709551614 18446744073709551614
6339+
6340+
############################################################################
6341+
# Negative overflow: value - delta falls below type MIN
6342+
############################################################################
6343+
6344+
# ASC + PRECEDING: start bound wraps below i64::MIN.
6345+
query II
6346+
SELECT a, first_value(a) OVER (ORDER BY a RANGE BETWEEN 4 PRECEDING AND CURRENT ROW)
6347+
FROM (
6348+
SELECT -9223372036854775807 AS a
6349+
UNION ALL SELECT -9223372036854775806
6350+
UNION ALL SELECT -9223372036854775805
6351+
);
6352+
----
6353+
-9223372036854775807 -9223372036854775807
6354+
-9223372036854775806 -9223372036854775807
6355+
-9223372036854775805 -9223372036854775807
6356+
6357+
query II
6358+
SELECT a, last_value(a) OVER (ORDER BY a RANGE BETWEEN 4 PRECEDING AND CURRENT ROW)
6359+
FROM (
6360+
SELECT -9223372036854775807 AS a
6361+
UNION ALL SELECT -9223372036854775806
6362+
UNION ALL SELECT -9223372036854775805
6363+
);
6364+
----
6365+
-9223372036854775807 -9223372036854775807
6366+
-9223372036854775806 -9223372036854775806
6367+
-9223372036854775805 -9223372036854775805
6368+
6369+
# Symmetric PRECEDING/FOLLOWING where the PRECEDING side underflows past MIN.
6370+
query II
6371+
SELECT a, first_value(a) OVER (ORDER BY a RANGE BETWEEN 4 PRECEDING AND 4 FOLLOWING)
6372+
FROM (
6373+
SELECT -9223372036854775807 AS a
6374+
UNION ALL SELECT -9223372036854775806
6375+
UNION ALL SELECT -9223372036854775805
6376+
);
6377+
----
6378+
-9223372036854775807 -9223372036854775807
6379+
-9223372036854775806 -9223372036854775807
6380+
-9223372036854775805 -9223372036854775807
6381+
6382+
# DESC + FOLLOWING: "FOLLOWING" walks toward smaller values in DESC order,
6383+
# so offsetting past i64::MIN exercises the SUB-underflow path.
6384+
query II
6385+
SELECT a, last_value(a) OVER (ORDER BY a DESC RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING)
6386+
FROM (
6387+
SELECT -9223372036854775805 AS a
6388+
UNION ALL SELECT -9223372036854775806
6389+
UNION ALL SELECT -9223372036854775807
6390+
);
6391+
----
6392+
-9223372036854775805 -9223372036854775807
6393+
-9223372036854775806 -9223372036854775807
6394+
-9223372036854775807 -9223372036854775807
6395+
6396+
query II
6397+
SELECT a, first_value(a) OVER (ORDER BY a DESC RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING)
6398+
FROM (
6399+
SELECT -9223372036854775805 AS a
6400+
UNION ALL SELECT -9223372036854775806
6401+
UNION ALL SELECT -9223372036854775807
6402+
);
6403+
----
6404+
-9223372036854775805 -9223372036854775805
6405+
-9223372036854775806 -9223372036854775806
6406+
-9223372036854775807 -9223372036854775807
6407+
6408+
# Unsigned ordering column: subtracting an offset that would go below 0
6409+
# must saturate to 0, not wrap to u64::MAX.
6410+
query II
6411+
SELECT a, first_value(a) OVER (ORDER BY a RANGE BETWEEN 4 PRECEDING AND CURRENT ROW)
6412+
FROM (
6413+
SELECT arrow_cast(1, 'UInt64') AS a
6414+
UNION ALL SELECT arrow_cast(2, 'UInt64')
6415+
UNION ALL SELECT arrow_cast(3, 'UInt64')
6416+
);
6417+
----
6418+
1 1
6419+
2 1
6420+
3 1
6421+
6422+
query II
6423+
SELECT a, last_value(a) OVER (ORDER BY a RANGE BETWEEN 4 PRECEDING AND CURRENT ROW)
6424+
FROM (
6425+
SELECT arrow_cast(1, 'UInt64') AS a
6426+
UNION ALL SELECT arrow_cast(2, 'UInt64')
6427+
UNION ALL SELECT arrow_cast(3, 'UInt64')
6428+
);
6429+
----
6430+
1 1
6431+
2 2
6432+
3 3
6433+
6434+
############################################################################
6435+
# ROWS frame regression guard: huge offsets already saturate via
6436+
# saturating_sub / min(length), verify we keep that behavior.
6437+
############################################################################
6438+
6439+
query II
6440+
SELECT a, last_value(a) OVER (ORDER BY a ROWS BETWEEN CURRENT ROW AND 9223372036854775807 FOLLOWING)
6441+
FROM (
6442+
SELECT 1 AS a UNION ALL SELECT 2 UNION ALL SELECT 3
6443+
);
6444+
----
6445+
1 3
6446+
2 3
6447+
3 3
6448+
6449+
query II
6450+
SELECT a, first_value(a) OVER (ORDER BY a ROWS BETWEEN 9223372036854775807 PRECEDING AND CURRENT ROW)
6451+
FROM (
6452+
SELECT 1 AS a UNION ALL SELECT 2 UNION ALL SELECT 3
6453+
);
6454+
----
6455+
1 1
6456+
2 1
6457+
3 1
6458+
62396459
# Config reset
62406460
statement ok
62416461
reset datafusion.execution.batch_size;

0 commit comments

Comments
 (0)