From 1084291cec0405df02330807fb180c94041cf531 Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 18 Jun 2026 15:56:01 -0700 Subject: [PATCH] chore: add ordering tests for `array_union` --- .../expressions/array/array_union.sql | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/spark/src/test/resources/sql-tests/expressions/array/array_union.sql b/spark/src/test/resources/sql-tests/expressions/array/array_union.sql index fc16c7405c..3280f0d98b 100644 --- a/spark/src/test/resources/sql-tests/expressions/array/array_union.sql +++ b/spark/src/test/resources/sql-tests/expressions/array/array_union.sql @@ -84,6 +84,32 @@ SELECT array_union(array(1), array(1)) FROM test_union_dups query SELECT array_union(array(1), array(2)) FROM test_union_dups +-- left-first-then-new-right ordering verification (issue: array_union ordering audit) +-- Spark contract: array_union emits left-array elements first (deduped, in +-- first-occurrence order), followed by right-array elements that are not +-- already present. The cases below intentionally scramble element order so +-- any deviation from "left order, then new right elements" surfaces in the +-- result -- e.g. if the engine emitted in right-side order, sorted, or by +-- some other strategy, these expectations would diverge from Spark. 
+statement
+CREATE TABLE test_union_ordering(a array<int>, b array<int>) USING parquet
+
+statement
+INSERT INTO test_union_ordering VALUES (array(5, 4, 3, 2, 1), array(1, 2, 3, 4, 5, 6, 7)), (array(3, 1, 4, 1, 5, 9, 2, 6), array(7, 5, 3, 8)), (array(10, 20, 30), array(30, 25, 20, 15, 10)), (array(1, 2, 3), array(0, -1, -2)), (array(7, 7, 7, 8, 8), array(9, 8, 7)), (array(2, 1, 2, 1, 2), array(3, 2, 1)), (array(100), array(99, 100, 98, 100, 97)), (array(2, 1), array(1, 2)), (array(1, 2), array(2, 1))
+
+query
+SELECT a, b, array_union(a, b) FROM test_union_ordering
+
+-- left longer than right (the array_intersect ordering caveat does not apply
+-- here because array_union always iterates left then right; verify explicitly)
+query
+SELECT array_union(array(5, 4, 3, 2, 1, 0), array(2, 1)) FROM test_union_ordering
+
+-- right longer than left (the inverse of the above; would expose any
+-- "longest-side-first" probing strategy)
+query
+SELECT array_union(array(2, 1), array(5, 4, 3, 2, 1, 0)) FROM test_union_ordering
+
 -- string arrays
 statement
 CREATE TABLE test_union_str(a array<string>, b array<string>) USING parquet