Skip to content

Commit e16b87d

Browse files
authored
chore: add ordering tests for array_union (#4688)
1 parent 35b7619 commit e16b87d

1 file changed

Lines changed: 26 additions & 0 deletions

File tree

spark/src/test/resources/sql-tests/expressions/array/array_union.sql

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,32 @@ SELECT array_union(array(1), array(1)) FROM test_union_dups
8484
query
8585
SELECT array_union(array(1), array(2)) FROM test_union_dups
8686

87+
-- Ordering verification: left elements first, then new right elements (issue: array_union ordering audit)
88+
-- Spark contract: array_union emits left-array elements first (deduped, in
89+
-- first-occurrence order), followed by right-array elements that are not
90+
-- already present. The cases below intentionally scramble element order so
91+
-- any deviation from "left order, then new right elements" surfaces in the
92+
-- result -- e.g. if the engine emitted elements in right-side order, in sorted order, or by
93+
-- some other strategy, these expectations would diverge from Spark.
94+
statement
95+
CREATE TABLE test_union_ordering(a array<int>, b array<int>) USING parquet
96+
97+
statement
98+
INSERT INTO test_union_ordering VALUES (array(5, 4, 3, 2, 1), array(1, 2, 3, 4, 5, 6, 7)), (array(3, 1, 4, 1, 5, 9, 2, 6), array(7, 5, 3, 8)), (array(10, 20, 30), array(30, 25, 20, 15, 10)), (array(1, 2, 3), array(0, -1, -2)), (array(7, 7, 7, 8, 8), array(9, 8, 7)), (array(2, 1, 2, 1, 2), array(3, 2, 1)), (array(100), array(99, 100, 98, 100, 97)), (array(2, 1), array(1, 2)), (array(1, 2), array(2, 1))
99+
100+
query
101+
SELECT a, b, array_union(a, b) FROM test_union_ordering
102+
103+
-- left longer than right (the array_intersect ordering caveat does not apply
104+
-- here because array_union always iterates left then right; verify explicitly)
105+
query
106+
SELECT array_union(array(5, 4, 3, 2, 1, 0), array(2, 1)) FROM test_union_ordering
107+
108+
-- right longer than left (the inverse of the above; would expose any
109+
-- "longest-side-first" probing strategy)
110+
query
111+
SELECT array_union(array(2, 1), array(5, 4, 3, 2, 1, 0)) FROM test_union_ordering
112+
87113
-- string arrays
88114
statement
89115
CREATE TABLE test_union_str(a array<string>, b array<string>) USING parquet

0 commit comments

Comments
 (0)