Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,32 @@ SELECT array_union(array(1), array(1)) FROM test_union_dups
-- disjoint single-element literal arrays: the two inputs share no element
query
SELECT array_union(array(1), array(2)) FROM test_union_dups

-- Ordering verification for array_union (issue: array_union ordering audit).
-- Spark contract: the result holds the left array's elements first
-- (deduplicated, in first-occurrence order), followed by those right-array
-- elements that are not already present. The rows loaded into this table
-- intentionally scramble element order so that any deviation from "left
-- order, then new right elements" -- e.g. emitting in right-side order,
-- sorting, or some other strategy -- shows up as a result that diverges from
-- Spark.
-- Fixture columns: a is passed as the left argument and b as the right
-- argument of array_union; both are array<int>.
statement
CREATE TABLE test_union_ordering(a array<int>, b array<int>) USING parquet

-- One row per ordering scenario (a = left input, b = right input). The value
-- after "=>" is what the left-first / first-occurrence contract implies:
--   1. left descending, right ascending superset of left      => 5,4,3,2,1,6,7
--   2. duplicate inside left; right mixes new and seen values => 3,1,4,5,9,2,6,7,8
--   3. right is left reversed, interleaved with new values    => 10,20,30,25,15
--   4. fully disjoint; right holds zero and negative values   => 1,2,3,0,-1,-2
--   5. runs of duplicates in left; right leads with new value => 7,8,9
--   6. alternating duplicates in left                         => 2,1,3
--   7. single-element left; right repeats that element twice  => 100,99,98,97
--   8. same two elements, right in opposite order             => 2,1
--   9. mirror image of row 8                                  => 1,2
statement
INSERT INTO test_union_ordering VALUES (array(5, 4, 3, 2, 1), array(1, 2, 3, 4, 5, 6, 7)), (array(3, 1, 4, 1, 5, 9, 2, 6), array(7, 5, 3, 8)), (array(10, 20, 30), array(30, 25, 20, 15, 10)), (array(1, 2, 3), array(0, -1, -2)), (array(7, 7, 7, 8, 8), array(9, 8, 7)), (array(2, 1, 2, 1, 2), array(3, 2, 1)), (array(100), array(99, 100, 98, 100, 97)), (array(2, 1), array(1, 2)), (array(1, 2), array(2, 1))

-- column inputs: a and b are projected next to the union so every result row
-- can be matched to the input pair that produced it
query
SELECT a, b, array_union(a, b) FROM test_union_ordering

-- Left longer than right. The array_intersect ordering caveat does not apply
-- here because array_union always iterates left then right; verify explicitly.
-- Both arguments are literals, so each row of test_union_ordering produces
-- the same value; under the left-first contract that is [5, 4, 3, 2, 1, 0].
query
SELECT array_union(array(5, 4, 3, 2, 1, 0), array(2, 1)) FROM test_union_ordering

-- Right longer than left: the inverse of the left-longer case, which would
-- expose any "longest-side-first" probing strategy.
-- Both arguments are literals, so each row of test_union_ordering produces
-- the same value; under the left-first contract that is [2, 1, 5, 4, 3, 0].
query
SELECT array_union(array(2, 1), array(5, 4, 3, 2, 1, 0)) FROM test_union_ordering

-- string arrays: fixture table with two array<string> columns, a and b
statement
CREATE TABLE test_union_str(a array<string>, b array<string>) USING parquet
Expand Down
Loading