Skip to content

Commit f008bc2

Browse files
authored
feat: add native support for greatest and least expressions (#4274)
1 parent d275007 commit f008bc2

4 files changed

Lines changed: 426 additions & 2 deletions

File tree

docs/source/contributor-guide/spark_expressions_support.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -379,10 +379,10 @@
379379
- [x] expm1
380380
- [ ] factorial
381381
- [x] floor
382-
- [ ] greatest
382+
- [x] greatest
383383
- [x] hex
384384
- [ ] hypot
385-
- [ ] least
385+
- [x] least
386386
- [x] ln
387387
- [x] log
388388
- [x] log10

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,9 +108,11 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim {
108108
classOf[Exp] -> CometScalarFunction("exp"),
109109
classOf[Expm1] -> CometScalarFunction("expm1"),
110110
classOf[Floor] -> CometFloor,
111+
classOf[Greatest] -> CometScalarFunction("greatest"),
111112
classOf[Hex] -> CometHex,
112113
classOf[IntegralDivide] -> CometIntegralDivide,
113114
classOf[IsNaN] -> CometIsNaN,
115+
classOf[Least] -> CometScalarFunction("least"),
114116
classOf[Log] -> CometLog,
115117
classOf[Log2] -> CometLog2,
116118
classOf[Log10] -> CometLog10,
Lines changed: 211 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,211 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
statement
19+
CREATE TABLE test_greatest(a int, b int, c int) USING parquet
20+
21+
statement
22+
INSERT INTO test_greatest VALUES
23+
(1, 2, 3),
24+
(3, 2, 1),
25+
(NULL, 2, 3),
26+
(1, NULL, 3),
27+
(1, 2, NULL),
28+
(NULL, NULL, 3),
29+
(NULL, NULL, NULL),
30+
(-1, 0, 1),
31+
(2147483647, -2147483648, 0)
32+
33+
-- column arguments
34+
query
35+
SELECT greatest(a, b, c) FROM test_greatest
36+
37+
-- two column arguments
38+
query
39+
SELECT greatest(a, b) FROM test_greatest
40+
41+
-- literal arguments
42+
query
43+
SELECT greatest(1, 2, 3), greatest(3, 2, 1), greatest(-1, 0, 1)
44+
45+
-- NULL literal handling
46+
query
47+
SELECT greatest(NULL, 2, 3), greatest(1, NULL, 3), greatest(NULL, NULL, NULL)
48+
49+
-- mixed column and literal
50+
query
51+
SELECT greatest(a, 0) FROM test_greatest
52+
53+
statement
54+
CREATE TABLE test_greatest_types(f float, d double, s string, dt date) USING parquet
55+
56+
statement
57+
INSERT INTO test_greatest_types VALUES
58+
(1.5, 2.5, 'apple', DATE '2023-01-01'),
59+
(-1.5, -2.5, 'banana', DATE '2023-06-15'),
60+
(0.0, 0.0, 'cherry', DATE '2023-12-31'),
61+
(NULL, NULL, NULL, NULL),
62+
(cast('NaN' as float), 1.0, 'a', DATE '2020-01-01'),
63+
(cast('Infinity' as float), cast('-Infinity' as double), 'z', DATE '2099-12-31')
64+
65+
-- floating point with NaN and Infinity
66+
query
67+
SELECT greatest(f, 0.0) FROM test_greatest_types
68+
69+
query
70+
SELECT greatest(d, 0.0) FROM test_greatest_types
71+
72+
-- string type
73+
query
74+
SELECT greatest(s, 'b') FROM test_greatest_types
75+
76+
-- date type
77+
query
78+
SELECT greatest(dt, DATE '2023-06-01') FROM test_greatest_types
79+
80+
-- literal floating point edge cases
81+
query
82+
SELECT greatest(cast('NaN' as float), 1.0), greatest(cast('NaN' as float), cast('NaN' as float))
83+
84+
query
85+
SELECT greatest(cast('Infinity' as double), 1.0), greatest(cast('-Infinity' as double), 1.0)
86+
87+
88+
statement
89+
CREATE TABLE test_greatest_long(a bigint, b bigint) USING parquet
90+
91+
statement
92+
INSERT INTO test_greatest_long VALUES
93+
(1, 2),
94+
(-1, 1),
95+
(9223372036854775807, -9223372036854775808),
96+
(NULL, 100),
97+
(NULL, NULL)
98+
99+
-- long/bigint type
100+
query
101+
SELECT greatest(a, b) FROM test_greatest_long
102+
103+
query
104+
SELECT greatest(cast(1 as bigint), cast(-1 as bigint), cast(9223372036854775807 as bigint))
105+
106+
statement
107+
CREATE TABLE test_greatest_decimal(a decimal(20,5), b decimal(20,5)) USING parquet
108+
109+
statement
110+
INSERT INTO test_greatest_decimal VALUES
111+
(1.00000, 2.00000),
112+
(-99999.99999, 99999.99999),
113+
(0.00001, -0.00001),
114+
(NULL, 123.45000),
115+
(NULL, NULL)
116+
117+
-- decimal type
118+
query
119+
SELECT greatest(a, b) FROM test_greatest_decimal
120+
121+
query
122+
SELECT greatest(cast(1.5 as decimal(10,2)), cast(2.5 as decimal(10,2)), cast(-1.0 as decimal(10,2)))
123+
124+
statement
125+
CREATE TABLE test_greatest_bool(a boolean, b boolean) USING parquet
126+
127+
statement
128+
INSERT INTO test_greatest_bool VALUES
129+
(true, false),
130+
(false, true),
131+
(true, true),
132+
(false, false),
133+
(NULL, true),
134+
(NULL, NULL)
135+
136+
-- boolean type
137+
query
138+
SELECT greatest(a, b) FROM test_greatest_bool
139+
140+
query
141+
SELECT greatest(true, false), greatest(false, false), greatest(true, true)
142+
143+
statement
144+
CREATE TABLE test_greatest_ts(a timestamp, b timestamp) USING parquet
145+
146+
statement
147+
INSERT INTO test_greatest_ts VALUES
148+
(TIMESTAMP '2023-01-01 08:00:00', TIMESTAMP '2023-01-01 10:00:00'),
149+
(TIMESTAMP '2020-06-15 12:30:00', TIMESTAMP '2020-06-15 12:29:59'),
150+
(NULL, TIMESTAMP '2023-01-01 00:00:00'),
151+
(NULL, NULL)
152+
153+
-- timestamp type
154+
query
155+
SELECT greatest(a, b) FROM test_greatest_ts
156+
157+
query
158+
SELECT greatest(TIMESTAMP '2015-07-01 08:00:00', TIMESTAMP '2015-07-01 10:00:00')
159+
160+
-- many arguments (5+)
161+
query
162+
SELECT greatest(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
163+
164+
query
165+
SELECT greatest(a, b, c, a, b) FROM test_greatest
166+
167+
-- array type (lexicographic ordering)
168+
statement
169+
CREATE TABLE test_greatest_array(a array<int>, b array<int>) USING parquet
170+
171+
statement
172+
INSERT INTO test_greatest_array VALUES
173+
(array(1, 2, 3), array(1, 2, 4)),
174+
(array(1, 2), array(1, 2, 0)),
175+
(array(3), array(2, 9, 9)),
176+
(array(1, 1), array(1, 1)),
177+
(NULL, array(1)),
178+
(array(1), NULL),
179+
(NULL, NULL)
180+
181+
query
182+
SELECT greatest(a, b) FROM test_greatest_array
183+
184+
query
185+
SELECT greatest(array(1, 2), array(1, 3), array(1, 1))
186+
187+
query
188+
SELECT greatest(array('b', 'a'), array('a', 'z'))
189+
190+
-- struct type (field-by-field ordering)
191+
statement
192+
CREATE TABLE test_greatest_struct(a struct<x:int, y:string>, b struct<x:int, y:string>) USING parquet
193+
194+
statement
195+
INSERT INTO test_greatest_struct VALUES
196+
(named_struct('x', 1, 'y', 'a'), named_struct('x', 2, 'y', 'a')),
197+
(named_struct('x', 1, 'y', 'b'), named_struct('x', 1, 'y', 'a')),
198+
(named_struct('x', 3, 'y', 'z'), named_struct('x', 3, 'y', 'z')),
199+
(NULL, named_struct('x', 1, 'y', 'a')),
200+
(named_struct('x', 1, 'y', 'a'), NULL),
201+
(NULL, NULL)
202+
203+
query
204+
SELECT greatest(a, b) FROM test_greatest_struct
205+
206+
query
207+
SELECT greatest(named_struct('x', 1, 'y', 'b'), named_struct('x', 1, 'y', 'a'))
208+
209+
-- nested complex type: array of structs
210+
query
211+
SELECT greatest(array(named_struct('x', 1)), array(named_struct('x', 2)))

0 commit comments

Comments
 (0)