Skip to content

Commit 089b6a5

Browse files
authored
chore(audit): audit Average and expand tests (#4439)
1 parent a08cb4e commit 089b6a5

3 files changed

Lines changed: 109 additions & 2 deletions

File tree

docs/source/contributor-guide/spark_expressions_support.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@
4343
- [ ] approx_top_k_combine
4444
- [ ] array_agg
4545
- [x] avg
46+
- Spark 3.4.3 (2026-05-26)
47+
- Spark 3.5.8 (2026-05-26): aggregate logic identical to 3.4.3
48+
- Spark 4.0.1 (2026-05-26): aggregate logic identical to 3.5.8; only `QueryContext` import path differs. `YearMonthIntervalType` and `DayTimeIntervalType` inputs (supported by Spark) fall back to Spark in Comet.
4649
- [x] bit_and
4750
- Spark 3.4.3 (2026-05-26)
4851
- Spark 3.5.8 (2026-05-26)

spark/src/main/scala/org/apache/comet/serde/aggregates.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ object CometCount extends CometAggregateExpressionSerde[Count] {
155155

156156
object CometAverage extends CometAggregateExpressionSerde[Average] {
157157

158-
override def getIncompatibleReasons(): Seq[String] = Seq(
159-
"Falls back to Spark in ANSI mode. Supports all numeric inputs except decimal types.")
158+
override def getUnsupportedReasons(): Seq[String] = Seq(
159+
"YearMonthIntervalType and DayTimeIntervalType inputs are not supported")
160160

161161
override def convert(
162162
aggExpr: AggregateExpression,

spark/src/test/resources/sql-tests/expressions/aggregate/avg.sql

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,107 @@ SELECT avg(i), avg(l), avg(f), avg(d) FROM test_avg
2626

2727
query tolerance=1e-6
2828
SELECT grp, avg(d) FROM test_avg GROUP BY grp ORDER BY grp
29+
30+
-- single-row group (count == 1)
31+
query tolerance=1e-6
32+
SELECT grp, avg(d) FROM test_avg WHERE i = 3 GROUP BY grp
33+
34+
-- byte and short input types
35+
statement
36+
CREATE TABLE test_avg_small(b tinyint, s smallint, grp string) USING parquet
37+
38+
statement
39+
INSERT INTO test_avg_small VALUES (1, 100, 'a'), (2, 200, 'a'), (3, 300, 'b'), (NULL, NULL, 'b'), (-1, -100, 'a')
40+
41+
query tolerance=1e-6
42+
SELECT avg(b), avg(s) FROM test_avg_small
43+
44+
query tolerance=1e-6
45+
SELECT grp, avg(b), avg(s) FROM test_avg_small GROUP BY grp ORDER BY grp
46+
47+
-- all-NULL input returns NULL
48+
statement
49+
CREATE TABLE test_avg_all_null(v double, grp string) USING parquet
50+
51+
statement
52+
INSERT INTO test_avg_all_null VALUES (NULL, 'a'), (NULL, 'a'), (NULL, 'b')
53+
54+
query
55+
SELECT avg(v) FROM test_avg_all_null
56+
57+
query
58+
SELECT grp, avg(v) FROM test_avg_all_null GROUP BY grp ORDER BY grp
59+
60+
-- empty input (no rows) returns NULL
61+
statement
62+
CREATE TABLE test_avg_empty(v double) USING parquet
63+
64+
query
65+
SELECT avg(v) FROM test_avg_empty
66+
67+
-- NaN and infinity input on doubles
68+
statement
69+
CREATE TABLE test_avg_special(v double, grp string) USING parquet
70+
71+
statement
72+
INSERT INTO test_avg_special VALUES
73+
(double('NaN'), 'nan_only'),
74+
(1.0, 'nan_only'),
75+
(double('Infinity'), 'pos_inf_only'),
76+
(1.0, 'pos_inf_only'),
77+
(double('Infinity'), 'mixed_inf'),
78+
(double('-Infinity'), 'mixed_inf'),
79+
(double('-Infinity'), 'neg_inf_only'),
80+
(-2.0, 'neg_inf_only')
81+
82+
query tolerance=1e-6
83+
SELECT grp, avg(v) FROM test_avg_special GROUP BY grp ORDER BY grp
84+
85+
-- boundary integer values
86+
statement
87+
CREATE TABLE test_avg_bounds(l long, grp string) USING parquet
88+
89+
statement
90+
INSERT INTO test_avg_bounds VALUES
91+
(9223372036854775807, 'maxes'),
92+
(9223372036854775807, 'maxes'),
93+
(-9223372036854775808, 'mins'),
94+
(-9223372036854775808, 'mins'),
95+
(9223372036854775807, 'mixed'),
96+
(-9223372036854775808, 'mixed')
97+
98+
query tolerance=1e-6
99+
SELECT grp, avg(l) FROM test_avg_bounds GROUP BY grp ORDER BY grp
100+
101+
-- negative-only inputs
102+
statement
103+
CREATE TABLE test_avg_negative(d double) USING parquet
104+
105+
statement
106+
INSERT INTO test_avg_negative VALUES (-1.5), (-2.5), (-3.5), (-0.0)
107+
108+
query tolerance=1e-6
109+
SELECT avg(d) FROM test_avg_negative
110+
111+
-- decimal column at higher precision
112+
statement
113+
CREATE TABLE test_avg_decimal(d decimal(20, 5), grp string) USING parquet
114+
115+
statement
116+
INSERT INTO test_avg_decimal VALUES
117+
(10.50000, 'a'),
118+
(20.25000, 'a'),
119+
(NULL, 'a'),
120+
(-5.00000, 'b'),
121+
(0.00000, 'b'),
122+
(5.00000, 'b')
123+
124+
query
125+
SELECT avg(d) FROM test_avg_decimal
126+
127+
query
128+
SELECT grp, avg(d) FROM test_avg_decimal GROUP BY grp ORDER BY grp
129+
130+
-- non-NULL count(d) and avg(d) in the same query for cross-check
131+
query tolerance=1e-6
132+
SELECT count(d), avg(d) FROM test_avg_decimal

0 commit comments

Comments
 (0)