Skip to content

Commit 92520ac

Browse files
committed
[fix](fe) Reject COUNT DISTINCT on variant arguments
### What problem does this PR solve?

Issue Number: close #25672

Related PR: None

Problem Summary: COUNT(DISTINCT variant_subcolumn) could reach BE hash key selection and fail with a vague INTERNAL_ERROR when the argument was VARIANT. Reject VARIANT arguments during FE aggregate analysis and keep a clearer BE fallback for uncaught hash-key paths.

### Release note

COUNT(DISTINCT ...) on VARIANT arguments now reports a clear unsupported-type error instead of a BE internal error. Cast VARIANT expressions to STRING or another supported scalar type before using COUNT DISTINCT.

### Check List (For Author)

- Test: Unit Test
    - `./run-fe-ut.sh --run org.apache.doris.nereids.trees.expressions.functions.agg.CountTest`
    - Regression test added but not run because the new worktree does not have a built output cluster.
- Behavior changed: Yes (COUNT DISTINCT on VARIANT now fails during analysis with a clearer error instead of a BE INTERNAL_ERROR)
- Does this need documentation: No
1 parent 66dbb85 commit 92520ac

5 files changed

Lines changed: 113 additions & 4 deletions

File tree

be/src/exec/common/hash_table/hash_key_type.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,12 @@ inline HashKeyType get_hash_key_type(const std::vector<DataTypePtr>& data_types)
118118
t->get_primitive_type() == TYPE_JSONB) {
119119
return HashKeyType::string_key;
120120
}
121+
if (t->get_primitive_type() == TYPE_VARIANT) {
122+
throw Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
123+
"VARIANT type is not supported as a hash key. Cast the VARIANT "
124+
"expression to STRING or another supported scalar type before using "
125+
"it in DISTINCT, GROUP BY, JOIN, or other hash operations.");
126+
}
121127
throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type, type={}", t->get_name());
122128
}
123129

@@ -140,4 +146,4 @@ inline HashKeyType get_hash_key_type(const std::vector<DataTypePtr>& data_types)
140146
}
141147
}
142148

143-
} // namespace doris
149+
} // namespace doris

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,13 +92,24 @@ public void checkLegalityBeforeTypeCoercion() {
9292
public void checkLegalityAfterRewrite() {
9393
// after rewrite, count(distinct bitmap_column) should be rewritten to bitmap_union_count(bitmap_column)
9494
for (Expression argument : getArguments()) {
95-
if (distinct && (argument.getDataType().isComplexType()
96-
|| argument.getDataType().isObjectType() || argument.getDataType().isJsonType())) {
97-
throw new AnalysisException("COUNT DISTINCT could not process type " + this.toSql());
95+
if (distinct) {
96+
checkDistinctArgument(argument, this.toSql());
9897
}
9998
}
10099
}
101100

101+
static void checkDistinctArgument(Expression argument, String functionSql) {
102+
DataType argumentType = argument.getDataType();
103+
if (argumentType.isVariantType()) {
104+
throw new AnalysisException("COUNT DISTINCT does not support VARIANT argument in " + functionSql
105+
+ ". Cast the VARIANT expression to STRING or another supported scalar type before using "
106+
+ "COUNT DISTINCT.");
107+
}
108+
if (argumentType.isComplexType() || argumentType.isObjectType() || argumentType.isJsonType()) {
109+
throw new AnalysisException("COUNT DISTINCT could not process type " + functionSql);
110+
}
111+
}
112+
102113
public boolean isStar() {
103114
return isStar;
104115
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ private MultiDistinctCount(boolean distinct, List<Expression> children) {
5757
if (super.children().size() > 1) {
5858
throw new AnalysisException("MultiDistinctCount's children size must be 1");
5959
}
60+
for (Expression argument : super.children()) {
61+
Count.checkDistinctArgument(argument, "COUNT DISTINCT " + argument.toSql());
62+
}
6063
}
6164

6265
/** constructor for withChildren and reuse signature */
@@ -67,6 +70,9 @@ protected MultiDistinctCount(AggregateFunctionParams functionParams) {
6770
@Override
6871
public MultiDistinctCount withDistinctAndChildren(boolean distinct, List<Expression> children) {
6972
Preconditions.checkArgument(children.size() == 1, "MultiDistinctCount's children size must be 1");
73+
for (Expression argument : children) {
74+
Count.checkDistinctArgument(argument, "COUNT DISTINCT " + argument.toSql());
75+
}
7076
return new MultiDistinctCount(getFunctionParams(false, children));
7177
}
7278

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.apache.doris.nereids.trees.expressions.functions.agg;
19+
20+
import org.apache.doris.nereids.exceptions.AnalysisException;
21+
import org.apache.doris.nereids.trees.expressions.SlotReference;
22+
import org.apache.doris.nereids.types.VariantType;
23+
24+
import org.junit.jupiter.api.Assertions;
25+
import org.junit.jupiter.api.Test;
26+
27+
class CountTest {
28+
@Test
29+
void testCountDistinctRejectsVariant() {
30+
Count count = new Count(true, SlotReference.of("v", VariantType.INSTANCE));
31+
32+
AnalysisException exception = Assertions.assertThrows(AnalysisException.class,
33+
count::checkLegalityAfterRewrite);
34+
Assertions.assertTrue(exception.getMessage().contains("COUNT DISTINCT does not support VARIANT argument"));
35+
Assertions.assertTrue(exception.getMessage().contains("Cast the VARIANT expression"));
36+
}
37+
38+
@Test
39+
void testMultiDistinctCountRejectsVariant() {
40+
AnalysisException exception = Assertions.assertThrows(AnalysisException.class,
41+
() -> new MultiDistinctCount(SlotReference.of("v", VariantType.INSTANCE)));
42+
Assertions.assertTrue(exception.getMessage().contains("COUNT DISTINCT does not support VARIANT argument"));
43+
Assertions.assertTrue(exception.getMessage().contains("Cast the VARIANT expression"));
44+
}
45+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
suite("test_variant_count_distinct") {
19+
sql "DROP TABLE IF EXISTS test_variant_count_distinct_array_subcolumn"
20+
21+
sql """
22+
CREATE TABLE test_variant_count_distinct_array_subcolumn (
23+
id INT,
24+
v VARIANT
25+
) DUPLICATE KEY(id)
26+
DISTRIBUTED BY HASH(id) BUCKETS 1
27+
PROPERTIES("replication_num" = "1")
28+
"""
29+
30+
sql """
31+
INSERT INTO test_variant_count_distinct_array_subcolumn VALUES
32+
(1, '{"arr":[1,2,3]}'),
33+
(2, '{"arr":[4,5]}'),
34+
(3, '{"arr":[1,2,3]}')
35+
"""
36+
37+
test {
38+
sql "SELECT COUNT(DISTINCT v['arr']) FROM test_variant_count_distinct_array_subcolumn"
39+
exception "COUNT DISTINCT does not support VARIANT argument"
40+
}
41+
}

0 commit comments

Comments
 (0)