Skip to content

Commit aa68e4b

Browse files
authored
[Enhancement](udf) Reject bitmap, hll, and quantile_state in udf create (#63849)
Problem Summary: UDF creation currently allows `BITMAP`, `HLL`, and `QUANTILE_STATE` in function signatures, but these object types are not exposed to the Java/Python UDF runtimes as first-class values. They are effectively bridged as opaque bytes, and they are marked as unsupported in the [documentation](https://doris.apache.org/docs/dev/query-data/udf/python-user-defined-function#data-type-mapping).
1 parent 1e8e91d commit aa68e4b

5 files changed

Lines changed: 462 additions & 0 deletions

File tree

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.apache.doris.catalog.PrimitiveType;
3838
import org.apache.doris.catalog.ScalarFunction;
3939
import org.apache.doris.catalog.ScalarType;
40+
import org.apache.doris.catalog.StructField;
4041
import org.apache.doris.catalog.StructType;
4142
import org.apache.doris.catalog.Type;
4243
import org.apache.doris.common.AnalysisException;
@@ -346,6 +347,7 @@ private void analyzeCommon(ConnectContext ctx) throws AnalysisException {
346347
}
347348
if (binaryType == Function.BinaryType.JAVA_UDF) {
348349
FunctionUtil.checkEnableJavaUdf();
350+
checkUdfSupportedTypes();
349351
if (!isAggregate && !isTableFunction) {
350352
volatility = analyzeVolatility();
351353
}
@@ -363,6 +365,7 @@ private void analyzeCommon(ConnectContext ctx) throws AnalysisException {
363365
extractExpirationTime();
364366
} else if (binaryType == Function.BinaryType.PYTHON_UDF) {
365367
FunctionUtil.checkEnablePythonUdf();
368+
checkUdfSupportedTypes();
366369
if (!isAggregate && !isTableFunction) {
367370
volatility = analyzeVolatility();
368371
}
@@ -418,6 +421,36 @@ private static boolean validatePythonRuntimeVersion(String runtimeVersionString)
418421
return runtimeVersionString != null && PYTHON_VERSION_PATTERN.matcher(runtimeVersionString).matches();
419422
}
420423

424+
private void checkUdfSupportedTypes() throws AnalysisException {
425+
Type[] argTypes = argsDef.getArgTypes();
426+
for (int i = 0; i < argTypes.length; i++) {
427+
checkUdfSupportedType(argTypes[i], "argument " + (i + 1));
428+
}
429+
checkUdfSupportedType(returnType.toCatalogDataType(), "return");
430+
if (intermediateType != null) {
431+
checkUdfSupportedType(intermediateType.toCatalogDataType(), "intermediate");
432+
}
433+
}
434+
435+
private void checkUdfSupportedType(Type type, String typePosition) throws AnalysisException {
436+
// Reject bitmap/hll/quantile_state type
437+
if (type.isObjectStored()) {
438+
throw new AnalysisException(String.format(
439+
"%s does not support %s type %s", binaryType, typePosition, type.toSql()));
440+
}
441+
442+
if (type.isArrayType()) {
443+
checkUdfSupportedType(((ArrayType) type).getItemType(), typePosition + " element");
444+
} else if (type.isMapType()) {
445+
checkUdfSupportedType(((MapType) type).getKeyType(), typePosition + " key");
446+
checkUdfSupportedType(((MapType) type).getValueType(), typePosition + " value");
447+
} else if (type.isStructType()) {
448+
for (StructField field : ((StructType) type).getFields()) {
449+
checkUdfSupportedType(field.getType(), typePosition + " field " + field.getName());
450+
}
451+
}
452+
}
453+
421454
private Boolean parseBooleanFromProperties(String propertyString) throws AnalysisException {
422455
String valueOfString = properties.get(propertyString);
423456
if (valueOfString == null) {

fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,35 @@ public void test() throws Exception {
130130
Assert.assertEquals(FunctionVolatility.VOLATILE, findFunction(db, "py_default").getVolatility());
131131
}
132132

133+
@Test
134+
public void testCreatePythonFunctionRejectsObjectTypes() throws Exception {
135+
ConnectContext ctx = UtFrameUtils.createDefaultCtx();
136+
createDatabase(ctx, "create database py_obj_type_db;");
137+
dorisAssert = new DorisAssert(ctx);
138+
dorisAssert.useDatabase("py_obj_type_db");
139+
140+
assertCreateFunctionAnalysisException(ctx, "create function py_obj_type_db.py_bitmap_arg(bitmap) returns int "
141+
+ "properties('type'='PYTHON_UDF', 'symbol'='evaluate', 'runtime_version'='3.10.2');",
142+
"PYTHON_UDF does not support argument 1 type bitmap");
143+
assertCreateFunctionAnalysisException(ctx, "create function py_obj_type_db.j_bitmap_arg(bitmap) returns int "
144+
+ "properties('type'='JAVA_UDF', 'symbol'='evaluate');",
145+
"JAVA_UDF does not support argument 1 type bitmap");
146+
assertCreateFunctionAnalysisException(ctx, "create function py_obj_type_db.py_hll_ret(int) returns hll "
147+
+ "properties('type'='PYTHON_UDF', 'symbol'='evaluate', 'runtime_version'='3.10.2');",
148+
"PYTHON_UDF does not support return type hll");
149+
assertCreateFunctionAnalysisException(ctx, "create aggregate function py_obj_type_db.py_quantile_arg"
150+
+ "(quantile_state) returns int properties('type'='PYTHON_UDF', 'symbol'='Agg', "
151+
+ "'runtime_version'='3.10.2');",
152+
"PYTHON_UDF does not support argument 1 type quantile_state");
153+
assertCreateFunctionAnalysisException(ctx, "create aggregate function py_obj_type_db.j_quantile_arg"
154+
+ "(quantile_state) returns int properties('type'='JAVA_UDF', 'symbol'='Agg');",
155+
"JAVA_UDF does not support argument 1 type quantile_state");
156+
assertCreateFunctionAnalysisException(ctx, "create tables function py_obj_type_db.py_bitmap_table(int) "
157+
+ "returns array<bitmap> properties('type'='PYTHON_UDF', 'symbol'='evaluate', "
158+
+ "'runtime_version'='3.10.2');",
159+
"ARRAY unsupported sub-type: bitmap");
160+
}
161+
133162
@Test
134163
public void testCreateGlobalFunction() throws Exception {
135164
ConnectContext ctx = UtFrameUtils.createDefaultCtx();
@@ -215,6 +244,12 @@ private void createFunction(String sql, ConnectContext connectContext) throws Ex
215244
}
216245
}
217246

247+
private void assertCreateFunctionAnalysisException(ConnectContext ctx, String sql, String message) {
248+
Exception exception = Assert.assertThrows(Exception.class, () -> createFunction(sql, ctx));
249+
Assert.assertTrue("Expected error to contain: " + message + ", actual: " + exception.getMessage(),
250+
exception.getMessage().contains(message));
251+
}
252+
218253
private boolean containsIgnoreCase(String str, String sub) {
219254
return str.toLowerCase().contains(sub.toLowerCase());
220255
}
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
suite("test_pythonudaf_object_types_inline") {
19+
def runtime_version = getPythonUdfRuntimeVersion()
20+
21+
test {
22+
sql """
23+
CREATE AGGREGATE FUNCTION py_obj_udaf_bitmap_arg(bitmap)
24+
RETURNS BIGINT
25+
PROPERTIES (
26+
"type" = "PYTHON_UDF",
27+
"symbol" = "Agg",
28+
"runtime_version" = "${runtime_version}"
29+
)
30+
AS \$\$
31+
class Agg:
32+
def __init__(self):
33+
self.sum = 0
34+
def accumulate(self, v):
35+
pass
36+
def merge(self, other):
37+
pass
38+
def finish(self):
39+
return self.sum
40+
@property
41+
def aggregate_state(self):
42+
return self.sum
43+
\$\$;
44+
"""
45+
exception "does not support argument 1 type bitmap"
46+
}
47+
48+
test {
49+
sql """
50+
CREATE AGGREGATE FUNCTION py_obj_udaf_hll_ret(int)
51+
RETURNS HLL
52+
PROPERTIES (
53+
"type" = "PYTHON_UDF",
54+
"symbol" = "Agg",
55+
"runtime_version" = "${runtime_version}"
56+
)
57+
AS \$\$
58+
class Agg:
59+
def __init__(self):
60+
self.state = None
61+
def accumulate(self, v):
62+
pass
63+
def merge(self, other):
64+
pass
65+
def finish(self):
66+
return self.state
67+
@property
68+
def aggregate_state(self):
69+
return self.state
70+
\$\$;
71+
"""
72+
exception "does not support return type hll"
73+
}
74+
75+
test {
76+
sql """
77+
CREATE AGGREGATE FUNCTION py_obj_udaf_quantile_state(quantile_state)
78+
RETURNS BIGINT
79+
INTERMEDIATE BIGINT
80+
PROPERTIES (
81+
"type" = "PYTHON_UDF",
82+
"symbol" = "Agg",
83+
"runtime_version" = "${runtime_version}"
84+
)
85+
AS \$\$
86+
class Agg:
87+
def __init__(self):
88+
self.state = 0
89+
def accumulate(self, v):
90+
pass
91+
def merge(self, other):
92+
pass
93+
def finish(self):
94+
return self.state
95+
@property
96+
def aggregate_state(self):
97+
return self.state
98+
\$\$;
99+
"""
100+
exception "does not support argument 1 type quantile_state"
101+
}
102+
103+
test {
104+
sql """
105+
CREATE AGGREGATE FUNCTION py_obj_udaf_bitmap_intermediate(int)
106+
RETURNS BIGINT
107+
INTERMEDIATE BITMAP
108+
PROPERTIES (
109+
"type" = "PYTHON_UDF",
110+
"symbol" = "Agg",
111+
"runtime_version" = "${runtime_version}"
112+
)
113+
AS \$\$
114+
class Agg:
115+
def __init__(self):
116+
self.state = 0
117+
def accumulate(self, v):
118+
pass
119+
def merge(self, other):
120+
pass
121+
def finish(self):
122+
return self.state
123+
@property
124+
def aggregate_state(self):
125+
return self.state
126+
\$\$;
127+
"""
128+
exception "does not support intermediate type bitmap"
129+
}
130+
131+
test {
132+
sql """
133+
CREATE AGGREGATE FUNCTION py_obj_udaf_array_bitmap(int)
134+
RETURNS ARRAY<BITMAP>
135+
PROPERTIES (
136+
"type" = "PYTHON_UDF",
137+
"symbol" = "Agg",
138+
"runtime_version" = "${runtime_version}"
139+
)
140+
AS \$\$
141+
class Agg:
142+
def __init__(self):
143+
self.state = None
144+
def accumulate(self, v):
145+
pass
146+
def merge(self, other):
147+
pass
148+
def finish(self):
149+
return self.state
150+
@property
151+
def aggregate_state(self):
152+
return self.state
153+
\$\$;
154+
"""
155+
exception "ARRAY unsupported sub-type: bitmap"
156+
}
157+
158+
test {
159+
sql """
160+
CREATE AGGREGATE FUNCTION py_obj_udaf_struct_bitmap(int)
161+
RETURNS STRUCT<plain:INT, nested:MAP<INT, ARRAY<HLL>>>
162+
PROPERTIES (
163+
"type" = "PYTHON_UDF",
164+
"symbol" = "Agg",
165+
"runtime_version" = "${runtime_version}"
166+
)
167+
AS \$\$
168+
class Agg:
169+
def __init__(self):
170+
self.state = None
171+
def accumulate(self, v):
172+
pass
173+
def merge(self, other):
174+
pass
175+
def finish(self):
176+
return self.state
177+
@property
178+
def aggregate_state(self):
179+
return self.state
180+
\$\$;
181+
"""
182+
exception "ARRAY unsupported sub-type: hll"
183+
}
184+
}

0 commit comments

Comments
 (0)