Skip to content

Commit 0ff1eec

Browse files
authored
Register LENGTH, REGEXP_REPLACE, DATE_TRUNC in unified function spec (opensearch-project#5419)
* feat(api): Register LENGTH, REGEXP_REPLACE, DATE_TRUNC in operator table Add FunctionSpecBuilder DSL with three construction paths: delegateTo() for existing Calcite operators, vararg() for pushdown-only UDFs, and operands() for typed functions with optional late-binding impl. Register LENGTH, REGEXP_REPLACE, and DATE_TRUNC in UnifiedFunctionSpec LIBRARY category. Contribute via CoreExtension registered in UnifiedSqlSpec.extended(). This unblocks ClickBench q28 (LENGTH), q29 (REGEXP_REPLACE), and q43 (DATE_TRUNC) at the SQL Plugin parsing/validation layer. Signed-off-by: Chen Dai <daichen@amazon.com> * feat(api): Add pre-compilation rule for late-binding function impl Add preCompilationRules() extension point to LanguageSpec that allows extensions to transform the logical plan before in-memory execution only. The plan remains clean for external consumers (Analytics Engine). CoreExtension registers FunctionImplBindingRule which fetches impl bindings from UnifiedFunctionSpec and rewrites custom function calls into executable Calcite expressions at compilation time. DATE_TRUNC now has an impl that rewrites to FLOOR(ts TO unit), making it executable in-memory while preserving DATE_TRUNC in the logical plan for the Analytics Engine path. Signed-off-by: Chen Dai <daichen@amazon.com> --------- Signed-off-by: Chen Dai <daichen@amazon.com>
1 parent 3ed472e commit 0ff1eec

8 files changed

Lines changed: 412 additions & 83 deletions

File tree

api/src/main/java/org/opensearch/sql/api/compiler/UnifiedQueryCompiler.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ public PreparedStatement compile(@NonNull RelNode plan) {
5555
}
5656

5757
private PreparedStatement doCompile(RelNode plan) throws Exception {
58+
// Apply pre-compilation rules (e.g., late-binding function impl)
59+
for (var rule : context.getLangSpec().preCompilationRules()) {
60+
plan = plan.accept(rule);
61+
}
62+
5863
// Apply shuttle to convert LogicalTableScan to BindableTableScan
5964
final RelHomogeneousShuttle shuttle =
6065
new RelHomogeneousShuttle() {
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.spec;
7+
8+
import java.util.List;
9+
import java.util.Objects;
10+
import java.util.function.BiFunction;
11+
import javax.annotation.Nullable;
12+
import lombok.RequiredArgsConstructor;
13+
import org.apache.calcite.rel.type.RelDataType;
14+
import org.apache.calcite.rel.type.RelDataTypeFactory;
15+
import org.apache.calcite.rex.RexBuilder;
16+
import org.apache.calcite.rex.RexCall;
17+
import org.apache.calcite.rex.RexNode;
18+
import org.apache.calcite.sql.SqlCallBinding;
19+
import org.apache.calcite.sql.SqlFunction;
20+
import org.apache.calcite.sql.SqlFunctionCategory;
21+
import org.apache.calcite.sql.SqlIdentifier;
22+
import org.apache.calcite.sql.SqlKind;
23+
import org.apache.calcite.sql.SqlOperandCountRange;
24+
import org.apache.calcite.sql.SqlOperator;
25+
import org.apache.calcite.sql.parser.SqlParserPos;
26+
import org.apache.calcite.sql.type.InferTypes;
27+
import org.apache.calcite.sql.type.OperandTypes;
28+
import org.apache.calcite.sql.type.SqlOperandCountRanges;
29+
import org.apache.calcite.sql.type.SqlOperandMetadata;
30+
import org.apache.calcite.sql.type.SqlReturnTypeInference;
31+
import org.apache.calcite.sql.type.SqlTypeFamily;
32+
import org.apache.calcite.sql.validate.SqlUserDefinedFunction;
33+
34+
/** Fluent DSL for building {@link UnifiedFunctionSpec} instances. */
35+
@RequiredArgsConstructor
36+
class FunctionSpecBuilder {
37+
/** Function name to register. */
38+
private final String name;
39+
40+
/**
41+
* Wraps an existing Calcite operator, preserving its native type system and RexImpTable
42+
* implementation for in-memory execution.
43+
*
44+
* @param op the Calcite operator to delegate to
45+
* @return a builder that produces the spec on {@code build()}
46+
*/
47+
DelegateFunctionBuilder delegateTo(SqlOperator op) {
48+
return new DelegateFunctionBuilder(name, op);
49+
}
50+
51+
/**
52+
* Builds a pushdown-only UDF with relaxed type checking. The resulting function has no local
53+
* implementation and delegates execution to the data source via pushdown.
54+
*
55+
* @param paramNames required parameter names for signature display
56+
* @return a builder that produces the spec on {@code build()}
57+
*/
58+
CatalogFunctionBuilder vararg(String... paramNames) {
59+
return new CatalogFunctionBuilder(name, List.of(paramNames));
60+
}
61+
62+
/**
63+
* Builds a typed SqlFunction with strict operand type checking. Optionally accepts a late-binding
64+
* {@code impl} that rewrites the function into executable Calcite expressions at compilation
65+
* time.
66+
*
67+
* @param families operand type families for validation
68+
* @return a builder that produces the spec on {@code build()}
69+
*/
70+
DefaultFunctionBuilder operands(SqlTypeFamily... families) {
71+
return new DefaultFunctionBuilder(name, families);
72+
}
73+
74+
@RequiredArgsConstructor
75+
static class DefaultFunctionBuilder {
76+
private final String name;
77+
private final SqlTypeFamily[] operandFamilies;
78+
private SqlReturnTypeInference returnType;
79+
private SqlFunctionCategory category = SqlFunctionCategory.USER_DEFINED_FUNCTION;
80+
private @Nullable BiFunction<RexBuilder, RexCall, RexNode> impl;
81+
82+
DefaultFunctionBuilder returns(SqlReturnTypeInference type) {
83+
this.returnType = type;
84+
return this;
85+
}
86+
87+
DefaultFunctionBuilder category(SqlFunctionCategory cat) {
88+
this.category = cat;
89+
return this;
90+
}
91+
92+
/**
93+
* Defines how this function executes by rewriting to existing Calcite operators. Applied only
94+
* at compilation time (late binding) — the logical plan preserves the original function call.
95+
*
96+
* @param impl rewrite function that converts this call into executable RexNodes
97+
* @return this builder
98+
*/
99+
DefaultFunctionBuilder impl(BiFunction<RexBuilder, RexCall, RexNode> impl) {
100+
this.impl = impl;
101+
return this;
102+
}
103+
104+
UnifiedFunctionSpec build() {
105+
Objects.requireNonNull(returnType, "returns() is required");
106+
SqlFunction op =
107+
new SqlFunction(
108+
name.toUpperCase(),
109+
SqlKind.OTHER_FUNCTION,
110+
returnType,
111+
null,
112+
OperandTypes.family(operandFamilies),
113+
category);
114+
return new UnifiedFunctionSpec(name.toLowerCase(), op, impl);
115+
}
116+
}
117+
118+
@RequiredArgsConstructor
119+
static class DelegateFunctionBuilder {
120+
private final String name;
121+
private final SqlOperator operator;
122+
123+
UnifiedFunctionSpec build() {
124+
return new UnifiedFunctionSpec(name.toLowerCase(), operator, null);
125+
}
126+
}
127+
128+
@RequiredArgsConstructor
129+
static class CatalogFunctionBuilder {
130+
private final String name;
131+
private final List<String> paramNames;
132+
private SqlReturnTypeInference returnType;
133+
134+
CatalogFunctionBuilder returnType(SqlReturnTypeInference type) {
135+
this.returnType = type;
136+
return this;
137+
}
138+
139+
UnifiedFunctionSpec build() {
140+
Objects.requireNonNull(returnType, "returnType is required");
141+
return new UnifiedFunctionSpec(
142+
name,
143+
new SqlUserDefinedFunction(
144+
new SqlIdentifier(name, SqlParserPos.ZERO),
145+
SqlKind.OTHER_FUNCTION,
146+
returnType,
147+
InferTypes.ANY_NULLABLE,
148+
new VariadicOperandMetadata(paramNames),
149+
List::of), // Pushdown-only: no local implementation
150+
null);
151+
}
152+
}
153+
154+
/**
155+
* Custom operand metadata that bypasses Calcite's built-in type checking. Calcite's {@code
156+
* FamilyOperandTypeChecker} rejects variadic calls (CALCITE-5366), so this implementation accepts
157+
* any operand types and delegates validation to pushdown.
158+
*/
159+
record VariadicOperandMetadata(List<String> paramNames) implements SqlOperandMetadata {
160+
161+
@Override
162+
public List<String> paramNames() {
163+
return paramNames;
164+
}
165+
166+
@Override
167+
public List<RelDataType> paramTypes(RelDataTypeFactory tf) {
168+
return List.of();
169+
}
170+
171+
@Override
172+
public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) {
173+
return true;
174+
}
175+
176+
@Override
177+
public SqlOperandCountRange getOperandCountRange() {
178+
return SqlOperandCountRanges.from(paramNames.size());
179+
}
180+
181+
@Override
182+
public String getAllowedSignatures(SqlOperator op, String opName) {
183+
return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])";
184+
}
185+
}
186+
}

api/src/main/java/org/opensearch/sql/api/spec/LanguageSpec.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,15 @@ default List<SqlVisitor<SqlNode>> postParseRules() {
5757
default List<RelShuttle> postAnalysisRules() {
5858
return List.of();
5959
}
60+
61+
/**
62+
* Pre-compilation rules applied only before in-memory execution. Each rule transforms the
63+
* logical plan (e.g., binding function implementations). Not applied when the plan is consumed
64+
* by external engines.
65+
*/
66+
default List<RelShuttle> preCompilationRules() {
67+
return List.of();
68+
}
6069
}
6170

6271
/**
@@ -104,4 +113,12 @@ default List<SqlVisitor<SqlNode>> postParseRules() {
104113
default List<RelShuttle> postAnalysisRules() {
105114
return extensions().stream().flatMap(ext -> ext.postAnalysisRules().stream()).toList();
106115
}
116+
117+
/**
118+
* All pre-compilation rules from registered extensions, flattened in registration order. Applied
119+
* only before in-memory execution.
120+
*/
121+
default List<RelShuttle> preCompilationRules() {
122+
return extensions().stream().flatMap(ext -> ext.preCompilationRules().stream()).toList();
123+
}
107124
}

0 commit comments

Comments
 (0)