Skip to content

Commit c578e47

Browse files
committed
feat: V2 named-argument syntax with NamedArgRewriter
Add NamedArgRewriter SqlShuttle that normalizes V2/PPL relevance syntax into MAP-based form before Calcite validation. Transforms positional and key=value arguments into MAP[paramName, value] pairs matching PPL's internal representation for uniform pushdown rules. Refactor UnifiedFunctionSpec to instance-based design with fluent builder and Category record for grouping. Use SqlUserDefinedFunction for consistency with PPL path. Add error tests and QueryErrorAssert to test base. Signed-off-by: Chen Dai <daichen@amazon.com>
1 parent 3ccc97a commit c578e47

7 files changed

Lines changed: 311 additions & 171 deletions

File tree

api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.calcite.schema.SchemaPlus;
2626
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
2727
import org.apache.calcite.sql.parser.SqlParser;
28+
import org.apache.calcite.sql.util.SqlOperatorTables;
2829
import org.apache.calcite.tools.FrameworkConfig;
2930
import org.apache.calcite.tools.Frameworks;
3031
import org.apache.calcite.tools.Programs;
@@ -241,12 +242,13 @@ public List<?> getSettings() {
241242
private FrameworkConfig buildFrameworkConfig() {
242243
SchemaPlus rootSchema = CalciteSchema.createRootSchema(true, cacheMetadata).plus();
243244
catalogs.forEach(rootSchema::add);
244-
UnifiedFunctionSpec.registerAll(rootSchema);
245245

246246
SchemaPlus defaultSchema = findSchemaByPath(rootSchema, defaultNamespace);
247247
return Frameworks.newConfigBuilder()
248248
.parserConfig(buildParserConfig())
249-
.operatorTable(SqlStdOperatorTable.instance())
249+
.operatorTable(
250+
SqlOperatorTables.chain(
251+
SqlStdOperatorTable.instance(), UnifiedFunctionSpec.RELEVANCE.operatorTable()))
250252
.defaultSchema(defaultSchema)
251253
.traitDefs((List<RelTraitDef>) null)
252254
.programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE))

api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.apache.calcite.sql.SqlNode;
1818
import org.apache.calcite.tools.Frameworks;
1919
import org.apache.calcite.tools.Planner;
20+
import org.opensearch.sql.api.parser.NamedArgRewriter;
2021
import org.opensearch.sql.api.parser.UnifiedQueryParser;
2122
import org.opensearch.sql.ast.tree.UnresolvedPlan;
2223
import org.opensearch.sql.calcite.CalciteRelNodeVisitor;
@@ -81,7 +82,8 @@ private static class CalciteNativeStrategy implements PlanningStrategy {
8182
public RelNode plan(String query) throws Exception {
8283
try (Planner planner = Frameworks.getPlanner(context.getPlanContext().config)) {
8384
SqlNode parsed = planner.parse(query);
84-
SqlNode validated = planner.validate(parsed);
85+
SqlNode rewritten = parsed.accept(NamedArgRewriter.INSTANCE);
86+
SqlNode validated = planner.validate(rewritten);
8587
RelRoot relRoot = planner.rel(validated);
8688
return relRoot.project();
8789
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.parser;
7+
8+
import java.util.List;
9+
import lombok.AccessLevel;
10+
import lombok.NoArgsConstructor;
11+
import org.apache.calcite.sql.SqlBasicCall;
12+
import org.apache.calcite.sql.SqlCall;
13+
import org.apache.calcite.sql.SqlKind;
14+
import org.apache.calcite.sql.SqlLiteral;
15+
import org.apache.calcite.sql.SqlNode;
16+
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
17+
import org.apache.calcite.sql.parser.SqlParserPos;
18+
import org.apache.calcite.sql.util.SqlShuttle;
19+
import org.checkerframework.checker.nullness.qual.Nullable;
20+
import org.opensearch.sql.api.spec.UnifiedFunctionSpec;
21+
22+
/**
23+
* Pre-validation rewriter that normalizes V2/PPL relevance function syntax into MAP-based form
24+
* matching PPL's internal representation. This ensures both SQL and PPL paths produce identical
25+
* query plans for pushdown rules.
26+
*
27+
* <p>Transforms: {@code match(name, 'John', operator='AND', boost=2.0)} into: {@code
28+
* match(MAP['field', name], MAP['query', 'John'], MAP['operator', 'AND'], MAP['boost', 2.0])}
29+
*
30+
* <p>Param names are resolved via {@link UnifiedFunctionSpec#getParamNames} from the operator's
31+
* {@link org.apache.calcite.sql.type.SqlOperandMetadata}. This runs before Calcite validation, when
32+
* operators are still unresolved.
33+
*/
34+
@NoArgsConstructor(access = AccessLevel.PRIVATE)
35+
public final class NamedArgRewriter extends SqlShuttle {
36+
37+
public static final NamedArgRewriter INSTANCE = new NamedArgRewriter();
38+
39+
@Override
40+
public @Nullable SqlNode visit(SqlCall call) {
41+
SqlCall visited = (SqlCall) super.visit(call);
42+
UnifiedFunctionSpec spec = UnifiedFunctionSpec.of(visited.getOperator().getName());
43+
if (spec == null) {
44+
return visited;
45+
}
46+
return rewriteToMaps(visited, spec.getParamNames());
47+
}
48+
49+
/**
50+
* Wraps positional args in MAP[paramName, value] and flattens {@code key=value} comparisons into
51+
* MAP[key, value]. Produces the same MAP-based form as PPL's internal representation.
52+
*/
53+
private static SqlCall rewriteToMaps(SqlCall call, List<String> paramNames) {
54+
List<SqlNode> operands = call.getOperandList();
55+
SqlNode[] result = new SqlNode[operands.size()];
56+
for (int i = 0; i < operands.size(); i++) {
57+
result[i] = rewriteOperand(operands.get(i), i, paramNames);
58+
}
59+
return new SqlBasicCall(call.getOperator(), result, call.getParserPosition());
60+
}
61+
62+
private static SqlNode rewriteOperand(SqlNode operand, int index, List<String> paramNames) {
63+
if (operand.getKind() == SqlKind.EQUALS) {
64+
// Optional: operator='AND' → MAP('operator', 'AND')
65+
SqlCall eq = (SqlCall) operand;
66+
return toMap(eq.operand(0).toString(), eq.operand(1));
67+
}
68+
// Required positional: name → MAP('field', name)
69+
return toMap(paramNames.get(index), operand);
70+
}
71+
72+
private static SqlNode toMap(String key, SqlNode value) {
73+
return new SqlBasicCall(
74+
SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR,
75+
new SqlNode[] {SqlLiteral.createCharString(key, SqlParserPos.ZERO), value},
76+
SqlParserPos.ZERO);
77+
}
78+
}

api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java

Lines changed: 120 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -5,129 +5,159 @@
55

66
package org.opensearch.sql.api.spec;
77

8+
import static org.apache.calcite.sql.type.ReturnTypes.BOOLEAN;
9+
810
import java.util.List;
911
import java.util.Map;
10-
import java.util.Set;
11-
import java.util.stream.IntStream;
12+
import java.util.Objects;
13+
import java.util.stream.Collectors;
14+
import java.util.stream.Stream;
15+
import lombok.AccessLevel;
16+
import lombok.Getter;
17+
import lombok.RequiredArgsConstructor;
18+
import lombok.ToString;
1219
import org.apache.calcite.rel.type.RelDataType;
1320
import org.apache.calcite.rel.type.RelDataTypeFactory;
14-
import org.apache.calcite.schema.FunctionParameter;
15-
import org.apache.calcite.schema.ScalarFunction;
16-
import org.apache.calcite.schema.SchemaPlus;
17-
import org.apache.calcite.sql.type.SqlTypeName;
18-
import org.checkerframework.checker.nullness.qual.Nullable;
21+
import org.apache.calcite.sql.SqlCallBinding;
22+
import org.apache.calcite.sql.SqlIdentifier;
23+
import org.apache.calcite.sql.SqlKind;
24+
import org.apache.calcite.sql.SqlOperandCountRange;
25+
import org.apache.calcite.sql.SqlOperator;
26+
import org.apache.calcite.sql.SqlOperatorTable;
27+
import org.apache.calcite.sql.parser.SqlParserPos;
28+
import org.apache.calcite.sql.type.InferTypes;
29+
import org.apache.calcite.sql.type.SqlOperandCountRanges;
30+
import org.apache.calcite.sql.type.SqlOperandMetadata;
31+
import org.apache.calcite.sql.type.SqlReturnTypeInference;
32+
import org.apache.calcite.sql.util.SqlOperatorTables;
33+
import org.apache.calcite.sql.validate.SqlUserDefinedFunction;
1934

2035
/**
21-
* Central registry of language-specified function signatures (Unified Language Specification
22-
* layer). Each entry maps a function name to a canonical {@link ScalarFunction} with named required
23-
* parameters of type {@link SqlTypeName#ANY}.
24-
*
25-
* <p>This class defines <em>what functions exist</em> and their signatures. Function
26-
* <em>implementations</em> live in the Unified Execution Runtime (UER) layer — see {@link
27-
* org.opensearch.sql.api.function.UnifiedFunction} and {@link
28-
* org.opensearch.sql.api.function.UnifiedFunctionRepository}. For data-source-specific functions
29-
* (e.g., relevance search), execution is handled by adapter pushdown rules rather than UER.
30-
*
31-
* <p>Named parameters enable SQL named-argument syntax ({@code match(field => col, query =>
32-
* 'text')}) via Calcite's {@code ARGUMENT_ASSIGNMENT} operator. With fixed required parameters (no
33-
* optional params), <a href="https://issues.apache.org/jira/browse/CALCITE-5366">CALCITE-5366</a>
34-
* is avoided entirely.
35-
*
36-
* <p>Functions are registered globally on the root schema via {@link #registerAll(SchemaPlus)},
37-
* following the same pattern as Flink's {@code FlinkSqlOperatorTable} — engine-level primitives
38-
* available regardless of catalog. Pushdown rules enforce data-source capability at optimization
39-
* time.
40-
*
41-
* @see org.opensearch.sql.api.function.UnifiedFunction
42-
* @see org.opensearch.sql.api.function.UnifiedFunctionRepository
36+
* Declarative registry of language-level functions for the unified query engine. Functions defined
37+
* here are part of the language spec — always resolvable regardless of the underlying data source.
38+
* They are grouped into {@link Category categories} that callers chain into Calcite's operator
39+
* table. Data-source capability is enforced at optimization time by pushdown rules.
4340
*/
44-
// TODO: UnifiedFunctionRepository should resolve implementations for functions defined here,
45-
// rather than independently discovering from PPLBuiltinOperators. The spec is the source of
46-
// truth for what functions exist; UER provides how they execute. Decide whether to late-bind
47-
// UER implementations (ImplementableFunction) to spec-defined signatures for engine-independent
48-
// functions (e.g., upper, lower). Currently only data-source-specific functions (pushdown-only)
49-
// are registered here.
41+
@Getter
42+
@ToString
43+
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
5044
public final class UnifiedFunctionSpec {
5145

52-
private UnifiedFunctionSpec() {}
53-
54-
/** Single-field relevance function params: (field, query). */
55-
private static final List<String> SINGLE_FIELD_PARAMS = List.of("field", "query");
56-
57-
/** Multi-field relevance function params: (fields, query). */
58-
private static final List<String> MULTI_FIELD_PARAMS = List.of("fields", "query");
59-
60-
private static final Map<String, ScalarFunction> REGISTRY =
61-
Map.of(
62-
"match", scalarFunction(SINGLE_FIELD_PARAMS),
63-
"match_phrase", scalarFunction(SINGLE_FIELD_PARAMS),
64-
"match_bool_prefix", scalarFunction(SINGLE_FIELD_PARAMS),
65-
"match_phrase_prefix", scalarFunction(SINGLE_FIELD_PARAMS),
66-
"multi_match", scalarFunction(MULTI_FIELD_PARAMS),
67-
"simple_query_string", scalarFunction(MULTI_FIELD_PARAMS),
68-
"query_string", scalarFunction(MULTI_FIELD_PARAMS));
69-
70-
/** Registers all language-specified functions on the given schema (typically root). */
71-
public static void registerAll(SchemaPlus schema) {
72-
REGISTRY.forEach(schema::add);
46+
/** Function name as registered in the operator table (e.g., "match", "multi_match"). */
47+
private final String funcName;
48+
49+
/** Calcite operator for chaining into the framework config's operator table. */
50+
private final SqlOperator operator;
51+
52+
/** Full-text search functions. */
53+
public static final Category RELEVANCE =
54+
new Category(
55+
List.of(
56+
function("match").vararg("field", "query").returnType(BOOLEAN).build(),
57+
function("match_phrase").vararg("field", "query").returnType(BOOLEAN).build(),
58+
function("match_bool_prefix").vararg("field", "query").returnType(BOOLEAN).build(),
59+
function("match_phrase_prefix").vararg("field", "query").returnType(BOOLEAN).build(),
60+
function("multi_match").vararg("fields", "query").returnType(BOOLEAN).build(),
61+
function("simple_query_string").vararg("fields", "query").returnType(BOOLEAN).build(),
62+
function("query_string").vararg("fields", "query").returnType(BOOLEAN).build()));
63+
64+
/** All registered function specs, keyed by function name. */
65+
private static final Map<String, UnifiedFunctionSpec> ALL_SPECS =
66+
Stream.of(RELEVANCE)
67+
.flatMap(c -> c.specs().stream())
68+
.collect(Collectors.toMap(UnifiedFunctionSpec::getFuncName, s -> s));
69+
70+
/**
71+
* Looks up a function spec by name across all categories.
72+
*
73+
* @param name function name (case-insensitive)
74+
* @return the spec, or {@code null} if not found
75+
*/
76+
public static UnifiedFunctionSpec of(String name) {
77+
return ALL_SPECS.get(name.toLowerCase());
7378
}
7479

75-
/** Returns the canonical ScalarFunction for a language-specified function, or null. */
76-
public static @Nullable ScalarFunction get(String name) {
77-
return REGISTRY.get(name);
80+
/**
81+
* @return required param names from {@link SqlOperandMetadata}, or empty if not available.
82+
*/
83+
public List<String> getParamNames() {
84+
return operator.getOperandTypeChecker() instanceof SqlOperandMetadata metadata
85+
? metadata.paramNames()
86+
: List.of();
7887
}
7988

80-
/** Returns true if the name is a language-specified function. */
81-
public static boolean isLanguageFunction(String name) {
82-
return REGISTRY.containsKey(name);
89+
/** A group of function specs that can be chained into Calcite's operator table. */
90+
public record Category(List<UnifiedFunctionSpec> specs) {
91+
public SqlOperatorTable operatorTable() {
92+
return SqlOperatorTables.of(specs.stream().map(UnifiedFunctionSpec::getOperator).toList());
93+
}
8394
}
8495

85-
/** All registered language function names. */
86-
public static Set<String> names() {
87-
return REGISTRY.keySet();
96+
public static Builder function(String name) {
97+
return new Builder(name);
8898
}
8999

90-
private static ScalarFunction scalarFunction(List<String> paramNames) {
91-
List<FunctionParameter> params =
92-
IntStream.range(0, paramNames.size())
93-
.mapToObj(i -> (FunctionParameter) new AnyParam(i, paramNames.get(i)))
94-
.toList();
95-
return new BooleanScalarFunction(params);
96-
}
100+
/** Fluent builder for function specs. */
101+
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
102+
public static class Builder {
103+
private final String funcName;
104+
private List<String> paramNames = List.of();
105+
private SqlReturnTypeInference returnType;
97106

98-
/** A ScalarFunction that returns BOOLEAN with the given parameters. */
99-
private record BooleanScalarFunction(List<FunctionParameter> params) implements ScalarFunction {
100-
@Override
101-
public List<FunctionParameter> getParameters() {
102-
return params;
107+
public Builder vararg(String... names) {
108+
this.paramNames = List.of(names);
109+
return this;
103110
}
104111

105-
@Override
106-
public RelDataType getReturnType(RelDataTypeFactory typeFactory) {
107-
return typeFactory.createSqlType(SqlTypeName.BOOLEAN);
112+
public Builder returnType(SqlReturnTypeInference type) {
113+
this.returnType = type;
114+
return this;
115+
}
116+
117+
public UnifiedFunctionSpec build() {
118+
Objects.requireNonNull(returnType, "returnType is required");
119+
return new UnifiedFunctionSpec(
120+
funcName,
121+
new SqlUserDefinedFunction(
122+
new SqlIdentifier(funcName, SqlParserPos.ZERO),
123+
SqlKind.OTHER_FUNCTION,
124+
returnType,
125+
InferTypes.ANY_NULLABLE,
126+
new VariadicOperandMetadata(paramNames),
127+
List::of)); // Pushdown-only: no local implementation
108128
}
109129
}
110130

111-
/** A required function parameter of type ANY. */
112-
private record AnyParam(int ordinal, String name) implements FunctionParameter {
131+
/**
132+
* Custom operand metadata that bypasses Calcite's built-in type checking. Calcite's {@code
133+
* FamilyOperandTypeChecker} rejects variadic calls (CALCITE-5366), so this implementation accepts
134+
* any operand types and delegates validation to pushdown.
135+
*/
136+
private record VariadicOperandMetadata(List<String> paramNames) implements SqlOperandMetadata {
137+
138+
@Override
139+
public List<String> paramNames() {
140+
return paramNames;
141+
}
142+
113143
@Override
114-
public int getOrdinal() {
115-
return ordinal;
144+
public List<RelDataType> paramTypes(RelDataTypeFactory tf) {
145+
return List.of();
116146
}
117147

118148
@Override
119-
public String getName() {
120-
return name;
149+
public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) {
150+
return true; // Bypass: CALCITE-5366 breaks optional argument type checking
121151
}
122152

123153
@Override
124-
public boolean isOptional() {
125-
return false;
154+
public SqlOperandCountRange getOperandCountRange() {
155+
return SqlOperandCountRanges.from(paramNames.size());
126156
}
127157

128158
@Override
129-
public RelDataType getType(RelDataTypeFactory typeFactory) {
130-
return typeFactory.createSqlType(SqlTypeName.ANY);
159+
public String getAllowedSignatures(SqlOperator op, String opName) {
160+
return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])";
131161
}
132162
}
133163
}

0 commit comments

Comments
 (0)