Skip to content

Commit ae36dc1

Browse files
committed
feat: V2 named-argument syntax with NamedArgRewriter
Add NamedArgRewriter SqlShuttle that normalizes V2/PPL relevance syntax into MAP-based form before Calcite validation. Transforms positional and key=value arguments into MAP[paramName, value] pairs matching PPL's internal representation for uniform pushdown rules. Refactor UnifiedFunctionSpec to instance-based design with fluent builder and Category record for grouping. Use SqlUserDefinedFunction for consistency with PPL path. Add error tests and QueryErrorAssert to test base. Signed-off-by: Chen Dai <daichen@amazon.com>
1 parent 3ccc97a commit ae36dc1

7 files changed

Lines changed: 311 additions & 164 deletions

File tree

api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.calcite.schema.SchemaPlus;
2626
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
2727
import org.apache.calcite.sql.parser.SqlParser;
28+
import org.apache.calcite.sql.util.SqlOperatorTables;
2829
import org.apache.calcite.tools.FrameworkConfig;
2930
import org.apache.calcite.tools.Frameworks;
3031
import org.apache.calcite.tools.Programs;
@@ -241,12 +242,13 @@ public List<?> getSettings() {
241242
private FrameworkConfig buildFrameworkConfig() {
242243
SchemaPlus rootSchema = CalciteSchema.createRootSchema(true, cacheMetadata).plus();
243244
catalogs.forEach(rootSchema::add);
244-
UnifiedFunctionSpec.registerAll(rootSchema);
245245

246246
SchemaPlus defaultSchema = findSchemaByPath(rootSchema, defaultNamespace);
247247
return Frameworks.newConfigBuilder()
248248
.parserConfig(buildParserConfig())
249-
.operatorTable(SqlStdOperatorTable.instance())
249+
.operatorTable(
250+
SqlOperatorTables.chain(
251+
SqlStdOperatorTable.instance(), UnifiedFunctionSpec.RELEVANCE.operatorTable()))
250252
.defaultSchema(defaultSchema)
251253
.traitDefs((List<RelTraitDef>) null)
252254
.programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE))

api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.apache.calcite.sql.SqlNode;
1818
import org.apache.calcite.tools.Frameworks;
1919
import org.apache.calcite.tools.Planner;
20+
import org.opensearch.sql.api.parser.NamedArgRewriter;
2021
import org.opensearch.sql.api.parser.UnifiedQueryParser;
2122
import org.opensearch.sql.ast.tree.UnresolvedPlan;
2223
import org.opensearch.sql.calcite.CalciteRelNodeVisitor;
@@ -81,7 +82,8 @@ private static class CalciteNativeStrategy implements PlanningStrategy {
8182
public RelNode plan(String query) throws Exception {
8283
try (Planner planner = Frameworks.getPlanner(context.getPlanContext().config)) {
8384
SqlNode parsed = planner.parse(query);
84-
SqlNode validated = planner.validate(parsed);
85+
SqlNode rewritten = parsed.accept(NamedArgRewriter.INSTANCE);
86+
SqlNode validated = planner.validate(rewritten);
8587
RelRoot relRoot = planner.rel(validated);
8688
return relRoot.project();
8789
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.parser;
7+
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
import org.apache.calcite.sql.SqlBasicCall;
11+
import org.apache.calcite.sql.SqlCall;
12+
import org.apache.calcite.sql.SqlKind;
13+
import org.apache.calcite.sql.SqlLiteral;
14+
import org.apache.calcite.sql.SqlNode;
15+
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
16+
import org.apache.calcite.sql.parser.SqlParserPos;
17+
import org.apache.calcite.sql.util.SqlShuttle;
18+
import org.checkerframework.checker.nullness.qual.Nullable;
19+
import org.opensearch.sql.api.spec.UnifiedFunctionSpec;
20+
21+
/**
22+
* Pre-validation rewriter that normalizes V2/PPL relevance function syntax into MAP-based form
23+
* matching PPL's internal representation. This ensures both SQL and PPL paths produce identical
24+
* query plans for pushdown rules.
25+
*
26+
* <p>Transforms: {@code match(name, 'John', operator='AND', boost=2.0)} into: {@code
27+
* match(MAP['field', name], MAP['query', 'John'], MAP['operator', 'AND'], MAP['boost', 2.0])}
28+
*
29+
* <p>Param names are resolved via {@link UnifiedFunctionSpec#getParamNames} from the operator's
30+
* {@link org.apache.calcite.sql.type.SqlOperandMetadata}. This runs before Calcite validation, when
31+
* operators are still unresolved.
32+
*/
33+
public final class NamedArgRewriter extends SqlShuttle {
34+
35+
public static final NamedArgRewriter INSTANCE = new NamedArgRewriter();
36+
37+
private NamedArgRewriter() {}
38+
39+
@Override
40+
public @Nullable SqlNode visit(SqlCall call) {
41+
SqlCall visited = (SqlCall) super.visit(call);
42+
if (visited == null) {
43+
return null;
44+
}
45+
UnifiedFunctionSpec spec = UnifiedFunctionSpec.of(visited.getOperator().getName());
46+
if (spec == null) {
47+
return visited;
48+
}
49+
return rewriteToMaps(visited, spec.getParamNames());
50+
}
51+
52+
/**
53+
* Wraps positional args in MAP[paramName, value] and flattens {@code key=value} comparisons into
54+
* MAP[key, value]. Produces the same MAP-based form as PPL's internal representation.
55+
*/
56+
private static SqlNode rewriteToMaps(SqlCall call, List<String> paramNames) {
57+
List<SqlNode> operands = call.getOperandList();
58+
List<SqlNode> result = new ArrayList<>();
59+
60+
for (int i = 0; i < operands.size(); i++) {
61+
SqlNode operand = operands.get(i);
62+
if (operand != null && operand.getKind() == SqlKind.EQUALS) {
63+
// Optional param: operator='AND' → MAP['operator', 'AND']
64+
SqlCall eq = (SqlCall) operand;
65+
result.add(toMap(eq.operand(0).toString(), eq.operand(1)));
66+
} else if (i < paramNames.size()) {
67+
// Required positional param: name → MAP['field', name]
68+
result.add(toMap(paramNames.get(i), operand));
69+
} else {
70+
result.add(operand);
71+
}
72+
}
73+
return new SqlBasicCall(
74+
call.getOperator(), result.toArray(SqlNode[]::new), call.getParserPosition());
75+
}
76+
77+
private static SqlNode toMap(String key, SqlNode value) {
78+
return new SqlBasicCall(
79+
SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR,
80+
new SqlNode[] {SqlLiteral.createCharString(key, SqlParserPos.ZERO), value},
81+
SqlParserPos.ZERO);
82+
}
83+
}

api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java

Lines changed: 115 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -5,129 +5,161 @@
55

66
package org.opensearch.sql.api.spec;
77

8+
import static org.apache.calcite.sql.type.ReturnTypes.BOOLEAN;
9+
810
import java.util.List;
911
import java.util.Map;
10-
import java.util.Set;
11-
import java.util.stream.IntStream;
12+
import java.util.Objects;
13+
import java.util.stream.Collectors;
14+
import java.util.stream.Stream;
15+
import lombok.AccessLevel;
16+
import lombok.Getter;
17+
import lombok.RequiredArgsConstructor;
1218
import org.apache.calcite.rel.type.RelDataType;
1319
import org.apache.calcite.rel.type.RelDataTypeFactory;
1420
import org.apache.calcite.schema.FunctionParameter;
1521
import org.apache.calcite.schema.ScalarFunction;
16-
import org.apache.calcite.schema.SchemaPlus;
17-
import org.apache.calcite.sql.type.SqlTypeName;
18-
import org.checkerframework.checker.nullness.qual.Nullable;
22+
import org.apache.calcite.sql.SqlCallBinding;
23+
import org.apache.calcite.sql.SqlIdentifier;
24+
import org.apache.calcite.sql.SqlKind;
25+
import org.apache.calcite.sql.SqlOperandCountRange;
26+
import org.apache.calcite.sql.SqlOperator;
27+
import org.apache.calcite.sql.SqlOperatorTable;
28+
import org.apache.calcite.sql.parser.SqlParserPos;
29+
import org.apache.calcite.sql.type.InferTypes;
30+
import org.apache.calcite.sql.type.SqlOperandCountRanges;
31+
import org.apache.calcite.sql.type.SqlOperandMetadata;
32+
import org.apache.calcite.sql.type.SqlReturnTypeInference;
33+
import org.apache.calcite.sql.util.SqlOperatorTables;
34+
import org.apache.calcite.sql.validate.SqlUserDefinedFunction;
1935

2036
/**
21-
* Central registry of language-specified function signatures (Unified Language Specification
22-
* layer). Each entry maps a function name to a canonical {@link ScalarFunction} with named required
23-
* parameters of type {@link SqlTypeName#ANY}.
24-
*
25-
* <p>This class defines <em>what functions exist</em> and their signatures. Function
26-
* <em>implementations</em> live in the Unified Execution Runtime (UER) layer — see {@link
27-
* org.opensearch.sql.api.function.UnifiedFunction} and {@link
28-
* org.opensearch.sql.api.function.UnifiedFunctionRepository}. For data-source-specific functions
29-
* (e.g., relevance search), execution is handled by adapter pushdown rules rather than UER.
30-
*
31-
* <p>Named parameters enable SQL named-argument syntax ({@code match(field => col, query =>
32-
* 'text')}) via Calcite's {@code ARGUMENT_ASSIGNMENT} operator. With fixed required parameters (no
33-
* optional params), <a href="https://issues.apache.org/jira/browse/CALCITE-5366">CALCITE-5366</a>
34-
* is avoided entirely.
35-
*
36-
* <p>Functions are registered globally on the root schema via {@link #registerAll(SchemaPlus)},
37-
* following the same pattern as Flink's {@code FlinkSqlOperatorTable} — engine-level primitives
38-
* available regardless of catalog. Pushdown rules enforce data-source capability at optimization
39-
* time.
40-
*
41-
* @see org.opensearch.sql.api.function.UnifiedFunction
42-
* @see org.opensearch.sql.api.function.UnifiedFunctionRepository
37+
* Unified function specification. Each spec carries its name, param names, and Calcite {@link
38+
* SqlOperator}, built via a fluent builder.
4339
*/
44-
// TODO: UnifiedFunctionRepository should resolve implementations for functions defined here,
45-
// rather than independently discovering from PPLBuiltinOperators. The spec is the source of
46-
// truth for what functions exist; UER provides how they execute. Decide whether to late-bind
47-
// UER implementations (ImplementableFunction) to spec-defined signatures for engine-independent
48-
// functions (e.g., upper, lower). Currently only data-source-specific functions (pushdown-only)
49-
// are registered here.
40+
@Getter
41+
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
5042
public final class UnifiedFunctionSpec {
5143

52-
private UnifiedFunctionSpec() {}
53-
54-
/** Single-field relevance function params: (field, query). */
55-
private static final List<String> SINGLE_FIELD_PARAMS = List.of("field", "query");
44+
/** Function name as registered in the operator table (e.g., "match", "multi_match"). */
45+
private final String funcName;
5646

57-
/** Multi-field relevance function params: (fields, query). */
58-
private static final List<String> MULTI_FIELD_PARAMS = List.of("fields", "query");
47+
/**
48+
* Required param names used by {@link org.opensearch.sql.api.parser.NamedArgRewriter} to
49+
* normalize V2 syntax into MAP form.
50+
*/
51+
private final List<String> paramNames;
5952

60-
private static final Map<String, ScalarFunction> REGISTRY =
61-
Map.of(
62-
"match", scalarFunction(SINGLE_FIELD_PARAMS),
63-
"match_phrase", scalarFunction(SINGLE_FIELD_PARAMS),
64-
"match_bool_prefix", scalarFunction(SINGLE_FIELD_PARAMS),
65-
"match_phrase_prefix", scalarFunction(SINGLE_FIELD_PARAMS),
66-
"multi_match", scalarFunction(MULTI_FIELD_PARAMS),
67-
"simple_query_string", scalarFunction(MULTI_FIELD_PARAMS),
68-
"query_string", scalarFunction(MULTI_FIELD_PARAMS));
53+
/** Calcite operator for chaining into the framework config's operator table. */
54+
private final SqlOperator operator;
6955

70-
/** Registers all language-specified functions on the given schema (typically root). */
71-
public static void registerAll(SchemaPlus schema) {
72-
REGISTRY.forEach(schema::add);
56+
/** A group of function specs that can be chained into Calcite's operator table. */
57+
public record Category(List<UnifiedFunctionSpec> specs) {
58+
public SqlOperatorTable operatorTable() {
59+
return SqlOperatorTables.of(specs.stream().map(UnifiedFunctionSpec::getOperator).toList());
60+
}
7361
}
7462

75-
/** Returns the canonical ScalarFunction for a language-specified function, or null. */
76-
public static @Nullable ScalarFunction get(String name) {
77-
return REGISTRY.get(name);
63+
/** Full-text search functions. */
64+
public static final Category RELEVANCE =
65+
new Category(
66+
List.of(
67+
function("match").vararg("field", "query").returnType(BOOLEAN).build(),
68+
function("match_phrase").vararg("field", "query").returnType(BOOLEAN).build(),
69+
function("match_bool_prefix").vararg("field", "query").returnType(BOOLEAN).build(),
70+
function("match_phrase_prefix").vararg("field", "query").returnType(BOOLEAN).build(),
71+
function("multi_match").vararg("fields", "query").returnType(BOOLEAN).build(),
72+
function("simple_query_string").vararg("fields", "query").returnType(BOOLEAN).build(),
73+
function("query_string").vararg("fields", "query").returnType(BOOLEAN).build()));
74+
75+
/** All registered function specs, keyed by function name. */
76+
private static final Map<String, UnifiedFunctionSpec> ALL_SPECS =
77+
Stream.of(RELEVANCE)
78+
.flatMap(c -> c.specs().stream())
79+
.collect(Collectors.toMap(UnifiedFunctionSpec::getFuncName, s -> s));
80+
81+
/** Looks up a function spec by name across all categories. */
82+
public static UnifiedFunctionSpec of(String name) {
83+
return ALL_SPECS.get(name.toLowerCase());
7884
}
7985

80-
/** Returns true if the name is a language-specified function. */
81-
public static boolean isLanguageFunction(String name) {
82-
return REGISTRY.containsKey(name);
86+
public static Builder function(String name) {
87+
return new Builder(name);
8388
}
8489

85-
/** All registered language function names. */
86-
public static Set<String> names() {
87-
return REGISTRY.keySet();
88-
}
90+
/** Fluent builder for function specs. */
91+
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
92+
public static class Builder {
93+
private final String funcName;
94+
private List<String> paramNames = List.of();
95+
private SqlReturnTypeInference returnType;
8996

90-
private static ScalarFunction scalarFunction(List<String> paramNames) {
91-
List<FunctionParameter> params =
92-
IntStream.range(0, paramNames.size())
93-
.mapToObj(i -> (FunctionParameter) new AnyParam(i, paramNames.get(i)))
94-
.toList();
95-
return new BooleanScalarFunction(params);
97+
public Builder vararg(String... names) {
98+
this.paramNames = List.of(names);
99+
return this;
100+
}
101+
102+
public Builder returnType(SqlReturnTypeInference type) {
103+
this.returnType = type;
104+
return this;
105+
}
106+
107+
public UnifiedFunctionSpec build() {
108+
Objects.requireNonNull(returnType);
109+
return new UnifiedFunctionSpec(
110+
funcName,
111+
paramNames,
112+
new SqlUserDefinedFunction(
113+
new SqlIdentifier(funcName, SqlParserPos.ZERO),
114+
SqlKind.OTHER_FUNCTION,
115+
returnType,
116+
InferTypes.ANY_NULLABLE,
117+
new VariadicOperandMetadata(paramNames),
118+
PushdownScalarFunction.INSTANCE));
119+
}
96120
}
97121

98-
/** A ScalarFunction that returns BOOLEAN with the given parameters. */
99-
private record BooleanScalarFunction(List<FunctionParameter> params) implements ScalarFunction {
122+
/** Placeholder for pushdown-only functions — no local execution. */
123+
private enum PushdownScalarFunction implements ScalarFunction {
124+
INSTANCE;
125+
100126
@Override
101-
public List<FunctionParameter> getParameters() {
102-
return params;
127+
public RelDataType getReturnType(RelDataTypeFactory typeFactory) {
128+
return typeFactory.createSqlType(org.apache.calcite.sql.type.SqlTypeName.ANY);
103129
}
104130

105131
@Override
106-
public RelDataType getReturnType(RelDataTypeFactory typeFactory) {
107-
return typeFactory.createSqlType(SqlTypeName.BOOLEAN);
132+
public List<FunctionParameter> getParameters() {
133+
return List.of();
108134
}
109135
}
110136

111-
/** A required function parameter of type ANY. */
112-
private record AnyParam(int ordinal, String name) implements FunctionParameter {
137+
/** Accepts required params + optional trailing params. Carries param names for rewriting. */
138+
private record VariadicOperandMetadata(List<String> paramNames) implements SqlOperandMetadata {
139+
140+
@Override
141+
public List<String> paramNames() {
142+
return paramNames;
143+
}
144+
113145
@Override
114-
public int getOrdinal() {
115-
return ordinal;
146+
public List<RelDataType> paramTypes(RelDataTypeFactory tf) {
147+
return List.of();
116148
}
117149

118150
@Override
119-
public String getName() {
120-
return name;
151+
public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) {
152+
return true; // NamedArgRewriter normalizes before validation
121153
}
122154

123155
@Override
124-
public boolean isOptional() {
125-
return false;
156+
public SqlOperandCountRange getOperandCountRange() {
157+
return SqlOperandCountRanges.from(paramNames.size());
126158
}
127159

128160
@Override
129-
public RelDataType getType(RelDataTypeFactory typeFactory) {
130-
return typeFactory.createSqlType(SqlTypeName.ANY);
161+
public String getAllowedSignatures(SqlOperator op, String opName) {
162+
return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])";
131163
}
132164
}
133165
}

0 commit comments

Comments
 (0)