|
5 | 5 |
|
6 | 6 | package org.opensearch.sql.api.spec; |
7 | 7 |
|
| 8 | +import static org.apache.calcite.sql.type.ReturnTypes.BOOLEAN; |
| 9 | + |
8 | 10 | import java.util.List; |
9 | 11 | import java.util.Map; |
10 | | -import java.util.Set; |
11 | | -import java.util.stream.IntStream; |
| 12 | +import java.util.Objects; |
| 13 | +import java.util.stream.Collectors; |
| 14 | +import java.util.stream.Stream; |
| 15 | +import lombok.AccessLevel; |
| 16 | +import lombok.Getter; |
| 17 | +import lombok.RequiredArgsConstructor; |
12 | 18 | import org.apache.calcite.rel.type.RelDataType; |
13 | 19 | import org.apache.calcite.rel.type.RelDataTypeFactory; |
14 | 20 | import org.apache.calcite.schema.FunctionParameter; |
15 | 21 | import org.apache.calcite.schema.ScalarFunction; |
16 | | -import org.apache.calcite.schema.SchemaPlus; |
17 | | -import org.apache.calcite.sql.type.SqlTypeName; |
18 | | -import org.checkerframework.checker.nullness.qual.Nullable; |
| 22 | +import org.apache.calcite.sql.SqlCallBinding; |
| 23 | +import org.apache.calcite.sql.SqlIdentifier; |
| 24 | +import org.apache.calcite.sql.SqlKind; |
| 25 | +import org.apache.calcite.sql.SqlOperandCountRange; |
| 26 | +import org.apache.calcite.sql.SqlOperator; |
| 27 | +import org.apache.calcite.sql.SqlOperatorTable; |
| 28 | +import org.apache.calcite.sql.parser.SqlParserPos; |
| 29 | +import org.apache.calcite.sql.type.InferTypes; |
| 30 | +import org.apache.calcite.sql.type.SqlOperandCountRanges; |
| 31 | +import org.apache.calcite.sql.type.SqlOperandMetadata; |
| 32 | +import org.apache.calcite.sql.type.SqlReturnTypeInference; |
| 33 | +import org.apache.calcite.sql.util.SqlOperatorTables; |
| 34 | +import org.apache.calcite.sql.validate.SqlUserDefinedFunction; |
19 | 35 |
|
20 | 36 | /** |
21 | | - * Central registry of language-specified function signatures (Unified Language Specification |
22 | | - * layer). Each entry maps a function name to a canonical {@link ScalarFunction} with named required |
23 | | - * parameters of type {@link SqlTypeName#ANY}. |
24 | | - * |
25 | | - * <p>This class defines <em>what functions exist</em> and their signatures. Function |
26 | | - * <em>implementations</em> live in the Unified Execution Runtime (UER) layer — see {@link |
27 | | - * org.opensearch.sql.api.function.UnifiedFunction} and {@link |
28 | | - * org.opensearch.sql.api.function.UnifiedFunctionRepository}. For data-source-specific functions |
29 | | - * (e.g., relevance search), execution is handled by adapter pushdown rules rather than UER. |
30 | | - * |
31 | | - * <p>Named parameters enable SQL named-argument syntax ({@code match(field => col, query => |
32 | | - * 'text')}) via Calcite's {@code ARGUMENT_ASSIGNMENT} operator. With fixed required parameters (no |
33 | | - * optional params), <a href="https://issues.apache.org/jira/browse/CALCITE-5366">CALCITE-5366</a> |
34 | | - * is avoided entirely. |
35 | | - * |
36 | | - * <p>Functions are registered globally on the root schema via {@link #registerAll(SchemaPlus)}, |
37 | | - * following the same pattern as Flink's {@code FlinkSqlOperatorTable} — engine-level primitives |
38 | | - * available regardless of catalog. Pushdown rules enforce data-source capability at optimization |
39 | | - * time. |
40 | | - * |
41 | | - * @see org.opensearch.sql.api.function.UnifiedFunction |
42 | | - * @see org.opensearch.sql.api.function.UnifiedFunctionRepository |
| 37 | + * Unified function specification. Each spec carries its name, param names, and Calcite {@link |
| 38 | + * SqlOperator}, built via a fluent builder. |
43 | 39 | */ |
44 | | -// TODO: UnifiedFunctionRepository should resolve implementations for functions defined here, |
45 | | -// rather than independently discovering from PPLBuiltinOperators. The spec is the source of |
46 | | -// truth for what functions exist; UER provides how they execute. Decide whether to late-bind |
47 | | -// UER implementations (ImplementableFunction) to spec-defined signatures for engine-independent |
48 | | -// functions (e.g., upper, lower). Currently only data-source-specific functions (pushdown-only) |
49 | | -// are registered here. |
| 40 | +@Getter |
| 41 | +@RequiredArgsConstructor(access = AccessLevel.PRIVATE) |
50 | 42 | public final class UnifiedFunctionSpec { |
51 | 43 |
|
52 | | - private UnifiedFunctionSpec() {} |
53 | | - |
54 | | - /** Single-field relevance function params: (field, query). */ |
55 | | - private static final List<String> SINGLE_FIELD_PARAMS = List.of("field", "query"); |
| 44 | + /** Function name as registered in the operator table (e.g., "match", "multi_match"). */ |
| 45 | + private final String funcName; |
56 | 46 |
|
57 | | - /** Multi-field relevance function params: (fields, query). */ |
58 | | - private static final List<String> MULTI_FIELD_PARAMS = List.of("fields", "query"); |
| 47 | + /** |
| 48 | + * Required param names used by {@link org.opensearch.sql.api.parser.NamedArgRewriter} to |
| 49 | + * normalize V2 syntax into MAP form. |
| 50 | + */ |
| 51 | + private final List<String> paramNames; |
59 | 52 |
|
60 | | - private static final Map<String, ScalarFunction> REGISTRY = |
61 | | - Map.of( |
62 | | - "match", scalarFunction(SINGLE_FIELD_PARAMS), |
63 | | - "match_phrase", scalarFunction(SINGLE_FIELD_PARAMS), |
64 | | - "match_bool_prefix", scalarFunction(SINGLE_FIELD_PARAMS), |
65 | | - "match_phrase_prefix", scalarFunction(SINGLE_FIELD_PARAMS), |
66 | | - "multi_match", scalarFunction(MULTI_FIELD_PARAMS), |
67 | | - "simple_query_string", scalarFunction(MULTI_FIELD_PARAMS), |
68 | | - "query_string", scalarFunction(MULTI_FIELD_PARAMS)); |
| 53 | + /** Calcite operator for chaining into the framework config's operator table. */ |
| 54 | + private final SqlOperator operator; |
69 | 55 |
|
70 | | - /** Registers all language-specified functions on the given schema (typically root). */ |
71 | | - public static void registerAll(SchemaPlus schema) { |
72 | | - REGISTRY.forEach(schema::add); |
| 56 | + /** A group of function specs that can be chained into Calcite's operator table. */ |
| 57 | + public record Category(List<UnifiedFunctionSpec> specs) { |
| 58 | + public SqlOperatorTable operatorTable() { |
| 59 | + return SqlOperatorTables.of(specs.stream().map(UnifiedFunctionSpec::getOperator).toList()); |
| 60 | + } |
73 | 61 | } |
74 | 62 |
|
75 | | - /** Returns the canonical ScalarFunction for a language-specified function, or null. */ |
76 | | - public static @Nullable ScalarFunction get(String name) { |
77 | | - return REGISTRY.get(name); |
| 63 | + /** Full-text search functions. */ |
| 64 | + public static final Category RELEVANCE = |
| 65 | + new Category( |
| 66 | + List.of( |
| 67 | + function("match").vararg("field", "query").returnType(BOOLEAN).build(), |
| 68 | + function("match_phrase").vararg("field", "query").returnType(BOOLEAN).build(), |
| 69 | + function("match_bool_prefix").vararg("field", "query").returnType(BOOLEAN).build(), |
| 70 | + function("match_phrase_prefix").vararg("field", "query").returnType(BOOLEAN).build(), |
| 71 | + function("multi_match").vararg("fields", "query").returnType(BOOLEAN).build(), |
| 72 | + function("simple_query_string").vararg("fields", "query").returnType(BOOLEAN).build(), |
| 73 | + function("query_string").vararg("fields", "query").returnType(BOOLEAN).build())); |
| 74 | + |
| 75 | + /** All registered function specs, keyed by function name. */ |
| 76 | + private static final Map<String, UnifiedFunctionSpec> ALL_SPECS = |
| 77 | + Stream.of(RELEVANCE) |
| 78 | + .flatMap(c -> c.specs().stream()) |
| 79 | + .collect(Collectors.toMap(UnifiedFunctionSpec::getFuncName, s -> s)); |
| 80 | + |
| 81 | + /** Looks up a function spec by name across all categories. */ |
| 82 | + public static UnifiedFunctionSpec of(String name) { |
| 83 | + return ALL_SPECS.get(name.toLowerCase()); |
78 | 84 | } |
79 | 85 |
|
80 | | - /** Returns true if the name is a language-specified function. */ |
81 | | - public static boolean isLanguageFunction(String name) { |
82 | | - return REGISTRY.containsKey(name); |
| 86 | + public static Builder function(String name) { |
| 87 | + return new Builder(name); |
83 | 88 | } |
84 | 89 |
|
85 | | - /** All registered language function names. */ |
86 | | - public static Set<String> names() { |
87 | | - return REGISTRY.keySet(); |
88 | | - } |
| 90 | + /** Fluent builder for function specs. */ |
| 91 | + @RequiredArgsConstructor(access = AccessLevel.PRIVATE) |
| 92 | + public static class Builder { |
| 93 | + private final String funcName; |
| 94 | + private List<String> paramNames = List.of(); |
| 95 | + private SqlReturnTypeInference returnType; |
89 | 96 |
|
90 | | - private static ScalarFunction scalarFunction(List<String> paramNames) { |
91 | | - List<FunctionParameter> params = |
92 | | - IntStream.range(0, paramNames.size()) |
93 | | - .mapToObj(i -> (FunctionParameter) new AnyParam(i, paramNames.get(i))) |
94 | | - .toList(); |
95 | | - return new BooleanScalarFunction(params); |
| 97 | + public Builder vararg(String... names) { |
| 98 | + this.paramNames = List.of(names); |
| 99 | + return this; |
| 100 | + } |
| 101 | + |
| 102 | + public Builder returnType(SqlReturnTypeInference type) { |
| 103 | + this.returnType = type; |
| 104 | + return this; |
| 105 | + } |
| 106 | + |
| 107 | + public UnifiedFunctionSpec build() { |
| 108 | + Objects.requireNonNull(returnType); |
| 109 | + return new UnifiedFunctionSpec( |
| 110 | + funcName, |
| 111 | + paramNames, |
| 112 | + new SqlUserDefinedFunction( |
| 113 | + new SqlIdentifier(funcName, SqlParserPos.ZERO), |
| 114 | + SqlKind.OTHER_FUNCTION, |
| 115 | + returnType, |
| 116 | + InferTypes.ANY_NULLABLE, |
| 117 | + new VariadicOperandMetadata(paramNames), |
| 118 | + PushdownScalarFunction.INSTANCE)); |
| 119 | + } |
96 | 120 | } |
97 | 121 |
|
98 | | - /** A ScalarFunction that returns BOOLEAN with the given parameters. */ |
99 | | - private record BooleanScalarFunction(List<FunctionParameter> params) implements ScalarFunction { |
| 122 | + /** Placeholder for pushdown-only functions — no local execution. */ |
| 123 | + private enum PushdownScalarFunction implements ScalarFunction { |
| 124 | + INSTANCE; |
| 125 | + |
100 | 126 | @Override |
101 | | - public List<FunctionParameter> getParameters() { |
102 | | - return params; |
| 127 | + public RelDataType getReturnType(RelDataTypeFactory typeFactory) { |
| 128 | + return typeFactory.createSqlType(org.apache.calcite.sql.type.SqlTypeName.ANY); |
103 | 129 | } |
104 | 130 |
|
105 | 131 | @Override |
106 | | - public RelDataType getReturnType(RelDataTypeFactory typeFactory) { |
107 | | - return typeFactory.createSqlType(SqlTypeName.BOOLEAN); |
| 132 | + public List<FunctionParameter> getParameters() { |
| 133 | + return List.of(); |
108 | 134 | } |
109 | 135 | } |
110 | 136 |
|
111 | | - /** A required function parameter of type ANY. */ |
112 | | - private record AnyParam(int ordinal, String name) implements FunctionParameter { |
| 137 | + /** Accepts required params + optional trailing params. Carries param names for rewriting. */ |
| 138 | + private record VariadicOperandMetadata(List<String> paramNames) implements SqlOperandMetadata { |
| 139 | + |
| 140 | + @Override |
| 141 | + public List<String> paramNames() { |
| 142 | + return paramNames; |
| 143 | + } |
| 144 | + |
113 | 145 | @Override |
114 | | - public int getOrdinal() { |
115 | | - return ordinal; |
| 146 | + public List<RelDataType> paramTypes(RelDataTypeFactory tf) { |
| 147 | + return List.of(); |
116 | 148 | } |
117 | 149 |
|
118 | 150 | @Override |
119 | | - public String getName() { |
120 | | - return name; |
| 151 | + public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) { |
| 152 | + return true; // NamedArgRewriter normalizes before validation |
121 | 153 | } |
122 | 154 |
|
123 | 155 | @Override |
124 | | - public boolean isOptional() { |
125 | | - return false; |
| 156 | + public SqlOperandCountRange getOperandCountRange() { |
| 157 | + return SqlOperandCountRanges.from(paramNames.size()); |
126 | 158 | } |
127 | 159 |
|
128 | 160 | @Override |
129 | | - public RelDataType getType(RelDataTypeFactory typeFactory) { |
130 | | - return typeFactory.createSqlType(SqlTypeName.ANY); |
| 161 | + public String getAllowedSignatures(SqlOperator op, String opName) { |
| 162 | + return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])"; |
131 | 163 | } |
132 | 164 | } |
133 | 165 | } |
0 commit comments