|
5 | 5 |
|
6 | 6 | package org.opensearch.sql.api.spec; |
7 | 7 |
|
| 8 | +import static org.apache.calcite.sql.type.ReturnTypes.BOOLEAN; |
| 9 | + |
8 | 10 | import java.util.List; |
9 | 11 | import java.util.Map; |
10 | | -import java.util.Set; |
11 | | -import java.util.stream.IntStream; |
| 12 | +import java.util.Objects; |
| 13 | +import java.util.stream.Collectors; |
| 14 | +import java.util.stream.Stream; |
| 15 | +import lombok.AccessLevel; |
| 16 | +import lombok.Getter; |
| 17 | +import lombok.RequiredArgsConstructor; |
| 18 | +import lombok.ToString; |
12 | 19 | import org.apache.calcite.rel.type.RelDataType; |
13 | 20 | import org.apache.calcite.rel.type.RelDataTypeFactory; |
14 | | -import org.apache.calcite.schema.FunctionParameter; |
15 | | -import org.apache.calcite.schema.ScalarFunction; |
16 | | -import org.apache.calcite.schema.SchemaPlus; |
17 | | -import org.apache.calcite.sql.type.SqlTypeName; |
18 | | -import org.checkerframework.checker.nullness.qual.Nullable; |
| 21 | +import org.apache.calcite.sql.SqlCallBinding; |
| 22 | +import org.apache.calcite.sql.SqlIdentifier; |
| 23 | +import org.apache.calcite.sql.SqlKind; |
| 24 | +import org.apache.calcite.sql.SqlOperandCountRange; |
| 25 | +import org.apache.calcite.sql.SqlOperator; |
| 26 | +import org.apache.calcite.sql.SqlOperatorTable; |
| 27 | +import org.apache.calcite.sql.parser.SqlParserPos; |
| 28 | +import org.apache.calcite.sql.type.InferTypes; |
| 29 | +import org.apache.calcite.sql.type.SqlOperandCountRanges; |
| 30 | +import org.apache.calcite.sql.type.SqlOperandMetadata; |
| 31 | +import org.apache.calcite.sql.type.SqlReturnTypeInference; |
| 32 | +import org.apache.calcite.sql.util.SqlOperatorTables; |
| 33 | +import org.apache.calcite.sql.validate.SqlUserDefinedFunction; |
19 | 34 |
|
20 | 35 | /** |
21 | | - * Central registry of language-specified function signatures (Unified Language Specification |
22 | | - * layer). Each entry maps a function name to a canonical {@link ScalarFunction} with named required |
23 | | - * parameters of type {@link SqlTypeName#ANY}. |
24 | | - * |
25 | | - * <p>This class defines <em>what functions exist</em> and their signatures. Function |
26 | | - * <em>implementations</em> live in the Unified Execution Runtime (UER) layer — see {@link |
27 | | - * org.opensearch.sql.api.function.UnifiedFunction} and {@link |
28 | | - * org.opensearch.sql.api.function.UnifiedFunctionRepository}. For data-source-specific functions |
29 | | - * (e.g., relevance search), execution is handled by adapter pushdown rules rather than UER. |
30 | | - * |
31 | | - * <p>Named parameters enable SQL named-argument syntax ({@code match(field => col, query => |
32 | | - * 'text')}) via Calcite's {@code ARGUMENT_ASSIGNMENT} operator. With fixed required parameters (no |
33 | | - * optional params), <a href="https://issues.apache.org/jira/browse/CALCITE-5366">CALCITE-5366</a> |
34 | | - * is avoided entirely. |
35 | | - * |
36 | | - * <p>Functions are registered globally on the root schema via {@link #registerAll(SchemaPlus)}, |
37 | | - * following the same pattern as Flink's {@code FlinkSqlOperatorTable} — engine-level primitives |
38 | | - * available regardless of catalog. Pushdown rules enforce data-source capability at optimization |
39 | | - * time. |
40 | | - * |
41 | | - * @see org.opensearch.sql.api.function.UnifiedFunction |
42 | | - * @see org.opensearch.sql.api.function.UnifiedFunctionRepository |
| 36 | + * Declarative registry of language-level functions for the unified query engine. Functions defined |
| 37 | + * here are part of the language spec — always resolvable regardless of the underlying data source. |
| 38 | + * They are grouped into {@link Category categories} that callers chain into Calcite's operator |
| 39 | + * table. Data-source capability is enforced at optimization time by pushdown rules. |
43 | 40 | */ |
44 | | -// TODO: UnifiedFunctionRepository should resolve implementations for functions defined here, |
45 | | -// rather than independently discovering from PPLBuiltinOperators. The spec is the source of |
46 | | -// truth for what functions exist; UER provides how they execute. Decide whether to late-bind |
47 | | -// UER implementations (ImplementableFunction) to spec-defined signatures for engine-independent |
48 | | -// functions (e.g., upper, lower). Currently only data-source-specific functions (pushdown-only) |
49 | | -// are registered here. |
| 41 | +@Getter |
| 42 | +@ToString |
| 43 | +@RequiredArgsConstructor(access = AccessLevel.PRIVATE) |
50 | 44 | public final class UnifiedFunctionSpec { |
51 | 45 |
|
52 | | - private UnifiedFunctionSpec() {} |
53 | | - |
54 | | - /** Single-field relevance function params: (field, query). */ |
55 | | - private static final List<String> SINGLE_FIELD_PARAMS = List.of("field", "query"); |
56 | | - |
57 | | - /** Multi-field relevance function params: (fields, query). */ |
58 | | - private static final List<String> MULTI_FIELD_PARAMS = List.of("fields", "query"); |
59 | | - |
60 | | - private static final Map<String, ScalarFunction> REGISTRY = |
61 | | - Map.of( |
62 | | - "match", scalarFunction(SINGLE_FIELD_PARAMS), |
63 | | - "match_phrase", scalarFunction(SINGLE_FIELD_PARAMS), |
64 | | - "match_bool_prefix", scalarFunction(SINGLE_FIELD_PARAMS), |
65 | | - "match_phrase_prefix", scalarFunction(SINGLE_FIELD_PARAMS), |
66 | | - "multi_match", scalarFunction(MULTI_FIELD_PARAMS), |
67 | | - "simple_query_string", scalarFunction(MULTI_FIELD_PARAMS), |
68 | | - "query_string", scalarFunction(MULTI_FIELD_PARAMS)); |
69 | | - |
70 | | - /** Registers all language-specified functions on the given schema (typically root). */ |
71 | | - public static void registerAll(SchemaPlus schema) { |
72 | | - REGISTRY.forEach(schema::add); |
| 46 | + /** Function name as registered in the operator table (e.g., "match", "multi_match"). */ |
| 47 | + private final String funcName; |
| 48 | + |
| 49 | + /** Calcite operator for chaining into the framework config's operator table. */ |
| 50 | + private final SqlOperator operator; |
| 51 | + |
| 52 | + /** Full-text search functions. */ |
| 53 | + public static final Category RELEVANCE = |
| 54 | + new Category( |
| 55 | + List.of( |
| 56 | + function("match").vararg("field", "query").returnType(BOOLEAN).build(), |
| 57 | + function("match_phrase").vararg("field", "query").returnType(BOOLEAN).build(), |
| 58 | + function("match_bool_prefix").vararg("field", "query").returnType(BOOLEAN).build(), |
| 59 | + function("match_phrase_prefix").vararg("field", "query").returnType(BOOLEAN).build(), |
| 60 | + function("multi_match").vararg("fields", "query").returnType(BOOLEAN).build(), |
| 61 | + function("simple_query_string").vararg("fields", "query").returnType(BOOLEAN).build(), |
| 62 | + function("query_string").vararg("fields", "query").returnType(BOOLEAN).build())); |
| 63 | + |
| 64 | + /** All registered function specs, keyed by function name. */ |
| 65 | + private static final Map<String, UnifiedFunctionSpec> ALL_SPECS = |
| 66 | + Stream.of(RELEVANCE) |
| 67 | + .flatMap(c -> c.specs().stream()) |
| 68 | + .collect(Collectors.toMap(UnifiedFunctionSpec::getFuncName, s -> s)); |
| 69 | + |
| 70 | + /** |
| 71 | + * Looks up a function spec by name across all categories. |
| 72 | + * |
| 73 | + * @param name function name (case-insensitive) |
| 74 | + * @return the spec, or {@code null} if not found |
| 75 | + */ |
| 76 | + public static UnifiedFunctionSpec of(String name) { |
| 77 | + return ALL_SPECS.get(name.toLowerCase()); |
73 | 78 | } |
74 | 79 |
|
75 | | - /** Returns the canonical ScalarFunction for a language-specified function, or null. */ |
76 | | - public static @Nullable ScalarFunction get(String name) { |
77 | | - return REGISTRY.get(name); |
| 80 | + /** |
| 81 | + * @return required param names from {@link SqlOperandMetadata}, or empty if not available. |
| 82 | + */ |
| 83 | + public List<String> getParamNames() { |
| 84 | + return operator.getOperandTypeChecker() instanceof SqlOperandMetadata metadata |
| 85 | + ? metadata.paramNames() |
| 86 | + : List.of(); |
78 | 87 | } |
79 | 88 |
|
80 | | - /** Returns true if the name is a language-specified function. */ |
81 | | - public static boolean isLanguageFunction(String name) { |
82 | | - return REGISTRY.containsKey(name); |
| 89 | + /** A group of function specs that can be chained into Calcite's operator table. */ |
| 90 | + public record Category(List<UnifiedFunctionSpec> specs) { |
| 91 | + public SqlOperatorTable operatorTable() { |
| 92 | + return SqlOperatorTables.of(specs.stream().map(UnifiedFunctionSpec::getOperator).toList()); |
| 93 | + } |
83 | 94 | } |
84 | 95 |
|
85 | | - /** All registered language function names. */ |
86 | | - public static Set<String> names() { |
87 | | - return REGISTRY.keySet(); |
| 96 | + public static Builder function(String name) { |
| 97 | + return new Builder(name); |
88 | 98 | } |
89 | 99 |
|
90 | | - private static ScalarFunction scalarFunction(List<String> paramNames) { |
91 | | - List<FunctionParameter> params = |
92 | | - IntStream.range(0, paramNames.size()) |
93 | | - .mapToObj(i -> (FunctionParameter) new AnyParam(i, paramNames.get(i))) |
94 | | - .toList(); |
95 | | - return new BooleanScalarFunction(params); |
96 | | - } |
| 100 | + /** Fluent builder for function specs. */ |
| 101 | + @RequiredArgsConstructor(access = AccessLevel.PRIVATE) |
| 102 | + public static class Builder { |
| 103 | + private final String funcName; |
| 104 | + private List<String> paramNames = List.of(); |
| 105 | + private SqlReturnTypeInference returnType; |
97 | 106 |
|
98 | | - /** A ScalarFunction that returns BOOLEAN with the given parameters. */ |
99 | | - private record BooleanScalarFunction(List<FunctionParameter> params) implements ScalarFunction { |
100 | | - @Override |
101 | | - public List<FunctionParameter> getParameters() { |
102 | | - return params; |
| 107 | + public Builder vararg(String... names) { |
| 108 | + this.paramNames = List.of(names); |
| 109 | + return this; |
103 | 110 | } |
104 | 111 |
|
105 | | - @Override |
106 | | - public RelDataType getReturnType(RelDataTypeFactory typeFactory) { |
107 | | - return typeFactory.createSqlType(SqlTypeName.BOOLEAN); |
| 112 | + public Builder returnType(SqlReturnTypeInference type) { |
| 113 | + this.returnType = type; |
| 114 | + return this; |
| 115 | + } |
| 116 | + |
| 117 | + public UnifiedFunctionSpec build() { |
| 118 | + Objects.requireNonNull(returnType, "returnType is required"); |
| 119 | + return new UnifiedFunctionSpec( |
| 120 | + funcName, |
| 121 | + new SqlUserDefinedFunction( |
| 122 | + new SqlIdentifier(funcName, SqlParserPos.ZERO), |
| 123 | + SqlKind.OTHER_FUNCTION, |
| 124 | + returnType, |
| 125 | + InferTypes.ANY_NULLABLE, |
| 126 | + new VariadicOperandMetadata(paramNames), |
| 127 | + List::of)); // Pushdown-only: no local implementation |
108 | 128 | } |
109 | 129 | } |
110 | 130 |
|
111 | | - /** A required function parameter of type ANY. */ |
112 | | - private record AnyParam(int ordinal, String name) implements FunctionParameter { |
| 131 | + /** |
| 132 | + * Custom operand metadata that bypasses Calcite's built-in type checking. Calcite's {@code |
| 133 | + * FamilyOperandTypeChecker} rejects variadic calls (CALCITE-5366), so this implementation accepts |
| 134 | + * any operand types and delegates validation to pushdown. |
| 135 | + */ |
| 136 | + private record VariadicOperandMetadata(List<String> paramNames) implements SqlOperandMetadata { |
| 137 | + |
| 138 | + @Override |
| 139 | + public List<String> paramNames() { |
| 140 | + return paramNames; |
| 141 | + } |
| 142 | + |
113 | 143 | @Override |
114 | | - public int getOrdinal() { |
115 | | - return ordinal; |
| 144 | + public List<RelDataType> paramTypes(RelDataTypeFactory tf) { |
| 145 | + return List.of(); |
116 | 146 | } |
117 | 147 |
|
118 | 148 | @Override |
119 | | - public String getName() { |
120 | | - return name; |
| 149 | + public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) { |
| 150 | + return true; // Bypass: CALCITE-5366 breaks optional argument type checking |
121 | 151 | } |
122 | 152 |
|
123 | 153 | @Override |
124 | | - public boolean isOptional() { |
125 | | - return false; |
| 154 | + public SqlOperandCountRange getOperandCountRange() { |
| 155 | + return SqlOperandCountRanges.from(paramNames.size()); |
126 | 156 | } |
127 | 157 |
|
128 | 158 | @Override |
129 | | - public RelDataType getType(RelDataTypeFactory typeFactory) { |
130 | | - return typeFactory.createSqlType(SqlTypeName.ANY); |
| 159 | + public String getAllowedSignatures(SqlOperator op, String opName) { |
| 160 | + return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])"; |
131 | 161 | } |
132 | 162 | } |
133 | 163 | } |
0 commit comments