Skip to content

Commit a375f98

Browse files
authored
Define unified SQL language spec with composable extensions (opensearch-project#5360)
* feat(api): Define unified SQL language spec with composable extensions (opensearch-project#5346) Introduce UnifiedSqlSpec that configures Calcite's parser/validator seams to accept OpenSearch SQL syntax, with a composable SqlExtension interface for domain-specific functions and AST rewriters. - Lex.BIG_QUERY: hyphenated identifiers, backtick quoting - SqlBabelParserImpl: de-reserves MATCH and most keywords - SqlConformanceEnum.BABEL: lenient GROUP BY, LIMIT, optional FROM - SearchExtension: relevance functions + NamedArgRewriter Signed-off-by: Chen Dai <daichen@amazon.com> * refactor(api): add UnifiedPplSpec and route PPL through LanguageSpec Introduce UnifiedPplSpec as the PPL counterpart to UnifiedSqlSpec, using Calcite's default parser/validator configuration. PPL currently has its own ANTLR parser and translates directly to RelNode, so the parser and validator configs returned here are inert for the PPL path today; this lays the groundwork for post-analysis RelNode hooks. UnifiedQueryContext.Builder now selects the spec per QueryType via an exhaustive switch, removing the null branch for PPL and the now-dead null-check in buildFrameworkConfig. Signed-off-by: Chen Dai <daichen@amazon.com> * test(api): tighten lazy plan assertions and fix misplaced SHOW TABLES case Move the SHOW TABLES case out of testNonQueryStatementsBlockedByParser and into testNonQueryStatementsBlockedByWhitelist, where its error message 'Only query statements are supported' actually belongs. The parser accepts SHOW TABLES; the whitelist blocks it. Replace trivial LogicalFilter / LogicalProject / LogicalAggregate substring checks in UnifiedSqlSpecTest with assertions that prove the specific feature under test: double-quoted string literal, MATCH as a non-reserved function, GROUP BY ordinal resolution, boolean-to-integer cast folding, and BABEL's string-to-integer coercion. Use AS aliases to pin field names and avoid Calcite's EXPR$0 placeholders. 
Signed-off-by: Chen Dai <daichen@amazon.com> --------- Signed-off-by: Chen Dai <daichen@amazon.com>
1 parent 93a646f commit a375f98

12 files changed

Lines changed: 387 additions & 34 deletions

api/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ plugins {
1313

1414
dependencies {
1515
api project(':ppl')
16+
api group: 'org.apache.calcite', name: 'calcite-babel', version: '1.41.0'
1617

1718
testImplementation testFixtures(project(':api'))
1819
testImplementation group: 'junit', name: 'junit', version: '4.13.2'

api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,20 @@
1717
import java.util.concurrent.Callable;
1818
import lombok.AllArgsConstructor;
1919
import lombok.Getter;
20-
import org.apache.calcite.avatica.util.Casing;
2120
import org.apache.calcite.jdbc.CalciteSchema;
2221
import org.apache.calcite.plan.RelTraitDef;
2322
import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
2423
import org.apache.calcite.schema.Schema;
2524
import org.apache.calcite.schema.SchemaPlus;
26-
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
27-
import org.apache.calcite.sql.parser.SqlParser;
28-
import org.apache.calcite.sql.util.SqlOperatorTables;
2925
import org.apache.calcite.tools.FrameworkConfig;
3026
import org.apache.calcite.tools.Frameworks;
3127
import org.apache.calcite.tools.Programs;
3228
import org.opensearch.sql.api.parser.CalciteSqlQueryParser;
3329
import org.opensearch.sql.api.parser.PPLQueryParser;
3430
import org.opensearch.sql.api.parser.UnifiedQueryParser;
35-
import org.opensearch.sql.api.spec.UnifiedFunctionSpec;
31+
import org.opensearch.sql.api.spec.LanguageSpec;
32+
import org.opensearch.sql.api.spec.UnifiedPplSpec;
33+
import org.opensearch.sql.api.spec.UnifiedSqlSpec;
3634
import org.opensearch.sql.calcite.CalcitePlanContext;
3735
import org.opensearch.sql.calcite.SysLimit;
3836
import org.opensearch.sql.common.setting.Settings;
@@ -60,6 +58,9 @@ public class UnifiedQueryContext implements AutoCloseable {
6058
/** Query parser created eagerly from this context's configuration. */
6159
private final UnifiedQueryParser<?> parser;
6260

61+
/** Language spec for the query's frontend (SQL or PPL). */
62+
private final LanguageSpec langSpec;
63+
6364
/**
6465
* Returns the profiling result. Call after query execution to retrieve collected metrics. Returns
6566
* empty if profiling was not enabled.
@@ -208,12 +209,18 @@ public Builder setting(String name, Object value) {
208209
public UnifiedQueryContext build() {
209210
Objects.requireNonNull(queryType, "Must specify language before build");
210211

212+
LanguageSpec langSpec =
213+
switch (queryType) {
214+
case SQL -> UnifiedSqlSpec.extended();
215+
case PPL -> UnifiedPplSpec.create();
216+
};
211217
Settings settings = buildSettings();
212218
CalcitePlanContext planContext =
213219
CalcitePlanContext.create(
214-
buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType);
220+
buildFrameworkConfig(langSpec), SysLimit.fromSettings(settings), queryType);
215221
QueryProfiling.activate(profiling);
216-
return new UnifiedQueryContext(planContext, settings, createParser(planContext, settings));
222+
return new UnifiedQueryContext(
223+
planContext, settings, createParser(planContext, settings), langSpec);
217224
}
218225

219226
private UnifiedQueryParser<?> createParser(CalcitePlanContext planContext, Settings settings) {
@@ -239,25 +246,22 @@ public List<?> getSettings() {
239246
}
240247

241248
@SuppressWarnings({"rawtypes"})
242-
private FrameworkConfig buildFrameworkConfig() {
249+
private FrameworkConfig buildFrameworkConfig(LanguageSpec langSpec) {
243250
SchemaPlus rootSchema = CalciteSchema.createRootSchema(true, cacheMetadata).plus();
244251
catalogs.forEach(rootSchema::add);
245252

246253
SchemaPlus defaultSchema = findSchemaByPath(rootSchema, defaultNamespace);
247-
return Frameworks.newConfigBuilder()
248-
.parserConfig(buildParserConfig())
249-
.operatorTable(
250-
SqlOperatorTables.chain(
251-
SqlStdOperatorTable.instance(), UnifiedFunctionSpec.RELEVANCE.operatorTable()))
252-
.defaultSchema(defaultSchema)
253-
.traitDefs((List<RelTraitDef>) null)
254-
.programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE))
255-
.build();
256-
}
254+
Frameworks.ConfigBuilder builder =
255+
Frameworks.newConfigBuilder()
256+
.defaultSchema(defaultSchema)
257+
.traitDefs((List<RelTraitDef>) null)
258+
.programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE));
257259

258-
private SqlParser.Config buildParserConfig() {
259-
// Preserve identifier case for lowercase OpenSearch index names
260-
return SqlParser.Config.DEFAULT.withUnquotedCasing(Casing.UNCHANGED);
260+
return builder
261+
.parserConfig(langSpec.parserConfig())
262+
.sqlValidatorConfig(langSpec.validatorConfig())
263+
.operatorTable(langSpec.operatorTable())
264+
.build();
261265
}
262266

263267
private SchemaPlus findSchemaByPath(SchemaPlus rootSchema, String defaultPath) {

api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
import org.apache.calcite.rel.logical.LogicalSort;
1717
import org.apache.calcite.sql.SqlKind;
1818
import org.apache.calcite.sql.SqlNode;
19+
import org.apache.calcite.sql.util.SqlVisitor;
1920
import org.apache.calcite.tools.Frameworks;
2021
import org.apache.calcite.tools.Planner;
21-
import org.opensearch.sql.api.parser.NamedArgRewriter;
2222
import org.opensearch.sql.api.parser.UnifiedQueryParser;
2323
import org.opensearch.sql.ast.tree.UnresolvedPlan;
2424
import org.opensearch.sql.calcite.CalciteRelNodeVisitor;
@@ -87,7 +87,12 @@ public RelNode plan(String query) throws Exception {
8787
"Only query statements are supported. Got: " + parsed.getKind());
8888
}
8989

90-
SqlNode rewritten = parsed.accept(NamedArgRewriter.INSTANCE);
90+
// TODO: move post-parse rewriting into CalciteSqlQueryParser
91+
SqlNode rewritten = parsed;
92+
for (SqlVisitor<SqlNode> visitor : context.getLangSpec().postParseRules()) {
93+
rewritten = rewritten.accept(visitor);
94+
}
95+
9196
SqlNode validated = planner.validate(rewritten);
9297
RelRoot relRoot = planner.rel(validated);
9398
return relRoot.project();
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.spec;
7+
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
import org.apache.calcite.sql.SqlNode;
11+
import org.apache.calcite.sql.SqlOperatorTable;
12+
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
13+
import org.apache.calcite.sql.parser.SqlParser;
14+
import org.apache.calcite.sql.util.SqlOperatorTables;
15+
import org.apache.calcite.sql.util.SqlVisitor;
16+
import org.apache.calcite.sql.validate.SqlValidator;
17+
18+
/**
19+
* Language specification defining the dialect the engine accepts. Provides parser configuration,
20+
* validator configuration, and composable {@link LanguageExtension}s that contribute operators and
21+
* post-parse rewrite rules.
22+
*
23+
* <p>Implementations define a complete language surface — for example, {@link UnifiedSqlSpec}
24+
* provides the extended SQL dialect, and {@link UnifiedPplSpec} covers PPL, whose parser and
25+
* validator configs are currently inert because PPL has its own parsing pipeline.
26+
*/
27+
public interface LanguageSpec {
28+
29+
/**
30+
* A composable language extension that contributes operators and post-parse rewrite rules. All
31+
* methods have defaults so extensions only override what they need.
32+
*/
33+
interface LanguageExtension {
34+
35+
/**
36+
* Operators (functions, aggregates) this extension adds. Chained with the standard operator
37+
* table during validation. Defaults to an empty table.
38+
*/
39+
default SqlOperatorTable operators() {
40+
return SqlOperatorTables.of();
41+
}
42+
43+
/**
44+
* AST rewrite rules applied after parsing and before validation. Each visitor transforms the
45+
* parse tree (e.g., rewriting named arguments into MAP literals). Defaults to no rules.
46+
*/
47+
default List<SqlVisitor<SqlNode>> postParseRules() {
48+
return List.of();
49+
}
50+
}
51+
52+
/**
53+
* Parser configuration controlling how SQL text is tokenized and parsed into a parse tree,
54+
* including parser factory, lexical rules, and conformance.
55+
*/
56+
SqlParser.Config parserConfig();
57+
58+
/**
59+
* Validator configuration controlling what SQL semantics the validator accepts, such as GROUP BY
60+
* behavior, LIMIT syntax, and type coercion.
61+
*/
62+
SqlValidator.Config validatorConfig();
63+
64+
/**
65+
* Language extensions registered with this spec. Each extension contributes operators and
66+
* post-parse rewrite rules that are composed by {@link #operatorTable()} and {@link
67+
* #postParseRules()}.
68+
*/
69+
List<LanguageExtension> extensions();
70+
71+
/**
72+
* Chained operator table combining the standard Calcite operators with all operators contributed
73+
* by registered extensions, appended in registration order after the standard table.
74+
*/
75+
default SqlOperatorTable operatorTable() {
76+
List<SqlOperatorTable> tables = new ArrayList<>();
77+
tables.add(SqlStdOperatorTable.instance());
78+
extensions().forEach(ext -> tables.add(ext.operators()));
79+
return SqlOperatorTables.chain(tables);
80+
}
81+
82+
/**
83+
* All post-parse rewrite rules from registered extensions, flattened in registration order.
84+
* Applied to the parse tree after parsing and before validation. Returns an unmodifiable list.
85+
*/
86+
default List<SqlVisitor<SqlNode>> postParseRules() {
87+
return extensions().stream().flatMap(ext -> ext.postParseRules().stream()).toList();
88+
}
89+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.spec;
7+
8+
import java.util.List;
9+
import lombok.AccessLevel;
10+
import lombok.NoArgsConstructor;
11+
import org.apache.calcite.sql.parser.SqlParser;
12+
import org.apache.calcite.sql.validate.SqlValidator;
13+
14+
/**
15+
* PPL language specification.
16+
*
17+
* <p>Note: PPL currently has its own parsing and analysis pipeline, so only configuration and
18+
* extensions applied after RelNode construction are in use. The parser and validator configs
19+
* returned here are inert for the PPL path.
20+
*/
21+
@NoArgsConstructor(access = AccessLevel.PRIVATE)
22+
public class UnifiedPplSpec implements LanguageSpec {
23+
24+
/** Creates a PPL spec; the constructor is private, so this factory is the only entry point. */
public static UnifiedPplSpec create() {
25+
return new UnifiedPplSpec();
26+
}
27+
28+
/** Returns Calcite's default parser configuration, with no PPL-specific customization. */
// NOTE(review): the shared parser config this replaces set unquoted casing to UNCHANGED for
// every query type; confirm nothing on the PPL path depended on that setting.
@Override
29+
public SqlParser.Config parserConfig() {
30+
return SqlParser.config();
31+
}
32+
33+
/** Returns Calcite's default validator configuration. */
@Override
34+
public SqlValidator.Config validatorConfig() {
35+
return SqlValidator.Config.DEFAULT;
36+
}
37+
38+
/** Returns an empty list: this spec registers no language extensions. */
@Override
39+
public List<LanguageExtension> extensions() {
40+
return List.of();
41+
}
42+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.spec;
7+
8+
import java.util.List;
9+
import lombok.AccessLevel;
10+
import lombok.Getter;
11+
import lombok.RequiredArgsConstructor;
12+
import lombok.experimental.Accessors;
13+
import org.apache.calcite.config.Lex;
14+
import org.apache.calcite.sql.parser.SqlParser;
15+
import org.apache.calcite.sql.parser.SqlParserImplFactory;
16+
import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl;
17+
import org.apache.calcite.sql.validate.SqlConformanceEnum;
18+
import org.apache.calcite.sql.validate.SqlValidator;
19+
import org.opensearch.sql.api.spec.search.SearchExtension;
20+
21+
/**
22+
* SQL language specification. Configures Calcite's parser, validator, and composable extensions for
23+
* OpenSearch SQL compatibility.
24+
*
25+
* <p>Use {@link #extended()} for the default configuration with lenient syntax, hyphenated
26+
* identifiers, and search functions.
27+
*/
28+
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
29+
@Accessors(fluent = true)
30+
public class UnifiedSqlSpec implements LanguageSpec {
31+
32+
/** Lexical rules: identifier quoting, character escaping, and special identifier support. */
33+
private final Lex lex;
34+
35+
/** Parser implementation: controls keyword reservation and grammar extensions. */
36+
private final SqlParserImplFactory parserFactory;
37+
38+
/** Conformance applied to both parser and validator (e.g. GROUP BY, LIMIT, coercion rules). */
39+
private final SqlConformanceEnum conformance;
40+
41+
// The fluent @Getter below generates extensions(), which implements LanguageSpec#extensions().
/** Composable extensions contributing operators and post-parse rewrite rules. */
42+
@Getter private final List<LanguageExtension> extensions;
43+
44+
/**
45+
* Extended SQL spec: Babel parser, BIG_QUERY lex (hyphenated identifiers, backtick quoting),
46+
* BABEL conformance (lenient GROUP BY, LIMIT, optional FROM), and search functions.
47+
*/
48+
public static UnifiedSqlSpec extended() {
49+
return new UnifiedSqlSpec(
50+
Lex.BIG_QUERY,
51+
SqlBabelParserImpl.FACTORY,
52+
SqlConformanceEnum.BABEL,
53+
List.of(new SearchExtension()));
54+
}
55+
56+
/** Builds the parser config from this spec's parser factory, lexical rules, and conformance. */
@Override
57+
public SqlParser.Config parserConfig() {
58+
return SqlParser.config()
59+
.withParserFactory(parserFactory)
60+
.withLex(lex)
61+
.withConformance(conformance);
62+
}
63+
64+
/** Builds the validator config: Calcite's default with this spec's conformance applied. */
@Override
65+
public SqlValidator.Config validatorConfig() {
66+
return SqlValidator.Config.DEFAULT.withConformance(conformance);
67+
}
68+
}

api/src/main/java/org/opensearch/sql/api/parser/NamedArgRewriter.java renamed to api/src/main/java/org/opensearch/sql/api/spec/search/NamedArgRewriter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

6-
package org.opensearch.sql.api.parser;
6+
package org.opensearch.sql.api.spec.search;
77

88
import java.util.List;
99
import lombok.AccessLevel;
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.spec.search;
7+
8+
import java.util.List;
9+
import org.apache.calcite.sql.SqlNode;
10+
import org.apache.calcite.sql.SqlOperatorTable;
11+
import org.apache.calcite.sql.util.SqlVisitor;
12+
import org.opensearch.sql.api.spec.LanguageSpec;
13+
import org.opensearch.sql.api.spec.UnifiedFunctionSpec;
14+
15+
/** Search extension: relevance function operators plus named-argument rewriting after parse. */
16+
public class SearchExtension implements LanguageSpec.LanguageExtension {
17+
18+
/** Returns the operator table of {@code UnifiedFunctionSpec.RELEVANCE}. */
@Override
19+
public SqlOperatorTable operators() {
20+
return UnifiedFunctionSpec.RELEVANCE.operatorTable();
21+
}
22+
23+
/** Returns a single rewrite rule, the shared {@link NamedArgRewriter#INSTANCE}. */
@Override
24+
public List<SqlVisitor<SqlNode>> postParseRules() {
25+
return List.of(NamedArgRewriter.INSTANCE);
26+
}
27+
}

api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlTest.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,9 @@ public void testNonQueryStatementsBlockedByWhitelist() {
237237
MERGE INTO catalog.employees AS t
238238
USING (SELECT 99 AS id) AS s ON t.id = s.id
239239
WHEN MATCHED THEN UPDATE SET name = 'hacked'\
240+
""",
241+
"""
242+
SHOW TABLES\
240243
""")
241244
.forEach(
242245
sql ->
@@ -245,17 +248,13 @@ public void testNonQueryStatementsBlockedByWhitelist() {
245248

246249
@Test
247250
public void testNonQueryStatementsBlockedByParser() {
248-
List.of(
251+
givenInvalidQuery(
249252
"""
250253
CREATE MATERIALIZED VIEW mv AS
251254
SELECT department, count(*)
252255
FROM catalog.employees
253256
GROUP BY department\
254-
""",
255-
"""
256-
SHOW TABLES\
257257
""")
258-
.forEach(
259-
sql -> givenInvalidQuery(sql).assertErrorMessage("Incorrect syntax near the keyword"));
258+
.assertErrorMessage("Encountered");
260259
}
261260
}

0 commit comments

Comments
 (0)