Skip to content

Commit c803ab0

Browse files
committed
add more necessary fields
Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
1 parent da2e537 commit c803ab0

5 files changed

Lines changed: 202 additions & 0 deletions

File tree

plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,11 @@ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) thro
115115
builder.field("literalNames", bundle.getLiteralNames());
116116
builder.field("symbolicNames", bundle.getSymbolicNames());
117117

118+
// Autocomplete configuration
119+
builder.field("tokenDictionary", bundle.getTokenDictionary());
120+
builder.field("ignoredTokens", bundle.getIgnoredTokens());
121+
builder.field("rulesToVisit", bundle.getRulesToVisit());
122+
118123
builder.endObject();
119124
}
120125
}

plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,11 @@ public void testGetGrammar_ReturnsBundle() throws Exception {
9393
// Vocabulary (non-empty arrays)
9494
assertTrue(json.getJSONArray("literalNames").length() > 0);
9595
assertTrue(json.getJSONArray("symbolicNames").length() > 0);
96+
97+
// Autocomplete configuration
98+
assertTrue(json.getJSONObject("tokenDictionary").length() > 0);
99+
assertTrue(json.getJSONArray("ignoredTokens").length() > 0);
100+
assertTrue(json.getJSONArray("rulesToVisit").length() > 0);
96101
}
97102

98103
@Test

ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
package org.opensearch.sql.ppl.autocomplete;
77

8+
import java.util.Map;
89
import lombok.Builder;
910
import lombok.NonNull;
1011
import lombok.Value;
@@ -55,4 +56,26 @@ public class GrammarBundle {
5556
* tokens with no symbolic name; clients must handle sparse arrays.
5657
*/
5758
@NonNull private String[] symbolicNames;
59+
60+
/**
61+
* Autocomplete token dictionary — maps semantic names used by the autocomplete enrichment logic
62+
* (e.g. "WHITESPACE", "PIPE", "SOURCE") to their token type IDs in this grammar. Clients use this
63+
* to configure token-aware enrichment without hardcoding token IDs.
64+
*/
65+
@NonNull private Map<String, Integer> tokenDictionary;
66+
67+
/**
68+
* Token type IDs that should be ignored by CodeCompletionCore during candidate collection.
69+
* These are tokens like functions, operators, and internal tokens that should not appear
70+
* as direct keyword suggestions (e.g. AVG, COUNT). Operators such as PIPE are not ignored.
71+
*/
72+
@NonNull private int[] ignoredTokens;
73+
74+
/**
75+
* Parser rule indices that CodeCompletionCore should treat as preferred rules.
76+
* When these rules are candidate alternatives, CodeCompletionCore reports them as rule
77+
* candidates instead of expanding into their child tokens. The autocomplete enrichment
78+
* uses these to trigger semantic suggestions (e.g. suggest fields, suggest tables).
79+
*/
80+
@NonNull private int[] rulesToVisit;
5881
}

ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,13 @@
88
import java.nio.charset.StandardCharsets;
99
import java.security.MessageDigest;
1010
import java.security.NoSuchAlgorithmException;
11+
import java.util.ArrayList;
1112
import java.util.Arrays;
13+
import java.util.HashSet;
14+
import java.util.LinkedHashMap;
15+
import java.util.List;
16+
import java.util.Map;
17+
import java.util.Set;
1218
import org.antlr.v4.runtime.CharStreams;
1319
import org.antlr.v4.runtime.CommonTokenStream;
1420
import org.antlr.v4.runtime.Vocabulary;
@@ -52,9 +58,136 @@ public GrammarBundle build() {
5258
.startRuleIndex(resolveStartRuleIndex(parser.getRuleNames()))
5359
.literalNames(literalNames)
5460
.symbolicNames(symbolicNames)
61+
.tokenDictionary(buildTokenDictionary(vocabulary))
62+
.ignoredTokens(buildIgnoredTokens())
63+
.rulesToVisit(buildRulesToVisit(parser.getRuleNames()))
5564
.build();
5665
}
5766

67+
/**
68+
* Build the token dictionary — semantic name → token type ID mapping. Uses lexer constants
69+
* since token type IDs are defined by the lexer. The frontend autocomplete enrichment uses
70+
* these to identify tokens like WHITESPACE, PIPE, SOURCE by name.
71+
*/
72+
private static Map<String, Integer> buildTokenDictionary(Vocabulary vocabulary) {
73+
Map<String, Integer> dict = new LinkedHashMap<>();
74+
// All entries are read directly from the generated OpenSearchPPLLexer constants, so a
75+
// renamed or removed token fails compilation; the vocabulary argument is not consulted.
76+
dict.put("WHITESPACE", OpenSearchPPLLexer.WHITESPACE);
77+
dict.put("FROM", OpenSearchPPLLexer.FROM);
78+
dict.put("OPENING_BRACKET", OpenSearchPPLLexer.LT_PRTHS);
79+
dict.put("CLOSING_BRACKET", OpenSearchPPLLexer.RT_PRTHS);
80+
dict.put("SEARCH", OpenSearchPPLLexer.SEARCH);
81+
dict.put("SOURCE", OpenSearchPPLLexer.SOURCE);
82+
dict.put("PIPE", OpenSearchPPLLexer.PIPE);
83+
dict.put("ID", OpenSearchPPLLexer.ID);
84+
dict.put("EQUAL", OpenSearchPPLLexer.EQUAL);
85+
dict.put("IN", OpenSearchPPLLexer.IN);
86+
dict.put("COMMA", OpenSearchPPLLexer.COMMA);
87+
dict.put("BACKTICK_QUOTE", OpenSearchPPLLexer.BQUOTA_STRING);
88+
dict.put("DOT", OpenSearchPPLLexer.DOT);
89+
return dict;
90+
}
91+
92+
/**
93+
* Build the list of token type IDs to ignore for autocomplete. Mirrors the frontend
94+
* getIgnoredTokens() logic: explicitly ignore AS/IN, then ignore two contiguous token ranges
95+
* minus operatorsToInclude.
96+
*
97+
* <p>Range 1 (relevance/internal tokens): MATCH .. ERROR_RECOGNITION — covers relevance
98+
* functions, search parameters, span literals, IDs, quoted strings, and error tokens.
99+
*
100+
* <p>Range 2 (keywords/functions/operators): CASE .. CAST — covers CASE/ELSE, IN, EXISTS,
101+
* NOT/OR/AND/XOR, TRUE/FALSE, REGEXP, datetime parts, data type keywords, punctuation,
102+
* aggregate functions, math/text/date functions, and CAST.
103+
*
104+
* <p>Tokens in {@code operatorsToInclude} are kept as suggestions even if they fall within
105+
* an ignored range.
106+
*/
107+
private static int[] buildIgnoredTokens() {
108+
// Verify range boundaries match expected token IDs. If the grammar changes and
109+
// shifts token ordinals, these assertions fail at runtime, but only when the JVM runs with -ea.
110+
assert OpenSearchPPLParser.MATCH == 427
111+
: "MATCH token ID shifted — update ignored range start";
112+
assert OpenSearchPPLParser.ERROR_RECOGNITION == 488
113+
: "ERROR_RECOGNITION token ID shifted — update ignored range end";
114+
assert OpenSearchPPLParser.CASE == 142
115+
: "CASE token ID shifted — update ignored range start";
116+
assert OpenSearchPPLParser.CAST == 387
117+
: "CAST token ID shifted — update ignored range end";
118+
119+
Set<Integer> operatorsToInclude = new HashSet<>(Arrays.asList(
120+
OpenSearchPPLParser.PIPE, OpenSearchPPLParser.EQUAL, OpenSearchPPLParser.COMMA,
121+
OpenSearchPPLParser.NOT_EQUAL, OpenSearchPPLParser.LESS, OpenSearchPPLParser.NOT_LESS,
122+
OpenSearchPPLParser.GREATER, OpenSearchPPLParser.NOT_GREATER,
123+
OpenSearchPPLParser.OR, OpenSearchPPLParser.AND,
124+
OpenSearchPPLParser.LT_PRTHS, OpenSearchPPLParser.RT_PRTHS,
125+
OpenSearchPPLParser.SPAN,
126+
OpenSearchPPLParser.MATCH, OpenSearchPPLParser.MATCH_PHRASE,
127+
OpenSearchPPLParser.MATCH_BOOL_PREFIX, OpenSearchPPLParser.MATCH_PHRASE_PREFIX,
128+
OpenSearchPPLParser.SQUOTA_STRING
129+
));
130+
131+
List<Integer> ignored = new ArrayList<>();
132+
ignored.add(OpenSearchPPLParser.AS);
133+
ignored.add(OpenSearchPPLParser.IN);
134+
135+
// Range 1: MATCH .. ERROR_RECOGNITION
136+
for (int i = OpenSearchPPLParser.MATCH; i <= OpenSearchPPLParser.ERROR_RECOGNITION; i++) {
137+
if (!operatorsToInclude.contains(i)) {
138+
ignored.add(i);
139+
}
140+
}
141+
142+
// Range 2: CASE .. CAST
143+
for (int i = OpenSearchPPLParser.CASE; i <= OpenSearchPPLParser.CAST; i++) {
144+
if (!operatorsToInclude.contains(i)) {
145+
ignored.add(i);
146+
}
147+
}
148+
149+
return ignored.stream().mapToInt(Integer::intValue).toArray();
150+
}
151+
152+
/**
153+
* Build the list of parser rule indices for CodeCompletionCore preferredRules.
154+
* These rules trigger semantic suggestions (suggest fields, tables, functions, etc.).
155+
*
156+
* @throws IllegalStateException if any expected rule name is not found in the parser grammar
157+
*/
158+
private static int[] buildRulesToVisit(String[] ruleNames) {
159+
List<String> ruleNamesToVisit = Arrays.asList(
160+
"statsFunctionName", "takeAggFunction", "integerLiteral", "decimalLiteral",
161+
"keywordsCanBeId", "renameClasue", "qualifiedName", "tableQualifiedName",
162+
"wcQualifiedName", "positionFunctionName", "searchableKeyWord", "stringLiteral",
163+
"searchCommand", "searchComparisonOperator", "comparisonOperator", "sqlLikeJoinType"
164+
);
165+
166+
List<String> ruleNamesList = Arrays.asList(ruleNames);
167+
int[] indices = new int[ruleNamesToVisit.size()];
168+
for (int i = 0; i < ruleNamesToVisit.size(); i++) {
169+
String name = ruleNamesToVisit.get(i);
170+
int idx = ruleNamesList.indexOf(name);
171+
if (idx < 0) {
172+
throw new IllegalStateException(
173+
"Parser rule '" + name + "' not found in grammar — "
174+
+ "was it renamed or removed from OpenSearchPPLParser.g4?");
175+
}
176+
indices[i] = idx;
177+
}
178+
return indices;
179+
}
180+
181+
/** Resolve a token type ID from the vocabulary by symbolic name. Returns -1 if not found. */
182+
private static int resolveTokenType(Vocabulary vocabulary, String name) {
183+
for (int i = 0; i <= vocabulary.getMaxTokenType(); i++) {
184+
if (name.equals(vocabulary.getSymbolicName(i))) {
185+
return i;
186+
}
187+
}
188+
return -1;
189+
}
190+
58191
private static int resolveStartRuleIndex(String[] ruleNames) {
59192
int idx = Arrays.asList(ruleNames).indexOf("root");
60193
return Math.max(idx, 0);

ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99
import static org.junit.Assert.assertNotNull;
1010
import static org.junit.Assert.assertTrue;
1111

12+
import java.util.Map;
1213
import org.junit.BeforeClass;
1314
import org.junit.Test;
15+
import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser;
1416

1517
public class PPLGrammarBundleBuilderTest {
1618

@@ -123,4 +125,38 @@ public void testBuildIsDeterministic() {
123125
bundle.getGrammarHash(),
124126
second.getGrammarHash());
125127
}
128+
129+
@Test
130+
public void testTokenDictionaryContainsExpectedEntries() {
131+
Map<String, Integer> dict = bundle.getTokenDictionary();
132+
assertNotNull(dict);
133+
assertEquals((Integer) OpenSearchPPLParser.PIPE, dict.get("PIPE"));
134+
assertEquals((Integer) OpenSearchPPLParser.SOURCE, dict.get("SOURCE"));
135+
assertEquals((Integer) OpenSearchPPLParser.FROM, dict.get("FROM"));
136+
assertEquals((Integer) OpenSearchPPLParser.EQUAL, dict.get("EQUAL"));
137+
assertEquals((Integer) OpenSearchPPLParser.ID, dict.get("ID"));
138+
}
139+
140+
@Test
141+
public void testIgnoredTokensAreNonEmpty() {
142+
assertNotNull(bundle.getIgnoredTokens());
143+
assertTrue("ignoredTokens should not be empty", bundle.getIgnoredTokens().length > 0);
144+
}
145+
146+
@Test
147+
public void testRulesToVisitAreNonEmpty() {
148+
assertNotNull(bundle.getRulesToVisit());
149+
assertTrue("rulesToVisit should not be empty", bundle.getRulesToVisit().length > 0);
150+
}
151+
152+
@Test
153+
public void testIgnoredRangeBoundariesMatchGrammar() {
154+
// These assertions mirror the runtime assertions in buildIgnoredTokens().
155+
// If the grammar changes token ordinals, both this test and the builder assertions
156+
// will flag the issue.
157+
assertEquals("MATCH token ID", 427, OpenSearchPPLParser.MATCH);
158+
assertEquals("ERROR_RECOGNITION token ID", 488, OpenSearchPPLParser.ERROR_RECOGNITION);
159+
assertEquals("CASE token ID", 142, OpenSearchPPLParser.CASE);
160+
assertEquals("CAST token ID", 387, OpenSearchPPLParser.CAST);
161+
}
126162
}

0 commit comments

Comments
 (0)