add more necessary fields

mengweieric · mengweieric · commit c803ab09fd58 · 2026-03-01T17:23:40.000-08:00
Signed-off-by: Eric Wei &lt;mengwei.eric@gmail.com&gt;
diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java b/plugin/src/main/java/org/opensearch/sql/plugin/rest/RestPPLGrammarAction.java
@@ -115,6 +115,11 @@ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) thro
     builder.field("literalNames", bundle.getLiteralNames());
     builder.field("symbolicNames", bundle.getSymbolicNames());
 
+    // Autocomplete configuration
+    builder.field("tokenDictionary", bundle.getTokenDictionary());
+    builder.field("ignoredTokens", bundle.getIgnoredTokens());
+    builder.field("rulesToVisit", bundle.getRulesToVisit());
+
     builder.endObject();
   }
 }
diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/rest/RestPPLGrammarActionTest.java
@@ -93,6 +93,11 @@ public void testGetGrammar_ReturnsBundle() throws Exception {
     // Vocabulary (non-empty arrays)
     assertTrue(json.getJSONArray("literalNames").length() > 0);
     assertTrue(json.getJSONArray("symbolicNames").length() > 0);
+
+    // Autocomplete configuration
+    assertTrue(json.getJSONObject("tokenDictionary").length() > 0);
+    assertTrue(json.getJSONArray("ignoredTokens").length() > 0);
+    assertTrue(json.getJSONArray("rulesToVisit").length() > 0);
   }
 
   @Test
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/GrammarBundle.java
@@ -5,6 +5,7 @@
 
 package org.opensearch.sql.ppl.autocomplete;
 
+import java.util.Map;
 import lombok.Builder;
 import lombok.NonNull;
 import lombok.Value;
@@ -55,4 +56,26 @@ public class GrammarBundle {
    * tokens with no symbolic name; clients must handle sparse arrays.
    */
   @NonNull private String[] symbolicNames;
+
+  /**
+   * Autocomplete token dictionary — maps semantic names used by the autocomplete enrichment logic
+   * (e.g. "WHITESPACE", "PIPE", "SOURCE") to their token type IDs in this grammar. Clients use
+   * this to configure token-aware enrichment without hardcoding token IDs.
+   */
+  @NonNull private Map<String, Integer> tokenDictionary;
+
+  /**
+   * Token type IDs that should be ignored by CodeCompletionCore during candidate collection.
+   * These are tokens like functions, operators, and internal tokens that should not appear
+   * as direct keyword suggestions (e.g. AVG, COUNT, CAST). PIPE and EQUAL are not ignored.
+   */
+  @NonNull private int[] ignoredTokens;
+
+  /**
+   * Parser rule indices that CodeCompletionCore should treat as preferred rules.
+   * When these rules are candidate alternatives, CodeCompletionCore reports them as rule
+   * candidates instead of expanding into their child tokens. The autocomplete enrichment
+   * uses these to trigger semantic suggestions (e.g. suggest fields, suggest tables).
+   */
+  @NonNull private int[] rulesToVisit;
 }
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilder.java
@@ -8,7 +8,13 @@
 import java.nio.charset.StandardCharsets;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
 import org.antlr.v4.runtime.CharStreams;
 import org.antlr.v4.runtime.CommonTokenStream;
 import org.antlr.v4.runtime.Vocabulary;
@@ -52,9 +58,136 @@ public GrammarBundle build() {
         .startRuleIndex(resolveStartRuleIndex(parser.getRuleNames()))
         .literalNames(literalNames)
         .symbolicNames(symbolicNames)
+        .tokenDictionary(buildTokenDictionary(vocabulary))
+        .ignoredTokens(buildIgnoredTokens())
+        .rulesToVisit(buildRulesToVisit(parser.getRuleNames()))
         .build();
   }
 
+  /**
+   * Builds the autocomplete token dictionary: a mapping from the semantic names the frontend
+   * enrichment logic looks up (e.g. "WHITESPACE", "PIPE", "SOURCE") to token type IDs.
+   *
+   * <p>Every ID is read from a constant on {@code OpenSearchPPLLexer}. Some keys deliberately
+   * differ from the lexer constant they map to (OPENING_BRACKET, CLOSING_BRACKET,
+   * BACKTICK_QUOTE). Entries are kept in insertion order.
+   *
+   * @param vocabulary the grammar vocabulary; not consulted by this method
+   * @return a map from semantic token name to token type ID
+   */
+  private static Map<String, Integer> buildTokenDictionary(Vocabulary vocabulary) {
+    Map<String, Integer> tokenIdsByName = new LinkedHashMap<>();
+    // Whitespace is exposed under the key "WHITESPACE". Like every entry below, its ID comes
+    // straight from a lexer constant; no symbolic-name search or -1 fallback is involved.
+    tokenIdsByName.put("WHITESPACE", OpenSearchPPLLexer.WHITESPACE);
+    tokenIdsByName.put("FROM", OpenSearchPPLLexer.FROM);
+    tokenIdsByName.put("OPENING_BRACKET", OpenSearchPPLLexer.LT_PRTHS);
+    tokenIdsByName.put("CLOSING_BRACKET", OpenSearchPPLLexer.RT_PRTHS);
+    tokenIdsByName.put("SEARCH", OpenSearchPPLLexer.SEARCH);
+    tokenIdsByName.put("SOURCE", OpenSearchPPLLexer.SOURCE);
+    tokenIdsByName.put("PIPE", OpenSearchPPLLexer.PIPE);
+    tokenIdsByName.put("ID", OpenSearchPPLLexer.ID);
+    tokenIdsByName.put("EQUAL", OpenSearchPPLLexer.EQUAL);
+    tokenIdsByName.put("IN", OpenSearchPPLLexer.IN);
+    tokenIdsByName.put("COMMA", OpenSearchPPLLexer.COMMA);
+    tokenIdsByName.put("BACKTICK_QUOTE", OpenSearchPPLLexer.BQUOTA_STRING);
+    tokenIdsByName.put("DOT", OpenSearchPPLLexer.DOT);
+    return tokenIdsByName;
+  }
+
+  /**
+   * Build the list of token type IDs to ignore for autocomplete. Mirrors the frontend
+   * getIgnoredTokens() logic: explicitly ignore AS/IN, then ignore two contiguous token ranges
+   * minus operatorsToInclude.
+   *
+   * <p>Range 1 (relevance/internal tokens): MATCH .. ERROR_RECOGNITION — covers relevance
+   * functions, search parameters, span literals, IDs, quoted strings, and error tokens.
+   *
+   * <p>Range 2 (keywords/functions/operators): CASE .. CAST — covers CASE/ELSE, IN, EXISTS,
+   * NOT/OR/AND/XOR, TRUE/FALSE, REGEXP, datetime parts, data type keywords, punctuation,
+   * aggregate functions, math/text/date functions, and CAST.
+   *
+   * <p>Tokens in {@code operatorsToInclude} are kept as suggestions even if they fall within
+   * an ignored range.
+   *
+   * <p>Each token ID appears at most once in the result, in first-seen order: the explicit
+   * entries first, then range 1, then range 2.
+   *
+   * @return the distinct token type IDs to ignore
+   */
+  private static int[] buildIgnoredTokens() {
+    // Sanity-check the range boundaries against the token IDs this code was written for.
+    // These are Java assert statements, so they are only evaluated when the JVM runs with
+    // assertions enabled (-ea); with assertions disabled a shifted ID goes unnoticed here.
+    assert OpenSearchPPLParser.MATCH == 427
+        : "MATCH token ID shifted — update ignored range start";
+    assert OpenSearchPPLParser.ERROR_RECOGNITION == 488
+        : "ERROR_RECOGNITION token ID shifted — update ignored range end";
+    assert OpenSearchPPLParser.CASE == 142
+        : "CASE token ID shifted — update ignored range start";
+    assert OpenSearchPPLParser.CAST == 387
+        : "CAST token ID shifted — update ignored range end";
+
+    // Tokens that stay available as suggestions even though they sit inside an ignored range.
+    Set<Integer> operatorsToInclude = new HashSet<>(Arrays.asList(
+        OpenSearchPPLParser.PIPE, OpenSearchPPLParser.EQUAL, OpenSearchPPLParser.COMMA,
+        OpenSearchPPLParser.NOT_EQUAL, OpenSearchPPLParser.LESS, OpenSearchPPLParser.NOT_LESS,
+        OpenSearchPPLParser.GREATER, OpenSearchPPLParser.NOT_GREATER,
+        OpenSearchPPLParser.OR, OpenSearchPPLParser.AND,
+        OpenSearchPPLParser.LT_PRTHS, OpenSearchPPLParser.RT_PRTHS,
+        OpenSearchPPLParser.SPAN,
+        OpenSearchPPLParser.MATCH, OpenSearchPPLParser.MATCH_PHRASE,
+        OpenSearchPPLParser.MATCH_BOOL_PREFIX, OpenSearchPPLParser.MATCH_PHRASE_PREFIX,
+        OpenSearchPPLParser.SQUOTA_STRING
+    ));
+
+    List<Integer> ignored = new ArrayList<>();
+    ignored.add(OpenSearchPPLParser.AS);
+    ignored.add(OpenSearchPPLParser.IN);
+
+    // Range 1: MATCH .. ERROR_RECOGNITION
+    for (int i = OpenSearchPPLParser.MATCH; i <= OpenSearchPPLParser.ERROR_RECOGNITION; i++) {
+      if (!operatorsToInclude.contains(i)) {
+        ignored.add(i);
+      }
+    }
+
+    // Range 2: CASE .. CAST
+    for (int i = OpenSearchPPLParser.CASE; i <= OpenSearchPPLParser.CAST; i++) {
+      if (!operatorsToInclude.contains(i)) {
+        ignored.add(i);
+      }
+    }
+
+    // An explicitly added token can also lie inside a range (range 2 is documented as covering
+    // IN), so drop repeats; distinct() keeps the first occurrence and the encounter order.
+    return ignored.stream().mapToInt(Integer::intValue).distinct().toArray();
+  }
+
+  /**
+   * Resolves the parser rules that CodeCompletionCore should treat as preferred rules into
+   * their indices within {@code ruleNames}. The returned indices are in the same order as the
+   * hard-coded name list below.
+   *
+   * @param ruleNames the parser's rule names, where a rule's array position is its rule index
+   * @return one rule index per preferred rule name
+   * @throws IllegalStateException if any expected rule name is not found in {@code ruleNames}
+   */
+  private static int[] buildRulesToVisit(String[] ruleNames) {
+    // NOTE(review): "renameClasue" is presumably the grammar's own spelling — confirm against
+    // the grammar before "correcting" it, since an unknown name makes this method throw.
+    String[] preferredRuleNames = {
+      "statsFunctionName", "takeAggFunction", "integerLiteral", "decimalLiteral",
+      "keywordsCanBeId", "renameClasue", "qualifiedName", "tableQualifiedName",
+      "wcQualifiedName", "positionFunctionName", "searchableKeyWord", "stringLiteral",
+      "searchCommand", "searchComparisonOperator", "comparisonOperator", "sqlLikeJoinType"
+    };
+
+    List<String> grammarRules = Arrays.asList(ruleNames);
+    int[] ruleIndices = new int[preferredRuleNames.length];
+    int slot = 0;
+    for (String preferred : preferredRuleNames) {
+      int position = grammarRules.indexOf(preferred);
+      if (position < 0) {
+        throw new IllegalStateException(
+            "Parser rule '" + preferred + "' not found in grammar — "
+                + "was it renamed or removed from OpenSearchPPLParser.g4?");
+      }
+      ruleIndices[slot++] = position;
+    }
+    return ruleIndices;
+  }
+
+  /**
+   * Looks up a token type ID by its symbolic name, scanning token types from 0 up to and
+   * including the vocabulary's maximum token type.
+   *
+   * <p>NOTE(review): no caller of this helper is visible in this change — confirm it is still
+   * needed.
+   *
+   * @param vocabulary the vocabulary whose symbolic names are searched
+   * @param name the symbolic name to find; must not be null
+   * @return the first token type whose symbolic name equals {@code name}, or -1 if none does
+   */
+  private static int resolveTokenType(Vocabulary vocabulary, String name) {
+    int tokenType = 0;
+    while (tokenType <= vocabulary.getMaxTokenType()) {
+      String symbolicName = vocabulary.getSymbolicName(tokenType);
+      if (name.equals(symbolicName)) {
+        return tokenType;
+      }
+      tokenType++;
+    }
+    return -1;
+  }
+
   private static int resolveStartRuleIndex(String[] ruleNames) {
     int idx = Arrays.asList(ruleNames).indexOf("root");
     return Math.max(idx, 0);
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/autocomplete/PPLGrammarBundleBuilderTest.java
@@ -9,8 +9,10 @@
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
+import java.util.Map;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser;
 
 public class PPLGrammarBundleBuilderTest {
 
@@ -123,4 +125,38 @@ public void testBuildIsDeterministic() {
         bundle.getGrammarHash(),
         second.getGrammarHash());
   }
+
+  @Test
+  public void testTokenDictionaryContainsExpectedEntries() {
+    // Spot-check a handful of dictionary entries against the parser's token constants.
+    Map<String, Integer> tokenDictionary = bundle.getTokenDictionary();
+    assertNotNull(tokenDictionary);
+    assertEquals(Integer.valueOf(OpenSearchPPLParser.PIPE), tokenDictionary.get("PIPE"));
+    assertEquals(Integer.valueOf(OpenSearchPPLParser.SOURCE), tokenDictionary.get("SOURCE"));
+    assertEquals(Integer.valueOf(OpenSearchPPLParser.FROM), tokenDictionary.get("FROM"));
+    assertEquals(Integer.valueOf(OpenSearchPPLParser.EQUAL), tokenDictionary.get("EQUAL"));
+    assertEquals(Integer.valueOf(OpenSearchPPLParser.ID), tokenDictionary.get("ID"));
+  }
+
+  @Test
+  public void testIgnoredTokensAreNonEmpty() {
+    assertNotNull(bundle.getIgnoredTokens());
+    assertTrue("ignoredTokens should not be empty", bundle.getIgnoredTokens().length > 0);
+  }
+
+  @Test
+  public void testRulesToVisitAreNonEmpty() {
+    assertNotNull(bundle.getRulesToVisit());
+    assertTrue("rulesToVisit should not be empty", bundle.getRulesToVisit().length > 0);
+  }
+
+  @Test
+  public void testIgnoredRangeBoundariesMatchGrammar() {
+    // Pins the same four token IDs that buildIgnoredTokens() checks with assert statements.
+    // If a grammar change shifts any of them, this test fails and the hard-coded IDs in both
+    // places need to be revisited.
+    final int rangeOneStart = OpenSearchPPLParser.MATCH;
+    final int rangeOneEnd = OpenSearchPPLParser.ERROR_RECOGNITION;
+    final int rangeTwoStart = OpenSearchPPLParser.CASE;
+    final int rangeTwoEnd = OpenSearchPPLParser.CAST;
+
+    assertEquals("MATCH token ID", 427, rangeOneStart);
+    assertEquals("ERROR_RECOGNITION token ID", 488, rangeOneEnd);
+    assertEquals("CASE token ID", 142, rangeTwoStart);
+    assertEquals("CAST token ID", 387, rangeTwoEnd);
+  }
 }

Original file line number	Diff line number	Diff line change
`@@ -115,6 +115,11 @@ private void serializeBundle(XContentBuilder builder, GrammarBundle bundle) thro`
`115`	`115`	`builder.field("literalNames", bundle.getLiteralNames());`
`116`	`116`	`builder.field("symbolicNames", bundle.getSymbolicNames());`
`117`	`117`
	`118`	`+ // Autocomplete configuration`
	`119`	`+ builder.field("tokenDictionary", bundle.getTokenDictionary());`
	`120`	`+ builder.field("ignoredTokens", bundle.getIgnoredTokens());`
	`121`	`+ builder.field("rulesToVisit", bundle.getRulesToVisit());`
	`122`	`+`
`118`	`123`	`builder.endObject();`
`119`	`124`	`}`
`120`	`125`	`}`