@@ -27,6 +27,22 @@ public class PPLGrammarBundleBuilder {
// ANTLR runtime version string, obtained from RuntimeMetaData.getRuntimeVersion() when the class
// is initialized.
2727 private static final String ANTLR_VERSION =
2828 org .antlr .v4 .runtime .RuntimeMetaData .getRuntimeVersion ();
// Fixed version label "1.0". NOTE(review): presumably stamped onto the built GrammarBundle —
// confirm against the part of build() that consumes it.
2929 private static final String BUNDLE_VERSION = "1.0" ;
30+ private static final Set <String > INTERNAL_NON_LITERAL_TOKENS =
31+ new HashSet <>(
32+ Arrays .asList (
33+ "ID" ,
34+ "NUMERIC_ID" ,
35+ "ID_DATE_SUFFIX" ,
36+ "CLUSTER" ,
37+ "TIME_SNAP" ,
38+ "SPANLENGTH" ,
39+ "DECIMAL_SPANLENGTH" ,
40+ "DQUOTA_STRING" ,
41+ "SQUOTA_STRING" ,
42+ "BQUOTA_STRING" ,
43+ "LINE_COMMENT" ,
44+ "BLOCK_COMMENT" ,
45+ "ERROR_RECOGNITION" ));
3046
3147 public GrammarBundle build () {
3248 OpenSearchPPLLexer lexer = new OpenSearchPPLLexer (CharStreams .fromString ("" ));
@@ -59,7 +75,7 @@ public GrammarBundle build() {
5975 .literalNames (literalNames )
6076 .symbolicNames (symbolicNames )
6177 .tokenDictionary (buildTokenDictionary (vocabulary ))
62- .ignoredTokens (buildIgnoredTokens ())
78+ .ignoredTokens (buildIgnoredTokens (vocabulary ))
6379 .rulesToVisit (buildRulesToVisit (parser .getRuleNames ()))
6480 .build ();
6581 }
@@ -90,65 +106,32 @@ private static Map<String, Integer> buildTokenDictionary(Vocabulary vocabulary)
90106 }
91107
92108 /**
93- * Build the list of token type IDs to ignore for autocomplete. Mirrors the frontend
94- * getIgnoredTokens() logic: explicitly ignore AS/IN, then ignore two contiguous token ranges
95- * minus operatorsToInclude.
109+ * Build the array of token type IDs that autocomplete should ignore.
96110 *
97- * <p>Range 1 (relevance/internal tokens): MATCH .. ERROR_RECOGNITION — covers relevance
98- * functions, search parameters, span literals, IDs, quoted strings, and error tokens.
99- *
100- * <p>Range 2 (keywords/functions/operators): CASE .. CAST — covers CASE/ELSE, IN, EXISTS,
101- * NOT/OR/AND/XOR, TRUE/FALSE, REGEXP, datetime parts, data type keywords, punctuation,
102- * aggregate functions, math/text/date functions, and CAST.
103- *
104- * <p>Tokens in {@code operatorsToInclude} are kept as suggestions even if they fall within
105- * an ignored range.
111+ * <p>Only lexical/internal tokens are ignored (identifiers, literals, quoted-string tokens,
112+ * comments, and the error-recognition token). User-facing commands/functions/operators are
113+ * intentionally kept so completion dynamically reflects grammar changes.
 *
 * <p>Every token type from 0 through {@code vocabulary.getMaxTokenType()} (inclusive) is
 * checked; a type is ignored when {@code isLexicalInternalToken} accepts its symbolic name.
 *
 * @param vocabulary vocabulary supplying the maximum token type and each type's symbolic name
 * @return the ignored token type IDs, in ascending order
106114 */
107- private static int [] buildIgnoredTokens () {
108- // Verify range boundaries match expected token IDs. If the grammar changes and
109- // shifts token ordinals, these assertions surface the problem at build time.
110- assert OpenSearchPPLParser .MATCH == 427
111- : "MATCH token ID shifted — update ignored range start" ;
112- assert OpenSearchPPLParser .ERROR_RECOGNITION == 488
113- : "ERROR_RECOGNITION token ID shifted — update ignored range end" ;
114- assert OpenSearchPPLParser .CASE == 142
115- : "CASE token ID shifted — update ignored range start" ;
116- assert OpenSearchPPLParser .CAST == 387
117- : "CAST token ID shifted — update ignored range end" ;
118-
119- Set <Integer > operatorsToInclude = new HashSet <>(Arrays .asList (
120- OpenSearchPPLParser .PIPE , OpenSearchPPLParser .EQUAL , OpenSearchPPLParser .COMMA ,
121- OpenSearchPPLParser .NOT_EQUAL , OpenSearchPPLParser .LESS , OpenSearchPPLParser .NOT_LESS ,
122- OpenSearchPPLParser .GREATER , OpenSearchPPLParser .NOT_GREATER ,
123- OpenSearchPPLParser .OR , OpenSearchPPLParser .AND ,
124- OpenSearchPPLParser .LT_PRTHS , OpenSearchPPLParser .RT_PRTHS ,
125- OpenSearchPPLParser .SPAN ,
126- OpenSearchPPLParser .MATCH , OpenSearchPPLParser .MATCH_PHRASE ,
127- OpenSearchPPLParser .MATCH_BOOL_PREFIX , OpenSearchPPLParser .MATCH_PHRASE_PREFIX ,
128- OpenSearchPPLParser .SQUOTA_STRING
129- ));
130-
115+ private static int [] buildIgnoredTokens (Vocabulary vocabulary ) {
131116 List <Integer > ignored = new ArrayList <>();
132- ignored .add (OpenSearchPPLParser .AS );
133- ignored .add (OpenSearchPPLParser .IN );
134117
135- // Range 1: MATCH .. ERROR_RECOGNITION
136- for (int i = OpenSearchPPLParser .MATCH ; i <= OpenSearchPPLParser .ERROR_RECOGNITION ; i ++) {
137- if (!operatorsToInclude .contains (i )) {
138- ignored .add (i );
139- }
140- }
141-
142- // Range 2: CASE .. CAST
143- for (int i = OpenSearchPPLParser .CASE ; i <= OpenSearchPPLParser .CAST ; i ++) {
144- if (!operatorsToInclude .contains (i )) {
145- ignored .add (i );
118+ for (int tokenType = 0 ; tokenType <= vocabulary .getMaxTokenType (); tokenType ++) {
119+ String symbolicName = vocabulary .getSymbolicName (tokenType );
120+ if (isLexicalInternalToken (symbolicName )) {
121+ ignored .add (tokenType );
146122 }
147123 }
148124
149125 return ignored .stream ().mapToInt (Integer ::intValue ).toArray ();
150126 }
151127
128+ private static boolean isLexicalInternalToken (String symbolicName ) {
129+ if (symbolicName == null ) {
130+ return false ;
131+ }
132+ return symbolicName .endsWith ("_LITERAL" ) || INTERNAL_NON_LITERAL_TOKENS .contains (symbolicName );
133+ }
134+
152135 /**
153136 * Build the list of parser rule indices for CodeCompletionCore preferredRules.
154137 * These rules trigger semantic suggestions (suggest fields, tables, functions, etc.).
0 commit comments