mengweieric
diff --git a/‎async-query-core/build.gradle‎
Lines changed: 1 addition & 1 deletion b/‎async-query-core/build.gradle‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎common/build.gradle‎
Lines changed: 1 addition & 1 deletion b/‎common/build.gradle‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java‎
Lines changed: 121 additions & 0 deletions b/‎core/src/main/java/org/opensearch/sql/executor/autocomplete/AutocompleteArtifact.java‎
Lines changed: 121 additions & 0 deletions
diff --git a/‎core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java‎
Lines changed: 202 additions & 0 deletions b/‎core/src/main/java/org/opensearch/sql/executor/autocomplete/GrammarArtifactBuilder.java‎
Lines changed: 202 additions & 0 deletions
diff --git a/‎legacy/build.gradle‎
Lines changed: 2 additions & 2 deletions b/‎legacy/build.gradle‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java‎
Lines changed: 2 additions & 0 deletions b/‎plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java‎
Lines changed: 2 additions & 0 deletions
@@ -43,7 +43,7 @@ configurations {
 }
 
 dependencies {
-    antlr "org.antlr:antlr4:4.7.1"
+    antlr "org.antlr:antlr4:4.13.2"
 
     implementation project(':core')
     implementation 'org.json:json:20231013'
 
@@ -33,7 +33,7 @@ repositories {
 }
 
 dependencies {
-    api "org.antlr:antlr4-runtime:4.7.1"
+    api "org.antlr:antlr4-runtime:4.13.2"
     api group: 'com.google.guava', name: 'guava', version: "${guava_version}"
     api group: 'org.apache.logging.log4j', name: 'log4j-core', version:"${versions.log4j}"
     api group: 'org.apache.commons', name: 'commons-lang3', version: "${commons_lang3_version}"
 
@@ -0,0 +1,121 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.executor.autocomplete;
+
+import java.util.Map;
+import lombok.Builder;
+import lombok.Data;
+
+/**
+ * Autocomplete artifact bundle: a plain data holder for the grammar data a client needs to run
+ * grammar-based autocomplete.
+ *
+ * <p>This bundle is language-agnostic and can be used for PPL, SQL, or any ANTLR-based language. It
+ * contains:
+ *
+ * <ul>
+ *   <li>Identity and versioning information ({@code bundleVersion}, {@code grammarHash})
+ *   <li>Serialized ATN data for lexer and parser (for antlr4ng runtime)
+ *   <li>Vocabulary and rule names (for token/rule interpretation)
+ *   <li>Static catalogs (commands, functions, keywords, snippets) -- NOTE(review): no field for
+ *       these catalogs is declared in this class as shown; confirm where they are carried
+ *   <li>Token classification mapping (for suggestion categorization)
+ * </ul>
+ *
+ * <p>Frontend uses this bundle to:
+ *
+ * <ol>
+ *   <li>Deserialize ATNs with antlr4ng
+ *   <li>Create LexerInterpreter and ParserInterpreter
+ *   <li>Use antlr4-c3 to find valid tokens at cursor
+ *   <li>Generate suggestions from catalogs
+ * </ol>
+ *
+ * <p>Lombok's {@code @Data} generates getters, setters, {@code equals}, {@code hashCode} and
+ * {@code toString} for every field, and {@code @Builder} generates a builder. The array and map
+ * fields are held by reference (no defensive copies are made here), so callers should treat the
+ * values they pass in or read back as shared and avoid mutating them.
+ */
+@Data
+@Builder
+public class AutocompleteArtifact {
+
+  // ============================================================================
+  // Identity & versioning
+  // ============================================================================
+
+  /** Bundle format version; increment whenever the shape of this bundle changes. */
+  private String bundleVersion;
+
+  /**
+   * Hash of grammar sources + ANTLR version. Used for cache validation via ETag. Format:
+   * "sha256:abc123..." (the "sha256:" prefix followed by the hex digest).
+   */
+  private String grammarHash;
+
+  // ============================================================================
+  // Lexer ATN & metadata
+  // ============================================================================
+
+  /**
+   * Serialized lexer ATN as int array. Frontend uses directly: new
+   * ATNDeserializer().deserialize(lexerSerializedATN)
+   */
+  private int[] lexerSerializedATN;
+
+  /** Lexer rule names, in rule-index order (e.g., ["SEARCH", "WHERE", "PIPE", ...]) */
+  private String[] lexerRuleNames;
+
+  /** Channel names (e.g., ["DEFAULT_TOKEN_CHANNEL", "WHITESPACE", "ERRORCHANNEL"]) */
+  private String[] channelNames;
+
+  /** Lexer mode names (e.g., ["DEFAULT_MODE"]) */
+  private String[] modeNames;
+
+  // ============================================================================
+  // Parser ATN & metadata
+  // ============================================================================
+
+  /**
+   * Serialized parser ATN as int array. Frontend uses directly: new
+   * ATNDeserializer().deserialize(parserSerializedATN)
+   */
+  private int[] parserSerializedATN;
+
+  /** Parser rule names, in rule-index order (e.g., ["root", "pplStatement", "commands", ...]) */
+  private String[] parserRuleNames;
+
+  /**
+   * Index into {@code parserRuleNames} of the rule parsing should start from (usually 0 for the
+   * "root" rule).
+   */
+  private int startRuleIndex;
+
+  // ============================================================================
+  // Vocabulary
+  // ============================================================================
+
+  /**
+   * Literal names from vocabulary. Index = token type. Values are literal tokens with quotes, or
+   * null. Example: ["<INVALID>", "'search'", "'where'", "'|'", null, null, ...]
+   *
+   * <p>NOTE(review): ANTLR's {@code Vocabulary} normally reports null rather than "<INVALID>" for
+   * token type 0 -- confirm the first element of the example against real output.
+   */
+  private String[] literalNames;
+
+  /**
+   * Symbolic names from vocabulary. Index = token type. Values are token symbolic names, or null.
+   * Example: ["<INVALID>", "SEARCH", "WHERE", "PIPE", "ID", "INTEGER", ...]
+   *
+   * <p>NOTE(review): as with the literal names, the "<INVALID>" entry at index 0 of the example
+   * should be confirmed against real output.
+   */
+  private String[] symbolicNames;
+
+  /**
+   * Optional display names (user-friendly token names). If not provided, frontend uses literal or
+   * symbolic names. Presumably indexed by token type like the other vocabulary arrays -- TODO
+   * confirm.
+   */
+  private String[] displayNames;
+
+  // ============================================================================
+  // Token classification
+  // ============================================================================
+
+  /**
+   * Mapping from token symbolic name to suggestion category. Used by frontend to classify antlr4-c3
+   * token candidates into suggestion types. Despite the field name, the keys in the example below
+   * are symbolic names (strings), not numeric token types.
+   *
+   * <p>Example: { "SEARCH": "COMMAND", "WHERE": "COMMAND", "BY": "KEYWORD", "COUNT": "FUNCTION",
+   * "AND": "OPERATOR" }
+   */
+  private Map<String, String> tokenTypeToCategory;
+}
@@ -0,0 +1,202 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.executor.autocomplete;
+
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import lombok.extern.log4j.Log4j2;
+import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.Parser;
+import org.antlr.v4.runtime.Vocabulary;
+
+/**
+ * Utility class for extracting ANTLR grammar artifacts (ATN, vocabulary, rule names) from generated
+ * parser/lexer classes.
+ *
+ * <p>This class handles the low-level details of:
+ *
+ * <ul>
+ *   <li>Converting ANTLR's Java String ATN format to int[] for JSON transfer
+ *   <li>Extracting vocabulary (literal and symbolic names)
+ *   <li>Extracting rule names via public ANTLR APIs
+ *   <li>Computing grammar hash for versioning
+ * </ul>
+ *
+ * <p>Language-specific builders (PPL, SQL) use this class to build their autocomplete bundles.
+ */
+@Log4j2
+public class GrammarArtifactBuilder {
+
+  /**
+   * Extract literal names from vocabulary.
+   *
+   * <p>Returns array where index = token type, value = literal token (with quotes) or null.
+   *
+   * <p>Example: ["<INVALID>", "'search'", "'where'", "'|'", null, ...]
+   *
+   * @param vocabulary Parser vocabulary
+   * @return Array of literal names
+   */
+  public static String[] extractLiteralNames(Vocabulary vocabulary) {
+    int maxTokenType = vocabulary.getMaxTokenType();
+    String[] names = new String[maxTokenType + 1];
+
+    for (int i = 0; i <= maxTokenType; i++) {
+      String literal = vocabulary.getLiteralName(i);
+      // Keep nulls as nulls (no literal representation)
+      names[i] = literal;
+    }
+
+    log.debug("Extracted {} literal names", names.length);
+    return names;
+  }
+
+  /**
+   * Extract symbolic names from vocabulary.
+   *
+   * <p>Returns array where index = token type, value = symbolic token name or null.
+   *
+   * <p>Example: ["<INVALID>", "SEARCH", "WHERE", "PIPE", "ID", ...]
+   *
+   * @param vocabulary Parser vocabulary
+   * @return Array of symbolic names
+   */
+  public static String[] extractSymbolicNames(Vocabulary vocabulary) {
+    int maxTokenType = vocabulary.getMaxTokenType();
+    String[] names = new String[maxTokenType + 1];
+
+    for (int i = 0; i <= maxTokenType; i++) {
+      String symbolic = vocabulary.getSymbolicName(i);
+      // Keep nulls as nulls (no symbolic name)
+      names[i] = symbolic;
+    }
+
+    log.debug("Extracted {} symbolic names", names.length);
+    return names;
+  }
+
+  /**
+   * Extract rule names from parser.
+   *
+   * <p>Parser.getRuleNames() is public API.
+   *
+   * @param parser Parser instance
+   * @return Array of rule names
+   */
+  public static String[] extractParserRuleNames(Parser parser) {
+    String[] ruleNames = parser.getRuleNames();
+    log.debug("Extracted {} parser rule names", ruleNames.length);
+    return ruleNames;
+  }
+
+  /**
+   * Extract rule names from lexer.
+   *
+   * <p>Lexer.getRuleNames() is public API (Lexer extends Recognizer).
+   *
+   * @param lexer Lexer instance
+   * @return Array of lexer rule names
+   */
+  public static String[] extractLexerRuleNames(Lexer lexer) {
+    String[] ruleNames = lexer.getRuleNames();
+    log.debug("Extracted {} lexer rule names", ruleNames.length);
+    return ruleNames;
+  }
+
+  /**
+   * Extract channel names from lexer.
+   *
+   * <p>ANTLR 4.x exposes channel names via getChannelNames() method in generated lexers.
+   * This method dynamically extracts the actual channel names from the lexer instance.
+   *
+   * @param lexer Lexer instance
+   * @return Array of channel names
+   */
+  public static String[] extractChannelNames(Lexer lexer) {
+    String[] channelNames = lexer.getChannelNames();
+    log.debug("Extracted {} channel names from lexer", channelNames.length);
+    return channelNames;
+  }
+
+  /**
+   * Extract mode names from lexer.
+   *
+   * <p>ANTLR 4.x exposes mode names via getModeNames() method in generated lexers.
+   * This method dynamically extracts the actual mode names from the lexer instance.
+   *
+   * @param lexer Lexer instance
+   * @return Array of mode names
+   */
+  public static String[] extractModeNames(Lexer lexer) {
+    String[] modeNames = lexer.getModeNames();
+    log.debug("Extracted {} mode names from lexer", modeNames.length);
+    return modeNames;
+  }
+
+  /**
+   * Compute grammar hash from ATN data (recommended).
+   *
+   * <p>This method hashes the serialized ATN arrays directly, which:
+   *
+   * <ul>
+   *   <li>Always available at runtime (no classpath dependencies)
+   *   <li>Reflects the actual artifact being served
+   *   <li>Changes when grammar changes (ATN structure changes)
+   * </ul>
+   *
+   * @param lexerATN Serialized lexer ATN as int array
+   * @param parserATN Serialized parser ATN as int array
+   * @param antlrVersion ANTLR tool version (e.g., "4.13.2")
+   * @return Hash string in format "sha256:abc123..."
+   */
+  public static String computeGrammarHash(int[] lexerATN, int[] parserATN, String antlrVersion) {
+    try {
+      MessageDigest digest = MessageDigest.getInstance("SHA-256");
+
+      // Hash lexer ATN data
+      for (int value : lexerATN) {
+        digest.update((byte) (value >> 8));
+        digest.update((byte) value);
+      }
+
+      // Hash parser ATN data
+      for (int value : parserATN) {
+        digest.update((byte) (value >> 8));
+        digest.update((byte) value);
+      }
+
+      // Hash ANTLR version to detect generator changes
+      digest.update(antlrVersion.getBytes(StandardCharsets.UTF_8));
+
+      // Compute hash
+      byte[] hashBytes = digest.digest();
+      String result = "sha256:" + bytesToHex(hashBytes);
+
+      log.info("Computed grammar hash from ATN data: {}", result);
+      return result;
+
+    } catch (NoSuchAlgorithmException e) {
+      // SHA-256 is required by Java specification, this should never happen
+      throw new IllegalStateException("SHA-256 algorithm not available", e);
+    }
+  }
+
+
+  /**
+   * Convert byte array to hex string.
+   *
+   * @param bytes Input bytes
+   * @return Hex string (lowercase)
+   */
+  private static String bytesToHex(byte[] bytes) {
+    StringBuilder sb = new StringBuilder(bytes.length * 2);
+    for (byte b : bytes) {
+      sb.append(String.format("%02x", b & 0xFF));
+    }
+    return sb.toString();
+  }
+}
@@ -120,8 +120,8 @@ dependencies {
     api project(':opensearch')
 
     // ANTLR gradle plugin and runtime dependency
-    antlr "org.antlr:antlr4:4.7.1"
-    implementation "org.antlr:antlr4-runtime:4.7.1"
+    antlr "org.antlr:antlr4:4.13.2"
+    implementation "org.antlr:antlr4-runtime:4.13.2"
     compileOnly group: 'javax.servlet', name: 'servlet-api', version:'2.5'
 
     testImplementation group: 'org.hamcrest', name: 'hamcrest-core', version:'2.2'
 
@@ -94,6 +94,7 @@
 import org.opensearch.sql.opensearch.storage.OpenSearchDataSourceFactory;
 import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine;
 import org.opensearch.sql.plugin.config.OpenSearchPluginModule;
+import org.opensearch.sql.plugin.rest.RestPPLGrammarAction;
 import org.opensearch.sql.plugin.rest.RestPPLQueryAction;
 import org.opensearch.sql.plugin.rest.RestPPLStatsAction;
 import org.opensearch.sql.plugin.rest.RestQuerySettingsAction;
@@ -163,6 +164,7 @@ public List<RestHandler> getRestHandlers(
 
     return Arrays.asList(
         new RestPPLQueryAction(),
+        new RestPPLGrammarAction(),
         new RestSqlAction(settings, injector),
         new RestSqlStatsAction(settings, restController),
         new RestPPLStatsAction(settings, restController),
Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,7 @@ configurations {`
`43`	`43`	`}`
`44`	`44`
`45`	`45`	`dependencies {`
`46`		`- antlr "org.antlr:antlr4:4.7.1"`
	`46`	`+ antlr "org.antlr:antlr4:4.13.2"`
`47`	`47`
`48`	`48`	`implementation project(':core')`
`49`	`49`	`implementation 'org.json:json:20231013'`
Original file line number	Diff line number	Diff line change
`@@ -33,7 +33,7 @@ repositories {`
`33`	`33`	`}`
`34`	`34`
`35`	`35`	`dependencies {`
`36`		`- api "org.antlr:antlr4-runtime:4.7.1"`
	`36`	`+ api "org.antlr:antlr4-runtime:4.13.2"`
`37`	`37`	`api group: 'com.google.guava', name: 'guava', version: "${guava_version}"`
`38`	`38`	`api group: 'org.apache.logging.log4j', name: 'log4j-core', version:"${versions.log4j}"`
`39`	`39`	`api group: 'org.apache.commons', name: 'commons-lang3', version: "${commons_lang3_version}"`