theodevelop
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/.vscodeignore b/.vscodeignore
Lines changed: 2 additions & 0 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 34 additions & 0 deletions
diff --git a/images/icon.png b/images/icon.png
1.36 MB
diff --git a/images/icon.svg b/images/icon.svg
Lines changed: 45432 additions & 16 deletions
diff --git a/package.json b/package.json
Lines changed: 1 addition & 1 deletion
diff --git a/server/src/parser/bisonParser.ts b/server/src/parser/bisonParser.ts
Lines changed: 86 additions & 25 deletions
diff --git a/server/src/parser/flexParser.ts b/server/src/parser/flexParser.ts
Lines changed: 18 additions & 4 deletions
@@ -3,3 +3,4 @@ dist/
 client/out/
 server/out/
 *.vsix
+.env
@@ -22,3 +22,5 @@ tsconfig.base.json
 package-lock.json
 *.vsix
 .gitattributes
+.env
+.env.*
@@ -2,6 +2,40 @@
 
 All notable changes to the **Bison/Flex Language Support** extension will be documented in this file.
 
+## [1.1.1] - 2026-03-19
+
+### Fixed
+
+- **Bison — token aliases**: Tokens declared with a string alias (e.g. `%token LBRACE "{"`)
+  are no longer falsely reported as unused when the alias form is used in rules
+- **Bison — `$N` out-of-bounds**: String literal tokens (e.g. `"-"` in `"-" exp`) are now
+  counted as positional symbols, eliminating false `$2 is out of bounds` errors
+- **Bison — shift/reduce false positive**: The S/R heuristic now suppresses warnings when
+  all alternatives sharing a first token have distinct second tokens (e.g. `ID "("`,
+  `ID "{"`, `ID "["` in expression rules)
+- **Bison — `UMINUS` / precedence tokens**: Tokens declared only via `%left`/`%right`/
+  `%nonassoc` are no longer reported as undeclared
+- **Bison — EOF token**: The end-of-input token (value 0) is no longer reported as unused
+- **Bison — `%token` after `%%`**: Token declarations appearing in the rules section
+  (valid Bison syntax) are now correctly registered
+- **Flex — `/* comment */` in rules section**: Single-line block comments in the rules
+  section are no longer parsed as Flex rules, which produced false duplicate-pattern warnings
+- **Flex — `rawPattern` with spaces in character classes**: Patterns like `\\[ \t\n]+\\`
+  are no longer truncated at the space inside `[...]`, which produced false "invalid regex" errors
+- **Flex — RE-flex directives**: `%namespace`, `%lexer`, `%lex`, `%unicode`, and other
+  RE-flex-specific directives no longer trigger "unknown directive" errors
+- **Flex — RE-flex `noyywrap`**: RE-flex files no longer trigger the missing `noyywrap` warning
+- **Flex — `<SC><<EOF>>`**: EOF rules after a catch-all pattern are no longer flagged
+  as inaccessible
+- **Security**: The `.env` file is now excluded from the packaged VSIX (it was previously included inadvertently)
+
+### Added
+
+- Hover and completion documentation for RE-flex built-in methods:
+  `size()`, `lineno()`, `columno()`, `in()`, `out()`
+
+---
+
 ## [1.1.0] - 2026-03-18
 
 ### Added
 
@@ -2,7 +2,7 @@
   "name": "bison-flex-lang",
   "displayName": "Bison/Flex Language Support",
   "description": "Full-featured language support for GNU Bison (.y, .yy) and Flex/RE-flex (.l, .ll) — syntax highlighting with embedded C/C++, real-time diagnostics, intelligent autocompletion, and hover documentation for all directives.",
-  "version": "1.1.0",
+  "version": "1.1.1",
   "publisher": "theodevelop",
   "license": "MIT",
   "repository": {
 
@@ -227,6 +227,14 @@ export function parseBisonDocument(text: string): BisonDocument {
     // Skip empty lines and comments
     if (!trimmed || trimmed.startsWith('//') || trimmed.startsWith('/*')) continue;
 
+    // %token directive inside the rules section (Bison allows declaring tokens after %%).
+    // Must be handled BEFORE rule-body processing to avoid contaminating rule symbols.
+    if (trimmed.startsWith('%token') && braceDepth === 0) {
+      const tm = trimmed.match(/^%token(?:\s+<([^>]+)>)?\s+(.+)/);
+      if (tm) parseTokenNames(tm[2], tm[1], i, doc);
+      continue;
+    }
+
     // Inside a multi-line action block: scan for $n refs and track brace depth.
     if (braceDepth > 0) {
       if (currentRule) {
@@ -325,34 +333,52 @@ export function parseBisonDocument(text: string): BisonDocument {
       if (ch === '{') braceDepth++;
       if (ch === '}') braceDepth = Math.max(0, braceDepth - 1);
     }
-
-    // %token directive in rules section (e.g., %token CHUNKS "_chunks")
-    const inlineTokenMatch = trimmed.match(/^%token\s+([A-Z_][A-Z0-9_]*)\s*(".*")?/);
-    if (inlineTokenMatch) {
-      const name = inlineTokenMatch[1];
-      const alias = inlineTokenMatch[2]?.replace(/"/g, '');
-      const col = line.indexOf(name);
-      doc.tokens.set(name, {
-        name,
-        alias,
-        location: Range.create(i, col, i, col + name.length),
-      });
-    }
   }
 
   return doc;
 }
 
+/**
+ * Encode a Bison string literal (the quoted content) into a safe,
+ * identifier-like placeholder.  Characters up to U+00FF map to exactly two hex
+ * digits each, so "+" and "{" get DIFFERENT placeholders (needed by the
+ * shift/reduce second-token check); wider characters are not guaranteed unique.
+ *
+ * e.g.  "+"  →  __s2b__
+ *        "("  →  __s28__
+ *        "{"  →  __s7b__
+ *        "function"  →  __s66756e6374696f6e__
+ *
+ * All placeholders start with "__s" (lowercase) so they are valid identifiers
+ * but FAIL the all-caps token check -- never mistaken for grammar terminals.
+ */
+function strLiteralPlaceholder(content: string): string {
+  const hex = Array.from(content)
+    .map(c => c.charCodeAt(0).toString(16).padStart(2, '0'))
+    .join('');
+  return `__s${hex}__`;
+}
+
+/** Replace every `"..."` in `text` with its unique strLiteralPlaceholder. */
+function replaceStringLiterals(text: string): string {
+  return text.replace(/"((?:[^"\\]|\\.)*)"/g, (_, content) => ` ${strLiteralPlaceholder(content)} `);
+}
+
 /**
  * Extract all grammar symbols (identifiers) from a production RHS in order.
+ *
+ * String literals ("+", "{", "function", …) ARE counted as symbols because
+ * Bison treats them exactly like tokens in the $N position numbering.
+ * They are replaced with unique hex-encoded placeholders so that the
+ * second-symbol disambiguation in the shift/reduce heuristic can tell
+ * `"("` apart from `"{"` (each literal gets its own placeholder).
  */
 function extractSymbols(text: string): string[] {
-  const cleaned = text
-    .replace(/"(?:[^"\\]|\\.)*"/g, ' ')    // remove strings
-    .replace(/\{[^}]*\}/g, ' ')            // remove inline actions
-    .replace(/%prec\s+\S+/g, ' ')          // remove %prec TOKEN
-    .replace(/%empty/g, ' ')               // remove %empty
-    .replace(/\/\/.*$/g, ' ')              // remove line comments
+  const cleaned = replaceStringLiterals(text)
+    .replace(/\{[^}]*\}/g, ' ')                   // remove inline actions
+    .replace(/%prec\s+\S+/g, ' ')                 // remove %prec TOKEN
+    .replace(/%empty/g, ' ')                      // remove %empty
+    .replace(/\/\/.*$/g, ' ')                     // remove line comments
     .trim();
   const symbols: string[] = [];
   const regex = /\b([a-zA-Z_][a-zA-Z0-9_.]*)\b/g;
@@ -366,14 +392,17 @@ function extractSymbols(text: string): string[] {
 /**
  * Extract the first terminal or non-terminal symbol from a production RHS.
  * Returns undefined for empty productions (%empty) or pure action blocks.
+ *
+ * String literals are replaced with unique hex-encoded placeholders so that
+ * an alternative starting with "function" has a firstSymbol starting with
+ * `__s` (not all-caps) and is therefore not confused with a real terminal.
  */
 function getFirstSymbol(text: string): string | undefined {
-  const cleaned = text
-    .replace(/"(?:[^"\\]|\\.)*"/g, ' ')    // remove strings
-    .replace(/\{[^}]*\}/g, ' ')            // remove inline actions
-    .replace(/%prec\s+\S+/g, ' ')          // remove %prec TOKEN
-    .replace(/%empty/g, ' ')               // remove %empty
-    .replace(/\/\/.*$/g, ' ')             // remove line comments
+  const cleaned = replaceStringLiterals(text)
+    .replace(/\{[^}]*\}/g, ' ')                   // remove inline actions
+    .replace(/%prec\s+\S+/g, ' ')                 // remove %prec TOKEN
+    .replace(/%empty/g, ' ')                      // remove %empty
+    .replace(/\/\/.*$/g, ' ')                     // remove line comments
     .trim();
   const m = cleaned.match(/^([a-zA-Z_][a-zA-Z0-9_.]*)/);
   return m ? m[1] : undefined;
@@ -427,6 +456,38 @@ function extractDollarRefs(text: string, lineNum: number, fullLine: string): Dol
 }
 
 function extractRuleReferences(text: string, lineNum: number, fullLine: string, doc: BisonDocument): void {
+  // Track string literals used as token aliases in rule bodies (e.g. "+" instead of PLUS,
+  // "{" instead of LBRACE).  We use a char-by-char scanner so that:
+  //   • `"{"` at brace-depth 0 → alias `{`  (rule body)
+  //   • `"{"` inside `{ std::string s = "{"; }` → ignored (brace-depth > 0, action block)
+  {
+    let braceDepth = 0;
+    let inString = false;
+    let strStart = -1;
+    for (let ci = 0; ci < text.length; ci++) {
+      const ch = text[ci];
+      if (inString) {
+        if (ch === '\\') { ci++; continue; }          // escape: skip next char
+        if (ch === '"') {
+          const alias = text.substring(strStart, ci); // content between quotes
+          if (alias) {
+            const rawStr = '"' + alias + '"';
+            const col = fullLine.indexOf(rawStr);
+            if (!doc.ruleReferences.has(alias)) doc.ruleReferences.set(alias, []);
+            doc.ruleReferences.get(alias)!.push(
+              Range.create(lineNum, col >= 0 ? col : 0, lineNum, (col >= 0 ? col : 0) + rawStr.length),
+            );
+          }
+          inString = false;
+        }
+      } else {
+        if (ch === '{') { braceDepth++; }
+        else if (ch === '}') { braceDepth = Math.max(0, braceDepth - 1); }
+        else if (ch === '"' && braceDepth === 0) { inString = true; strStart = ci + 1; }
+      }
+    }
+  }
+
   // Find identifiers in rule bodies (potential token/nonterminal references)
   // Skip: strings, actions (braces), %prec keyword (but keep its token), %empty, comments
   const cleaned = text
 
@@ -12,9 +12,23 @@ import {
  * Anything starting with % that isn't in this set → unknown directive diagnostic.
  */
 const KNOWN_FLEX_DIRECTIVES = new Set([
+  // Standard Flex directives
   'option', 'x', 's',
-  'top', 'class',           // RE-flex extensions
   'pointer', 'array',       // old Flex memory model
+  // RE-flex block directives (content is treated as embedded C++)
+  'top', 'class',
+  // RE-flex standalone directives (equivalents of %option name=value)
+  'namespace',              // %namespace foo  →  %option namespace=foo
+  'lexer',                  // %lexer ClassName
+  'lex',                    // %lex name
+  'exception',              // %exception type
+  'flex',                   // %flex (enable Flex compatibility)
+  'graphs-file',            // %graphs-file
+  'header-file',            // %header-file "name"
+  'regexp-file',            // %regexp-file
+  'tabs',                   // %tabs n
+  'unicode',                // %unicode
+  'yywrap',                 // %yywrap (use yywrap() callback)
 ]);
 
 /**
@@ -196,9 +210,9 @@ export function parseFlexDocument(text: string): FlexDocument {
       if (trimmed.includes('*/')) inBlockComment = false;
       continue;
     }
-    if (trimmed.startsWith('/*') && !trimmed.includes('*/')) {
-      inBlockComment = true;
-      continue;
+    if (trimmed.startsWith('/*')) {
+      if (!trimmed.includes('*/')) inBlockComment = true;
+      continue; // skip both single-line /* ... */ and multi-line start
     }
 
     // Skip empty lines and line comments