Skip to content

Commit 4aaa465

Browse files
authored
Merge pull request #3 from theodevelop/dev
v1.1.1
2 parents 5d79301 + a2b22cd commit 4aaa465

11 files changed

Lines changed: 46020 additions & 77 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ dist/
33
client/out/
44
server/out/
55
*.vsix
6+
.env

.vscodeignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,5 @@ tsconfig.base.json
2222
package-lock.json
2323
*.vsix
2424
.gitattributes
25+
.env
26+
.env.*

CHANGELOG.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,40 @@
22

33
All notable changes to the **Bison/Flex Language Support** extension will be documented in this file.
44

5+
## [1.1.1] - 2026-03-19
6+
7+
### Fixed
8+
9+
- **Bison — token aliases**: Tokens declared with a string alias (e.g. `%token LBRACE "{"`)
10+
are no longer falsely reported as unused when the alias form is used in rules
11+
- **Bison — `$N` out-of-bounds**: String literal tokens (e.g. `"-"` in `"-" exp`) are now
12+
counted as positional symbols, eliminating false `$2 is out of bounds` errors
13+
- **Bison — shift/reduce false positive**: The S/R heuristic now suppresses warnings when
14+
all alternatives sharing a first token have distinct second tokens (e.g. `ID "("`,
15+
`ID "{"`, `ID "["` in expression rules)
16+
- **Bison — `UMINUS` / precedence tokens**: Tokens declared only via `%left`/`%right`/
17+
`%nonassoc` are no longer reported as undeclared
18+
- **Bison — EOF token**: The end-of-input token (value 0) is no longer reported as unused
19+
- **Bison — `%token` after `%%`**: Token declarations appearing in the rules section
20+
(valid Bison syntax) are now correctly registered
21+
- **Flex — `/* comment */` in rules section**: Single-line block comments in the rules
22+
section were incorrectly parsed as Flex rules, producing false duplicate-pattern warnings
23+
- **Flex — `rawPattern` with spaces in character classes**: Patterns like `\\[ \t\n]+\\`
24+
were truncated at the space inside `[...]`, producing false "invalid regex" errors
25+
- **Flex — RE-flex directives**: `%namespace`, `%lexer`, `%lex`, `%unicode`, and other
26+
RE-flex-specific directives no longer trigger "unknown directive" errors
27+
- **Flex — RE-flex `noyywrap`**: RE-flex files no longer trigger the missing `noyywrap` warning
28+
- **Flex — `<SC><<EOF>>`**: EOF rules after a catch-all pattern are no longer flagged
29+
as inaccessible
30+
- **Security**: `.env` file excluded from packaged VSIX (was inadvertently included)
31+
32+
### Added
33+
34+
- Hover and completion documentation for RE-flex built-in methods:
35+
`size()`, `lineno()`, `columno()`, `in()`, `out()`
36+
37+
---
38+
539
## [1.1.0] - 2026-03-18
640

741
### Added

images/icon.png

1.36 MB
Loading

images/icon.svg

Lines changed: 45432 additions & 16 deletions
Loading

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"name": "bison-flex-lang",
33
"displayName": "Bison/Flex Language Support",
44
"description": "Full-featured language support for GNU Bison (.y, .yy) and Flex/RE-flex (.l, .ll) — syntax highlighting with embedded C/C++, real-time diagnostics, intelligent autocompletion, and hover documentation for all directives.",
5-
"version": "1.1.0",
5+
"version": "1.1.1",
66
"publisher": "theodevelop",
77
"license": "MIT",
88
"repository": {

server/src/parser/bisonParser.ts

Lines changed: 86 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,14 @@ export function parseBisonDocument(text: string): BisonDocument {
227227
// Skip empty lines and comments
228228
if (!trimmed || trimmed.startsWith('//') || trimmed.startsWith('/*')) continue;
229229

230+
// %token directive inside the rules section (Bison allows declaring tokens after %%).
231+
// Must be handled BEFORE rule-body processing to avoid contaminating rule symbols.
232+
if (trimmed.startsWith('%token') && braceDepth === 0) {
233+
const tm = trimmed.match(/^%token(?:\s+<([^>]+)>)?\s+(.+)/);
234+
if (tm) parseTokenNames(tm[2], tm[1], i, doc);
235+
continue;
236+
}
237+
230238
// Inside a multi-line action block: scan for $n refs and track brace depth.
231239
if (braceDepth > 0) {
232240
if (currentRule) {
@@ -325,34 +333,52 @@ export function parseBisonDocument(text: string): BisonDocument {
325333
if (ch === '{') braceDepth++;
326334
if (ch === '}') braceDepth = Math.max(0, braceDepth - 1);
327335
}
328-
329-
// %token directive in rules section (e.g., %token CHUNKS "_chunks")
330-
const inlineTokenMatch = trimmed.match(/^%token\s+([A-Z_][A-Z0-9_]*)\s*(".*")?/);
331-
if (inlineTokenMatch) {
332-
const name = inlineTokenMatch[1];
333-
const alias = inlineTokenMatch[2]?.replace(/"/g, '');
334-
const col = line.indexOf(name);
335-
doc.tokens.set(name, {
336-
name,
337-
alias,
338-
location: Range.create(i, col, i, col + name.length),
339-
});
340-
}
341336
}
342337

343338
return doc;
344339
}
345340

341+
/**
 * Encode the content of a Bison string literal (the text between the quotes)
 * into an identifier-like placeholder that is unique per distinct content.
 *
 * Encoding, applied per Unicode code point:
 *   - code points below 0x100 → exactly two lowercase hex digits
 *   - all other code points   → `u` followed by exactly six lowercase hex digits
 *
 * Because `u` is not a hex digit and both forms have a fixed width, the
 * encoded stream can only be split one way, so two different literals never
 * share a placeholder. This matters for the second-token disambiguation in
 * the shift/reduce heuristic, which must tell "(" apart from "{".
 *
 *   e.g.  "+"         →  __s2b__
 *         "("         →  __s28__
 *         "{"         →  __s7b__
 *         "function"  →  __s66756e6374696f6e__
 *         "😀"        →  __su01f600__
 *
 * All placeholders start with "__s" (lowercase), so they are valid
 * identifiers but do not look like all-caps token names.
 *
 * @param content Raw text found between the double quotes of the literal.
 * @returns The placeholder, of the form `__s<encoded>__`.
 */
function strLiteralPlaceholder(content: string): string {
  let encoded = '';
  // for..of walks the string by code point, so a surrogate pair is one step.
  for (const ch of content) {
    // codePointAt (not charCodeAt) yields the full value of astral characters;
    // charCodeAt would return only the high surrogate and cause collisions.
    const cp = ch.codePointAt(0) ?? 0;
    encoded += cp < 0x100
      ? cp.toString(16).padStart(2, '0')
      : `u${cp.toString(16).padStart(6, '0')}`;
  }
  return `__s${encoded}__`;
}
361+
362+
/**
 * Substitute each double-quoted literal in `text` (backslash escapes are
 * honoured when locating the closing quote) with the placeholder produced by
 * strLiteralPlaceholder for its content, padded with one space on each side.
 */
function replaceStringLiterals(text: string): string {
  const quotedLiteral = /"((?:[^"\\]|\\.)*)"/g;
  return text.replace(quotedLiteral, (_whole: string, inner: string) => {
    const placeholder = strLiteralPlaceholder(inner);
    return ` ${placeholder} `;
  });
}
366+
346367
/**
347368
* Extract all grammar symbols (identifiers) from a production RHS in order.
369+
*
370+
* String literals ("+", "{", "function", …) ARE counted as symbols because
371+
* Bison treats them exactly like tokens in the $N position numbering.
372+
* They are replaced with unique hex-encoded placeholders so that the
373+
* second-symbol disambiguation in the shift/reduce heuristic can tell
374+
* `"("` apart from `"{"` (each literal gets its own distinct placeholder).
348375
*/
349376
function extractSymbols(text: string): string[] {
350-
const cleaned = text
351-
.replace(/"(?:[^"\\]|\\.)*"/g, ' ') // remove strings
352-
.replace(/\{[^}]*\}/g, ' ') // remove inline actions
353-
.replace(/%prec\s+\S+/g, ' ') // remove %prec TOKEN
354-
.replace(/%empty/g, ' ') // remove %empty
355-
.replace(/\/\/.*$/g, ' ') // remove line comments
377+
const cleaned = replaceStringLiterals(text)
378+
.replace(/\{[^}]*\}/g, ' ') // remove inline actions
379+
.replace(/%prec\s+\S+/g, ' ') // remove %prec TOKEN
380+
.replace(/%empty/g, ' ') // remove %empty
381+
.replace(/\/\/.*$/g, ' ') // remove line comments
356382
.trim();
357383
const symbols: string[] = [];
358384
const regex = /\b([a-zA-Z_][a-zA-Z0-9_.]*)\b/g;
@@ -366,14 +392,17 @@ function extractSymbols(text: string): string[] {
366392
/**
367393
* Extract the first terminal or non-terminal symbol from a production RHS.
368394
* Returns undefined for empty productions (%empty) or pure action blocks.
395+
*
396+
* String literals are replaced with unique hex-encoded placeholders so that
397+
* an alternative starting with "function" has a firstSymbol starting with
398+
* `__s` (not all-caps) and is therefore not confused with a real terminal.
369399
*/
370400
function getFirstSymbol(text: string): string | undefined {
371-
const cleaned = text
372-
.replace(/"(?:[^"\\]|\\.)*"/g, ' ') // remove strings
373-
.replace(/\{[^}]*\}/g, ' ') // remove inline actions
374-
.replace(/%prec\s+\S+/g, ' ') // remove %prec TOKEN
375-
.replace(/%empty/g, ' ') // remove %empty
376-
.replace(/\/\/.*$/g, ' ') // remove line comments
401+
const cleaned = replaceStringLiterals(text)
402+
.replace(/\{[^}]*\}/g, ' ') // remove inline actions
403+
.replace(/%prec\s+\S+/g, ' ') // remove %prec TOKEN
404+
.replace(/%empty/g, ' ') // remove %empty
405+
.replace(/\/\/.*$/g, ' ') // remove line comments
377406
.trim();
378407
const m = cleaned.match(/^([a-zA-Z_][a-zA-Z0-9_.]*)/);
379408
return m ? m[1] : undefined;
@@ -427,6 +456,38 @@ function extractDollarRefs(text: string, lineNum: number, fullLine: string): Dol
427456
}
428457

429458
function extractRuleReferences(text: string, lineNum: number, fullLine: string, doc: BisonDocument): void {
459+
// Track string literals used as token aliases in rule bodies (e.g. "+" instead of PLUS,
460+
// "{" instead of LBRACE). We use a char-by-char scanner so that:
461+
// • `"{"` at brace-depth 0 → alias `{` (rule body)
462+
// • `"{"` inside `{ std::string s = "{"; }` → ignored (brace-depth > 0, action block)
463+
{
464+
let braceDepth = 0;
465+
let inString = false;
466+
let strStart = -1;
467+
for (let ci = 0; ci < text.length; ci++) {
468+
const ch = text[ci];
469+
if (inString) {
470+
if (ch === '\\') { ci++; continue; } // escape: skip next char
471+
if (ch === '"') {
472+
const alias = text.substring(strStart, ci); // content between quotes
473+
if (alias) {
474+
const rawStr = '"' + alias + '"';
475+
const col = fullLine.indexOf(rawStr);
476+
if (!doc.ruleReferences.has(alias)) doc.ruleReferences.set(alias, []);
477+
doc.ruleReferences.get(alias)!.push(
478+
Range.create(lineNum, col >= 0 ? col : 0, lineNum, (col >= 0 ? col : 0) + rawStr.length),
479+
);
480+
}
481+
inString = false;
482+
}
483+
} else {
484+
if (ch === '{') { braceDepth++; }
485+
else if (ch === '}') { braceDepth = Math.max(0, braceDepth - 1); }
486+
else if (ch === '"' && braceDepth === 0) { inString = true; strStart = ci + 1; }
487+
}
488+
}
489+
}
490+
430491
// Find identifiers in rule bodies (potential token/nonterminal references)
431492
// Skip: strings, actions (braces), %prec keyword (but keep its token), %empty, comments
432493
const cleaned = text

server/src/parser/flexParser.ts

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,23 @@ import {
1212
* Anything starting with % that isn't in this set → unknown directive diagnostic.
1313
*/
1414
const KNOWN_FLEX_DIRECTIVES = new Set([
15+
// Standard Flex directives
1516
'option', 'x', 's',
16-
'top', 'class', // RE-flex extensions
1717
'pointer', 'array', // old Flex memory model
18+
// RE-flex block directives (content is treated as embedded C++)
19+
'top', 'class',
20+
// RE-flex standalone directives (equivalents of %option name=value)
21+
'namespace', // %namespace foo → %option namespace=foo
22+
'lexer', // %lexer ClassName
23+
'lex', // %lex name
24+
'exception', // %exception type
25+
'flex', // %flex (enable Flex compatibility)
26+
'graphs-file', // %graphs-file
27+
'header-file', // %header-file "name"
28+
'regexp-file', // %regexp-file
29+
'tabs', // %tabs n
30+
'unicode', // %unicode
31+
'yywrap', // %yywrap (use yywrap() callback)
1832
]);
1933

2034
/**
@@ -196,9 +210,9 @@ export function parseFlexDocument(text: string): FlexDocument {
196210
if (trimmed.includes('*/')) inBlockComment = false;
197211
continue;
198212
}
199-
if (trimmed.startsWith('/*') && !trimmed.includes('*/')) {
200-
inBlockComment = true;
201-
continue;
213+
if (trimmed.startsWith('/*')) {
214+
if (!trimmed.includes('*/')) inBlockComment = true;
215+
continue; // skip both single-line /* ... */ and multi-line start
202216
}
203217

204218
// Skip empty lines and line comments

0 commit comments

Comments
 (0)