Skip to content

Commit e38a7f9

Browse files
committed
Add unknown/unused directive and rule diagnostics
Improve parsing and diagnostics for Bison and Flex files. Introduces UnknownDirective and augments Bison/Flex document models to collect unknown %directives. Parsers (bisonParser/flexParser) now recognize known directive sets and record unknown directives; bisonParser also records rule alternatives with a firstSymbol (via getFirstSymbol) to aid conflict detection. computeBisonDiagnostics gained checks for unknown directives, unused non-terminals, unused tokens, and a heuristic for shift/reduce conflicts; computeFlexDiagnostics now flags unknown directives and inaccessible rules (duplicate patterns and catch-all shadowing). Tests updated/added to cover the new diagnostics and heuristics.
1 parent 4b4a258 commit e38a7f9

5 files changed

Lines changed: 445 additions & 4 deletions

File tree

server/src/parser/bisonParser.ts

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,25 @@ import {
66
DefineDeclaration,
77
PrecedenceDeclaration,
88
RuleDefinition,
9+
RuleAlternative,
910
} from './types';
1011

12+
/**
 * Directive names (without the leading `%`) that GNU Bison accepts, including
 * deprecated aliases that Bison still parses.
 *
 * Any `%word` whose name is not in this set is reported as an unknown
 * directive, so a missing entry turns a valid grammar into a false error.
 * The list therefore errs on the side of completeness: it also contains
 * directives that normally appear in the rules section (`prec`, `dprec`,
 * `merge`, `empty`) and obsolete spellings (`binary`, `term`,
 * `fixed-output-files`, `pure-parser`, `error-verbose`).
 */
const KNOWN_BISON_DIRECTIVES = new Set<string>([
  // Symbol and type declarations
  'token', 'term', 'type', 'nterm', 'union', 'start',
  // Precedence and associativity
  'left', 'right', 'nonassoc', 'binary', 'precedence',
  'prec', 'default-prec', 'no-default-prec',
  // Rule-level modifiers
  'empty', 'dprec', 'merge',
  // Parser configuration
  'define', 'code', 'require', 'language', 'skeleton',
  'expect', 'expect-rr', 'glr-parser', 'nondeterministic-parser',
  'locations', 'debug', 'verbose', 'yacc',
  'param', 'parse-param', 'lex-param',
  'printer', 'destructor', 'initial-action',
  // Output control
  'defines', 'header', 'output', 'file-prefix', 'name-prefix',
  'no-lines', 'token-table', 'fixed-output-files',
  // Deprecated spellings still accepted by Bison
  'pure-parser', 'error-verbose',
]);
27+
1128
export function parseBisonDocument(text: string): BisonDocument {
1229
const lines = text.split(/\r?\n/);
1330
const doc: BisonDocument = {
@@ -19,6 +36,7 @@ export function parseBisonDocument(text: string): BisonDocument {
1936
rules: new Map(),
2037
separators: [],
2138
ruleReferences: new Map(),
39+
unknownDirectives: [],
2240
};
2341

2442
// Phase 1: Find %% separators (skip those inside code blocks)
@@ -163,6 +181,17 @@ export function parseBisonDocument(text: string): BisonDocument {
163181
}
164182

165183
lastTokenDirectiveLine = -1;
184+
185+
// Unknown directive: any %word that didn't match a known pattern above
186+
if (trimmed.startsWith('%') && !trimmed.startsWith('%%')) {
187+
const directiveMatch = trimmed.match(/^%([a-zA-Z][a-zA-Z0-9_-]*)/);
188+
if (directiveMatch && !KNOWN_BISON_DIRECTIVES.has(directiveMatch[1])) {
189+
doc.unknownDirectives.push({
190+
name: '%' + directiveMatch[1],
191+
location: Range.create(i, 0, i, directiveMatch[0].length),
192+
});
193+
}
194+
}
166195
}
167196

168197
// Phase 3: Parse rules section
@@ -197,11 +226,21 @@ export function parseBisonDocument(text: string): BisonDocument {
197226
alternatives: [],
198227
});
199228
}
200-
// Parse the rest of the line after ':'
229+
// Parse the rest of the line after ':' as the first alternative
201230
const rest = trimmed.substring(ruleDefMatch[0].length);
231+
const altRange = Range.create(i, 0, i, line.length);
232+
const alt: RuleAlternative = { range: altRange, firstSymbol: getFirstSymbol(rest) };
233+
doc.rules.get(currentRule)!.alternatives.push(alt);
202234
extractRuleReferences(rest, i, line, doc);
203-
} else if (trimmed.startsWith('|') || currentRule) {
204-
// Alternative or continuation
235+
} else if (trimmed.startsWith('|') && currentRule) {
236+
// New alternative: track first symbol
237+
const altBody = trimmed.slice(1); // strip leading '|'
238+
const altRange = Range.create(i, 0, i, line.length);
239+
const alt: RuleAlternative = { range: altRange, firstSymbol: getFirstSymbol(altBody) };
240+
doc.rules.get(currentRule)?.alternatives.push(alt);
241+
extractRuleReferences(trimmed, i, line, doc);
242+
} else if (currentRule) {
243+
// Continuation of current alternative (no '|', no rule def)
205244
extractRuleReferences(trimmed, i, line, doc);
206245
}
207246

@@ -228,6 +267,22 @@ export function parseBisonDocument(text: string): BisonDocument {
228267
return doc;
229268
}
230269

270+
/** Whitespace test used while scanning a production body. */
const RULE_WHITESPACE = /\s/;
/** Rule-level directives that carry no symbol and are skipped: `%empty`, `%prec TOKEN`. */
const RULE_SKIPPABLE_DIRECTIVE = /^(?:%empty|%prec\s+\S+)/;
/** A Bison symbol name as recognized by this parser. */
const RULE_SYMBOL_NAME = /^[a-zA-Z_][a-zA-Z0-9_.]*/;

/**
 * Skip a quoted literal (`"..."` or `'...'`) honoring backslash escapes.
 *
 * @param text  Text being scanned.
 * @param start Index of the opening quote.
 * @returns Index just past the closing quote, or -1 if the literal is unterminated.
 */
function skipQuotedLiteral(text: string, start: number): number {
  const quote = text.charAt(start);
  for (let i = start + 1; i < text.length; i++) {
    const ch = text.charAt(i);
    if (ch === '\\') {
      i++; // the escaped character can never close the literal
    } else if (ch === quote) {
      return i + 1;
    }
  }
  return -1;
}

/**
 * Skip a brace-delimited action, tracking nesting and ignoring braces that
 * appear inside string/character literals or block comments.
 *
 * @param text  Text being scanned.
 * @param start Index of the opening `{`.
 * @returns Index just past the matching `}`, or -1 if the action does not
 *          close within `text` (e.g. it continues on a following line).
 */
function skipBracedAction(text: string, start: number): number {
  let depth = 0;
  let i = start;
  while (i < text.length) {
    const ch = text.charAt(i);
    if (ch === '"' || ch === "'") {
      const afterLiteral = skipQuotedLiteral(text, i);
      if (afterLiteral === -1) return -1;
      i = afterLiteral;
      continue;
    }
    if (text.startsWith('//', i)) {
      return -1; // the rest of the line is a comment, so the action cannot close here
    }
    if (text.startsWith('/*', i)) {
      const close = text.indexOf('*/', i + 2);
      if (close === -1) return -1;
      i = close + 2;
      continue;
    }
    if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) return i + 1;
    }
    i++;
  }
  return -1;
}

/**
 * Extract the first named symbol (terminal or non-terminal) of a production RHS.
 *
 * Leading whitespace, comments, brace-delimited actions (including nested
 * braces), `%empty` and `%prec TOKEN` are skipped. The first thing that
 * remains decides the result.
 *
 * @param text The production body on one line, without the leading `:` or `|`.
 * @returns The identifier that starts the production, or `undefined` when
 *   - the production is empty / `%empty` / only actions or comments,
 *   - the production starts with a literal token (`"if"`, `'('`), since a
 *     literal is itself the first symbol and the identifier after it is not,
 *   - the production starts with anything else that is not an identifier, or
 *   - an action, comment or literal is left unterminated on this line.
 */
function getFirstSymbol(text: string): string | undefined {
  let pos = 0;
  while (pos < text.length) {
    const ch = text.charAt(pos);

    if (RULE_WHITESPACE.test(ch)) {
      pos++;
      continue;
    }
    if (text.startsWith('//', pos)) {
      return undefined; // nothing but a line comment remains
    }
    if (text.startsWith('/*', pos)) {
      const close = text.indexOf('*/', pos + 2);
      if (close === -1) return undefined;
      pos = close + 2;
      continue;
    }
    if (ch === '{') {
      const afterAction = skipBracedAction(text, pos);
      if (afterAction === -1) return undefined;
      pos = afterAction;
      continue;
    }
    if (ch === '%') {
      const directive = RULE_SKIPPABLE_DIRECTIVE.exec(text.slice(pos));
      if (!directive) return undefined; // some other directive: no symbol to report
      pos += directive[0].length;
      continue;
    }

    // First real token: either an identifier (reported) or a literal /
    // punctuation (not an identifier, so nothing to report).
    const symbol = RULE_SYMBOL_NAME.exec(text.slice(pos));
    return symbol ? symbol[0] : undefined;
  }
  return undefined;
}
285+
231286
function parseTokenNames(text: string, type: string | undefined, lineNum: number, doc: BisonDocument): void {
232287
// Match patterns like: NAME "alias" VALUE or just NAME
233288
const regex = /([A-Z_][A-Z0-9_]*)\s*(?:("(?:[^"\\]|\\.)*")\s*)?(?:(\d+)\s*)?/g;

server/src/parser/flexParser.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,16 @@ import {
77
FlexRule,
88
} from './types';
99

10+
/**
 * Directive names (without the leading `%`) accepted by Flex / RE-flex.
 *
 * Any `%word` whose name is not in this set is reported as an unknown
 * directive, so the set must include every spelling the tools accept:
 *  - start-condition declarations in short and long form,
 *  - the single-letter table-size directives inherited from AT&T/POSIX lex,
 *    which Flex parses and ignores,
 *  - RE-flex code-section and include directives.
 */
const KNOWN_FLEX_DIRECTIVES = new Set<string>([
  'option',
  // Start conditions: %s / %x and their long spellings
  's', 'x', 'start', 'state', 'xstate',
  // Old Flex memory model for yytext
  'pointer', 'array',
  // Legacy lex table-size directives (accepted and ignored by Flex)
  'a', 'c', 'e', 'k', 'n', 'o', 'p', 'r',
  // Code sections and RE-flex extensions
  'top', 'class', 'init', 'include',
]);
19+
1020
/**
1121
* Code block types in Flex/RE-flex files:
1222
* - %{ ... %} prologue block
@@ -82,6 +92,7 @@ export function parseFlexDocument(text: string): FlexDocument {
8292
separators: [],
8393
startConditionRefs: new Map(),
8494
abbreviationRefs: new Map(),
95+
unknownDirectives: [],
8596
};
8697

8798
// Build skip map for code blocks
@@ -159,6 +170,17 @@ export function parseFlexDocument(text: string): FlexDocument {
159170
});
160171
continue;
161172
}
173+
174+
// Unknown directive: any %word that didn't match a known pattern above
175+
if (trimmed.startsWith('%') && !trimmed.startsWith('%%') && !trimmed.startsWith('%{') && !trimmed.startsWith('%}')) {
176+
const directiveMatch = trimmed.match(/^%([a-zA-Z][a-zA-Z0-9_-]*)/);
177+
if (directiveMatch && !KNOWN_FLEX_DIRECTIVES.has(directiveMatch[1])) {
178+
doc.unknownDirectives.push({
179+
name: '%' + directiveMatch[1],
180+
location: Range.create(i, 0, i, directiveMatch[0].length),
181+
});
182+
}
183+
}
162184
}
163185

164186
// Phase 3: Parse rules section

server/src/parser/types.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
import { Range } from 'vscode-languageserver';
22

3+
/**
 * A `%directive` found while parsing that is not in the parser's set of known
 * directive names. Collected on the document model so the diagnostics pass
 * can report it.
 */
export interface UnknownDirective {
4+
// Directive text including the leading '%'.
name: string; // e.g. "%prout"
5+
// Range to highlight for this directive in the source document.
location: Range;
6+
}
7+
38
export interface TokenDeclaration {
49
name: string;
510
type?: string; // e.g., "int", "std::string"
@@ -31,10 +36,15 @@ export interface CodeBlock {
3136
range: Range;
3237
}
3338

39+
/**
 * One alternative (production) of a grammar rule.
 */
export interface RuleAlternative {
40+
// Source range associated with this alternative.
range: Range;
41+
// Absent when no leading symbol could be extracted (e.g. an empty production).
firstSymbol?: string; // first terminal/non-terminal of this production (for conflict detection)
42+
}
43+
3444
/**
 * A grammar rule (non-terminal) definition: its name, where it is defined,
 * and the alternatives recorded for it.
 */
export interface RuleDefinition {
3545
// Name of the non-terminal being defined.
name: string;
3646
// Range of the rule definition in the source document.
location: Range;
37-
alternatives: Range[];
47+
// One entry per alternative recorded for this rule.
alternatives: RuleAlternative[];
3848
}
3949

4050
export interface BisonDocument {
@@ -47,6 +57,7 @@ export interface BisonDocument {
4757
separators: number[]; // line numbers of %%
4858
startSymbol?: string;
4959
ruleReferences: Map<string, Range[]>; // symbol name -> locations used in rules RHS
60+
unknownDirectives: UnknownDirective[];
5061
}
5162

5263
export interface FlexOption {
@@ -82,6 +93,7 @@ export interface FlexDocument {
8293
separators: number[];
8394
startConditionRefs: Map<string, Range[]>; // SC name -> locations used in rules
8495
abbreviationRefs: Map<string, Range[]>; // abbrev name -> locations used in rules
96+
unknownDirectives: UnknownDirective[];
8597
}
8698

8799
export type DocumentModel = BisonDocument | FlexDocument;

server/src/providers/diagnostics.ts

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,16 @@ export function computeBisonDiagnostics(doc: BisonDocument, text: string): Diagn
1616
return diagnostics; // Can't do much more without sections
1717
}
1818

19+
// ── TASK 1: Unknown directives ──────────────────────────────────────────────
20+
for (const unk of doc.unknownDirectives) {
21+
diagnostics.push({
22+
severity: DiagnosticSeverity.Error,
23+
range: unk.location,
24+
message: `Unknown Bison directive '${unk.name}'. Check the Bison manual for valid directives.`,
25+
source: 'bison',
26+
});
27+
}
28+
1929
// 2. Duplicate token declarations
2030
const tokenCounts = new Map<string, Range[]>();
2131
for (const [name, decl] of doc.tokens) {
@@ -94,6 +104,60 @@ export function computeBisonDiagnostics(doc: BisonDocument, text: string): Diagn
94104
});
95105
}
96106

107+
// ── TASK 2: Unused rules (non-terminals never referenced) ───────────────────
108+
// If %start is not declared, Bison uses the first rule as the implicit start symbol
109+
const effectiveStart = doc.startSymbol ?? (doc.rules.size > 0 ? [...doc.rules.keys()][0] : undefined);
110+
111+
for (const [name, rule] of doc.rules) {
112+
// The start symbol is the grammar entry point — always "used"
113+
if (name === effectiveStart) continue;
114+
// If this name never appears in any rule body, it is unreachable
115+
if (!doc.ruleReferences.has(name)) {
116+
diagnostics.push({
117+
severity: DiagnosticSeverity.Warning,
118+
range: rule.location,
119+
message: `Non-terminal '${name}' is defined but never referenced in any rule. It is unreachable from the grammar.`,
120+
source: 'bison',
121+
});
122+
}
123+
}
124+
125+
// ── TASK 3: Unused tokens ────────────────────────────────────────────────────
126+
for (const [name, decl] of doc.tokens) {
127+
if (!doc.ruleReferences.has(name)) {
128+
diagnostics.push({
129+
severity: DiagnosticSeverity.Warning,
130+
range: decl.location,
131+
message: `Token '${name}' is declared with %token but never used in any rule.`,
132+
source: 'bison',
133+
});
134+
}
135+
}
136+
137+
// ── TASK 4: Obvious shift/reduce conflicts ───────────────────────────────────
138+
// Heuristic: same terminal token appears as first symbol in ≥2 alternatives
139+
// of the same rule, with no %prec disambiguation tracked.
140+
for (const [name, rule] of doc.rules) {
141+
// Count how many alternatives start with each terminal (ALL_CAPS)
142+
const firstTerminalCount = new Map<string, number>();
143+
for (const alt of rule.alternatives) {
144+
const sym = alt.firstSymbol;
145+
if (sym && /^[A-Z_][A-Z0-9_]*$/.test(sym) && doc.tokens.has(sym)) {
146+
firstTerminalCount.set(sym, (firstTerminalCount.get(sym) ?? 0) + 1);
147+
}
148+
}
149+
for (const [token, count] of firstTerminalCount) {
150+
if (count >= 2) {
151+
diagnostics.push({
152+
severity: DiagnosticSeverity.Warning,
153+
range: rule.location,
154+
message: `Potential shift/reduce conflict in rule '${name}': token '${token}' starts ${count} alternatives without precedence disambiguation (%prec / %left / %right).`,
155+
source: 'bison',
156+
});
157+
}
158+
}
159+
}
160+
97161
return diagnostics;
98162
}
99163

@@ -112,6 +176,16 @@ export function computeFlexDiagnostics(doc: FlexDocument, text: string): Diagnos
112176
return diagnostics;
113177
}
114178

179+
// ── TASK 1: Unknown directives ──────────────────────────────────────────────
180+
for (const unk of doc.unknownDirectives) {
181+
diagnostics.push({
182+
severity: DiagnosticSeverity.Error,
183+
range: unk.location,
184+
message: `Unknown Flex directive '${unk.name}'. Valid directives are %option, %x, %s, %top, %class.`,
185+
source: 'flex',
186+
});
187+
}
188+
115189
// 2. Undefined start conditions used in rules
116190
for (const [name, refs] of doc.startConditionRefs) {
117191
if (!doc.startConditions.has(name) && name !== 'INITIAL') {
@@ -180,5 +254,74 @@ export function computeFlexDiagnostics(doc: FlexDocument, text: string): Diagnos
180254
});
181255
}
182256

257+
// ── TASK 5: Inaccessible Flex rules ─────────────────────────────────────────
258+
// Heuristic A: Exact duplicate pattern → second one is always shadowed.
259+
// Heuristic B: Catch-all pattern (. or .* or .*\n etc.) before specific patterns
260+
// in the same start-condition context → subsequent rules unreachable.
261+
262+
// Build a canonical "context key" for a rule: its start conditions sorted and
// comma-joined, or "INITIAL" when the rule lists none. Sorting a copy makes the
// key independent of the order the conditions were written in and leaves
// rule.startConditions itself unmodified.
263+
const contextKey = (rule: typeof doc.rules[0]): string =>
264+
rule.startConditions.length > 0 ? [...rule.startConditions].sort().join(',') : 'INITIAL';
265+
266+
/**
267+
* Extract just the regex part of a Flex rule pattern string.
268+
* doc.rules[].pattern is the full trimmed line: "<SC> pattern { action }"
269+
* We strip the optional <SC> prefix, then take the first non-space token (the regex).
270+
* In Flex, patterns cannot contain unescaped spaces, so the pattern ends at
271+
* the first whitespace after the regex.
272+
*/
273+
const rawPattern = (pattern: string): string => {
274+
// Remove optional <SC> or <SC1,SC2> prefix
275+
let p = pattern.replace(/^<[A-Z_*][A-Z0-9_,*]*>\s*/, '').trimStart();
276+
// The pattern is the first "word" — Flex patterns have no unescaped spaces
277+
const m = p.match(/^(\S+)/);
278+
return m ? m[1] : p;
279+
};
280+
281+
// Catch-all patterns that would shadow everything after them.
// Entries written with '\\n' are the two characters backslash + 'n', i.e. the
// escape as it is typed in a .l file.
// NOTE(review): the entries written with a single backslash ('(.|\n)*' and
// '(.|\n)+') contain a real newline character instead; confirm whether a
// pattern extracted from a single source line can ever equal them.
282+
const CATCHALL_PATTERNS = new Set(['.', '.*', '.+', '.|\\n', '(.|\n)*', '(.|\n)+', '(.|\\n)*', '(.|\\n)+']);
283+
284+
// Track: first seen pattern per context (for duplicate detection)
285+
const seenPatterns = new Map<string, number>(); // "context|pattern" -> line number of first occurrence
286+
287+
// Track: catch-all line per context key
288+
const catchallLine = new Map<string, number>(); // context -> line number
289+
290+
for (const rule of doc.rules) {
291+
const ctx = contextKey(rule);
292+
const pat = rawPattern(rule.pattern);
293+
const lineNum = rule.location.start.line;
294+
const dupKey = `${ctx}|${pat}`;
295+
296+
// Heuristic B: is this rule after a catch-all in the same context?
297+
if (catchallLine.has(ctx) && !CATCHALL_PATTERNS.has(pat)) {
298+
const catchLine = catchallLine.get(ctx)!;
299+
diagnostics.push({
300+
severity: DiagnosticSeverity.Warning,
301+
range: rule.location,
302+
message: `Flex rule '${pat}' may be inaccessible: catch-all pattern at line ${catchLine + 1} will always match first.`,
303+
source: 'flex',
304+
});
305+
}
306+
307+
// Heuristic A: duplicate pattern in same context?
308+
if (seenPatterns.has(dupKey)) {
309+
const firstLine = seenPatterns.get(dupKey)!;
310+
diagnostics.push({
311+
severity: DiagnosticSeverity.Warning,
312+
range: rule.location,
313+
message: `Flex rule '${pat}' is inaccessible: identical pattern already defined at line ${firstLine + 1}.`,
314+
source: 'flex',
315+
});
316+
} else {
317+
seenPatterns.set(dupKey, lineNum);
318+
}
319+
320+
// Register catch-all (only on first occurrence in this context)
321+
if (CATCHALL_PATTERNS.has(pat) && !catchallLine.has(ctx)) {
322+
catchallLine.set(ctx, lineNum);
323+
}
324+
}
325+
183326
return diagnostics;
184327
}

0 commit comments

Comments
 (0)