@@ -16,6 +16,16 @@ export function computeBisonDiagnostics(doc: BisonDocument, text: string): Diagn
1616 return diagnostics ; // Can't do much more without sections
1717 }
1818
// ── TASK 1: Unknown directives ──────────────────────────────────────────────
// Emit one error per entry of doc.unknownDirectives, anchored at the
// location recorded for that entry.
for (const { name, location } of doc.unknownDirectives) {
  const message = `Unknown Bison directive '${name}'. Check the Bison manual for valid directives.`;
  diagnostics.push({
    severity: DiagnosticSeverity.Error,
    range: location,
    message,
    source: 'bison',
  });
}
28+
1929 // 2. Duplicate token declarations
2030 const tokenCounts = new Map < string , Range [ ] > ( ) ;
2131 for ( const [ name , decl ] of doc . tokens ) {
@@ -94,6 +104,60 @@ export function computeBisonDiagnostics(doc: BisonDocument, text: string): Diagn
94104 } ) ;
95105 }
96106
// ── TASK 2: Unused rules (non-terminals never referenced) ───────────────────
// When the grammar has no %start declaration, Bison takes the first rule as
// the implicit start symbol, so fall back to the first key of doc.rules
// (undefined when there are no rules at all).
const effectiveStart = doc.startSymbol ?? Array.from(doc.rules.keys()).shift();

for (const [name, rule] of doc.rules) {
  // The start symbol is the entry point of the grammar and therefore counts
  // as used; so does any name recorded in doc.ruleReferences.
  const isEntryPoint = name === effectiveStart;
  if (isEntryPoint || doc.ruleReferences.has(name)) {
    continue;
  }

  diagnostics.push({
    severity: DiagnosticSeverity.Warning,
    range: rule.location,
    message: `Non-terminal '${name}' is defined but never referenced in any rule. It is unreachable from the grammar.`,
    source: 'bison',
  });
}
124+
// ── TASK 3: Unused tokens ────────────────────────────────────────────────────
// Warn about every declared token whose name is absent from doc.ruleReferences.
for (const [name, decl] of doc.tokens) {
  if (doc.ruleReferences.has(name)) {
    continue;
  }

  diagnostics.push({
    severity: DiagnosticSeverity.Warning,
    range: decl.location,
    message: `Token '${name}' is declared with %token but never used in any rule.`,
    source: 'bison',
  });
}
136+
// ── TASK 4: Obvious shift/reduce conflicts ───────────────────────────────────
// Heuristic only: within a single rule, flag any declared ALL_CAPS token that
// is the first symbol of two or more alternatives. Precedence declarations
// are not consulted here.
for (const [name, rule] of doc.rules) {
  // Per-rule tally: leading terminal -> number of alternatives it opens.
  const leadingTerminals = new Map<string, number>();

  for (const { firstSymbol } of rule.alternatives) {
    if (!firstSymbol) continue;
    if (!/^[A-Z_][A-Z0-9_]*$/.test(firstSymbol)) continue;
    if (!doc.tokens.has(firstSymbol)) continue;

    const seenSoFar = leadingTerminals.get(firstSymbol) ?? 0;
    leadingTerminals.set(firstSymbol, seenSoFar + 1);
  }

  leadingTerminals.forEach((count, token) => {
    if (count < 2) {
      return;
    }
    diagnostics.push({
      severity: DiagnosticSeverity.Warning,
      range: rule.location,
      message: `Potential shift/reduce conflict in rule '${name}': token '${token}' starts ${count} alternatives without precedence disambiguation (%prec / %left / %right).`,
      source: 'bison',
    });
  });
}
160+
97161 return diagnostics ;
98162}
99163
@@ -112,6 +176,16 @@ export function computeFlexDiagnostics(doc: FlexDocument, text: string): Diagnos
112176 return diagnostics ;
113177 }
114178
// ── TASK 1: Unknown directives ──────────────────────────────────────────────
// Emit one error per entry of doc.unknownDirectives, anchored at the
// location recorded for that entry.
for (const { name, location } of doc.unknownDirectives) {
  const message = `Unknown Flex directive '${name}'. Valid directives are %option, %x, %s, %top, %class.`;
  diagnostics.push({
    severity: DiagnosticSeverity.Error,
    range: location,
    message,
    source: 'flex',
  });
}
188+
115189 // 2. Undefined start conditions used in rules
116190 for ( const [ name , refs ] of doc . startConditionRefs ) {
117191 if ( ! doc . startConditions . has ( name ) && name !== 'INITIAL' ) {
@@ -180,5 +254,74 @@ export function computeFlexDiagnostics(doc: FlexDocument, text: string): Diagnos
180254 } ) ;
181255 }
182256
257+ // ── TASK 5: Inaccessible Flex rules ─────────────────────────────────────────
258+ // Heuristic A: Exact duplicate pattern → second one is always shadowed.
259+ // Heuristic B: Catch-all pattern (. or .* or .*\n etc.) before specific patterns
260+ // in the same start-condition context → subsequent rules unreachable.
261+
// Canonical "context key" for a rule: its start conditions sorted and joined
// with ',', or the literal "INITIAL" when the rule lists none. Sorting a copy
// keeps the rule's own array untouched and makes <A,B> and <B,A> compare equal.
const contextKey = (rule: typeof doc.rules[0]): string => {
  if (rule.startConditions.length > 0) {
    const ordered = Array.from(rule.startConditions);
    ordered.sort();
    return ordered.join(',');
  }
  return 'INITIAL';
};
265+
/**
 * Extract just the regex part of a Flex rule line.
 *
 * The input is expected to look like "<SC> pattern { action }" (the <SC>
 * prefix being optional). The prefix is removed and the pattern is taken to
 * end at the first whitespace character that is NOT
 *   - escaped with a backslash (`a\ b`),
 *   - inside a double-quoted literal (`"end if"`), or
 *   - inside a character class (`[ \t]+`, including POSIX classes such as
 *     `[[:alpha:] ]`).
 *
 * If a quote or character class is never closed, the function falls back to
 * the first whitespace-delimited token so that a malformed line does not
 * swallow its action.
 *
 * @param pattern Rule text, optionally prefixed by a start-condition list.
 * @returns The pattern portion, or '' when nothing remains after the prefix.
 */
const rawPattern = (pattern: string): string => {
  // Remove an optional <SC>, <SC1,SC2> or <*> prefix. Lower-case letters are
  // accepted too; `<<EOF>>` is left alone because its second character is '<'.
  const p = pattern.replace(/^<[A-Za-z_*][A-Za-z0-9_,*]*>\s*/, '').trimStart();

  const whitespace = /\s/;
  let inQuotes = false;
  let inClass = false;
  let end = p.length;

  for (let i = 0; i < p.length; i++) {
    const ch = p[i];

    if (ch === '\\') {
      // Backslash escapes the next character everywhere; skip over it.
      i++;
      continue;
    }

    if (inQuotes) {
      if (ch === '"') inQuotes = false;
      continue;
    }

    if (inClass) {
      if (ch === '[' && p[i + 1] === ':') {
        // POSIX class such as [:alpha:] — jump past its closing ":]" so that
        // the inner ']' does not terminate the enclosing character class.
        const close = p.indexOf(':]', i + 2);
        if (close !== -1) i = close + 1;
      } else if (ch === ']') {
        inClass = false;
      }
      continue;
    }

    if (ch === '"') {
      inQuotes = true;
    } else if (ch === '[') {
      inClass = true;
    } else if (whitespace.test(ch)) {
      end = i;
      break;
    }
  }

  if (inQuotes || inClass) {
    // Unterminated quote/class: fall back to the first whitespace-free run.
    const m = p.match(/^\S+/);
    return m ? m[0] : p;
  }

  return p.slice(0, end);
};
280+
// Patterns treated as catch-alls: once one of these has been seen in a
// context, later non-catch-all rules in that context get a warning.
const CATCHALL_PATTERNS = new Set(['.', '.*', '.+', '.|\\n', '(.|\n)*', '(.|\n)+', '(.|\\n)*', '(.|\\n)+']);

// "context|pattern" -> line of the first rule with that pattern in that context.
const seenPatterns = new Map<string, number>();

// context -> line of the first catch-all rule seen in that context.
const catchallLine = new Map<string, number>();

for (const rule of doc.rules) {
  const scope = contextKey(rule);
  const regexText = rawPattern(rule.pattern);
  const { line } = rule.location.start;
  const isCatchAll = CATCHALL_PATTERNS.has(regexText);
  const shadowedFrom = catchallLine.get(scope);

  // Heuristic B: a non-catch-all rule that follows a catch-all in its context.
  if (shadowedFrom !== undefined && !isCatchAll) {
    diagnostics.push({
      severity: DiagnosticSeverity.Warning,
      range: rule.location,
      message: `Flex rule '${regexText}' may be inaccessible: catch-all pattern at line ${shadowedFrom + 1} will always match first.`,
      source: 'flex',
    });
  }

  // Heuristic A: same pattern text already registered for this context.
  const occurrenceKey = `${scope}|${regexText}`;
  const firstSeenAt = seenPatterns.get(occurrenceKey);
  if (firstSeenAt === undefined) {
    seenPatterns.set(occurrenceKey, line);
  } else {
    diagnostics.push({
      severity: DiagnosticSeverity.Warning,
      range: rule.location,
      message: `Flex rule '${regexText}' is inaccessible: identical pattern already defined at line ${firstSeenAt + 1}.`,
      source: 'flex',
    });
  }

  // Only the first catch-all per context is remembered.
  if (isCatchAll && shadowedFrom === undefined) {
    catchallLine.set(scope, line);
  }
}
325+
183326 return diagnostics ;
184327}
0 commit comments