Skip to content

Commit e382ada

Browse files
authored
fix: add cap to path discovery, add window clause and trailing comma support (#24)
1 parent ae523d8 commit e382ada

15 files changed

Lines changed: 1231 additions & 28 deletions

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@
6868
},
6969
"license": "Apache-2.0",
7070
"dependencies": {
71-
"chevrotain": "^11.1.1"
71+
"chevrotain": "11.1.1"
7272
},
7373
"devDependencies": {
7474
"@chevrotain/cst-dts-gen": "^11.1.1",
Lines changed: 329 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,329 @@
1+
// =============================================================================
2+
// Budgeted content-assist
3+
// =============================================================================
4+
// Wraps Chevrotain's content-assist path exploration with a hard budget on the
5+
// number of paths explored. Chevrotain's pruning only activates once some
6+
// parse path consumes the input to its end; for malformed input where no
7+
// complete parse exists (unbalanced parens, an unsupported clause, stray
8+
// terminators) the DFS fans out — often exponentially in the size of the
9+
// select list — and never terminates in practice.
10+
//
11+
// The implementation below is a verbatim port of Chevrotain 11's
12+
// `nextPossibleTokensAfter` from `chevrotain/lib/src/parse/grammar/interpreter.js`
13+
// with a single change: before each stack entry is popped we increment
14+
// a counter and abort once it exceeds `maxPaths`. On abort we return whatever
15+
// complete paths were found so far — for the pathological inputs we care about
16+
// that set is typically empty, which the caller interprets as "no suggestions".
17+
// =============================================================================
18+
19+
import type { IToken, TokenType } from "chevrotain"
20+
import {
21+
Alternation,
22+
Alternative,
23+
NonTerminal,
24+
Option,
25+
Repetition,
26+
RepetitionMandatory,
27+
RepetitionMandatoryWithSeparator,
28+
RepetitionWithSeparator,
29+
Rule,
30+
Terminal,
31+
} from "chevrotain"
32+
import { parser } from "../parser/parser"
33+
34+
/**
 * A single "what may come next" answer: the token type the grammar accepts at
 * the end of the input, together with the grammar position that produced it.
 */
export interface ContentAssistSuggestion {
  /** Token type of the terminal reached once the whole input was consumed. */
  nextTokenType: TokenType
  /** Occurrence index (`idx`) of that terminal production. */
  nextTokenOccurrence: number
  /** Names of the rules entered on the way to the terminal, outermost first. */
  ruleStack: string[]
  /** Occurrence index recorded for each entry of `ruleStack`, in the same order. */
  occurrenceStack: number[]
}

/** Outcome of a budgeted content-assist exploration. */
export interface BudgetedResult {
  /** Suggestions collected so far; may be partial when `aborted` is true. */
  suggestions: ContentAssistSuggestion[]
  /** True if the path budget was hit before exploration finished. */
  aborted: boolean
}
46+
47+
/**
 * Default path budget, chosen by measurement:
 *  - valid queries with up to ~200 select items explore ~66k paths;
 *  - pathological inputs (trailing comma + function calls in the select list)
 *    grow ~4x per item and never terminate.
 * 500k leaves ~10x headroom over the largest valid case while still aborting
 * pathological inputs in <200ms.
 */
export const DEFAULT_MAX_PATHS = 500_000

// Sentinels used by the path interpreter. Chevrotain mixes grammar productions
// with plain strings inside the `def` arrays and on the work stack, so both
// slots are modelled as `unknown[]` (mirroring Chevrotain's loose JS typing)
// and narrowed only where a production's constructor requires it.
//
// The string values are compared by identity inside the interpreter loop and
// must stay exactly as written (including the "NONE" spelling).
const EXIT_NON_TERMINAL = "EXIT_NONE_TERMINAL"
// Shared one-element array holding the rule-exit sentinel, ready to be
// concatenated into a `def` array without allocating a new array each time.
const EXIT_NON_TERMINAL_ARR: readonly unknown[] = [EXIT_NON_TERMINAL]
// Stack marker pushed between alternative branches.
const EXIT_ALTERNATIVE = "EXIT_ALTERNATIVE"
60+
61+
/**
 * The subset of parser-instance fields read by the budgeted content-assist
 * entry point. Per the original author's note these fields are not part of
 * Chevrotain's public `.d.ts`, but the framework sets them on the parser
 * instance and they are stable across the Chevrotain 11.x line.
 */
interface ParserInternals {
  /** Decides whether a concrete token matches a given token type. */
  tokenMatcher: (token: IToken, tokenType: TokenType) => boolean
  /** Lookahead depth; used to decide how far back alternatives may be pruned. */
  maxLookahead: number
  /** Grammar AST of every rule, keyed by rule name. */
  gastProductionsCache: Record<string, Rule>
}

// The double cast is deliberate: the fields above are absent from the public
// parser type, so there is no direct, type-checked way to reach them.
const parserInternals = parser as unknown as ParserInternals

/** One entry on the interpreter's work stack. */
interface Path {
  /** Index of the last input token consumed on this path (-1 = none yet). */
  idx: number
  /** Productions (and string sentinels) still to be interpreted, head first. */
  def: unknown[]
  /** Names of the rules currently entered on this path. */
  ruleStack: string[]
  /** Occurrence indices matching `ruleStack` entry for entry. */
  occurrenceStack: number[]
}

// eslint-disable-next-line @typescript-eslint/no-explicit-any
type ProductionDef = any[] // concrete type hidden by Chevrotain's API
81+
82+
/**
 * Concatenates the given production lists, left to right, into one new array.
 *
 * None of the inputs is mutated, so readonly arrays (such as the shared
 * sentinel array) can be passed without a cast. Elements are appended one at
 * a time rather than via `push(...part)`: spreading passes every element as a
 * separate call argument, which can throw a `RangeError` for very large
 * arrays.
 *
 * @param parts Arrays to join; nested arrays inside them are not flattened.
 * @returns A freshly allocated array holding every element of every part.
 */
function concatDef(...parts: (readonly unknown[])[]): unknown[] {
  const out: unknown[] = []
  for (const part of parts) {
    for (const item of part) out.push(item)
  }
  return out
}
87+
88+
/**
 * Explores the grammar depth-first from `initialDef`, consuming `tokenVector`,
 * and collects every terminal that could follow the last input token. The
 * exploration stops early once more than `maxPaths` stack entries have been
 * processed.
 *
 * The work stack holds `Path` frames plus `EXIT_ALTERNATIVE` markers. Entries
 * are processed last-in-first-out, so the order of the `push` calls in each
 * branch determines the order in which alternatives are tried and must not be
 * rearranged.
 *
 * @param initialDef Productions to start from (normally the top-level rule).
 * @param tokenVector Input tokens preceding the completion point.
 * @param tokMatcher Returns true if a token matches a token type.
 * @param maxLookAhead Parser lookahead; paths at or before
 *   `tokenVector.length - maxLookAhead - 1` are pruned once a complete path is
 *   known.
 * @param maxPaths Upper bound on processed stack entries (markers count too).
 *   `Infinity` disables the cap; zero, negative values and `NaN` abort before
 *   anything is explored.
 * @returns The suggestions found so far and whether the budget was hit.
 */
function nextPossibleTokensAfterBudgeted(
  initialDef: unknown[],
  tokenVector: IToken[],
  tokMatcher: (t: IToken, tt: TokenType) => boolean,
  maxLookAhead: number,
  maxPaths: number,
): BudgetedResult {
  let foundCompletePath = false
  const tokenVectorLength = tokenVector.length
  const minimalAlternativesIndex = tokenVectorLength - maxLookAhead - 1
  const result: ContentAssistSuggestion[] = []
  // Typed work stack: either a path frame or the alternative-exit marker.
  const possiblePaths: Array<Path | typeof EXIT_ALTERNATIVE> = [
    { idx: -1, def: initialDef, ruleStack: [], occurrenceStack: [] },
  ]

  let pathsExplored = 0
  while (possiblePaths.length > 0) {
    pathsExplored++
    // Written as a negated `<=` so that a NaN budget aborts immediately;
    // `pathsExplored > NaN` is always false and would disable the cap.
    if (!(pathsExplored <= maxPaths)) {
      return { suggestions: result, aborted: true }
    }

    const currPath = possiblePaths.pop()
    if (currPath === undefined) continue

    if (currPath === EXIT_ALTERNATIVE) {
      // Once a complete path is known, drop the frame below the marker if it
      // sits at or before the lookahead cut-off.
      if (foundCompletePath) {
        const top = possiblePaths[possiblePaths.length - 1]
        if (
          top !== undefined &&
          top !== EXIT_ALTERNATIVE &&
          top.idx <= minimalAlternativesIndex
        ) {
          possiblePaths.pop()
        }
      }
      continue
    }

    const currDef = currPath.def
    const currIdx = currPath.idx
    const currRuleStack = currPath.ruleStack
    const currOccurrenceStack = currPath.occurrenceStack
    // Nothing left to interpret on this path.
    if (currDef.length === 0) continue

    const prod = currDef[0]
    if (prod === EXIT_NON_TERMINAL) {
      // Leaving a sub-rule: pop one level off both stacks.
      possiblePaths.push({
        idx: currIdx,
        def: currDef.slice(1),
        ruleStack: currRuleStack.slice(0, -1),
        occurrenceStack: currOccurrenceStack.slice(0, -1),
      })
    } else if (prod instanceof Terminal) {
      if (currIdx < tokenVectorLength - 1) {
        // Input remains: follow the path only if the next token matches.
        const nextIdx = currIdx + 1
        const actualToken = tokenVector[nextIdx]
        if (tokMatcher(actualToken, prod.terminalType)) {
          possiblePaths.push({
            idx: nextIdx,
            def: currDef.slice(1),
            ruleStack: currRuleStack,
            occurrenceStack: currOccurrenceStack,
          })
        }
      } else if (currIdx === tokenVectorLength - 1) {
        // All input consumed: this terminal is a valid next token.
        result.push({
          nextTokenType: prod.terminalType,
          nextTokenOccurrence: prod.idx,
          ruleStack: currRuleStack,
          occurrenceStack: currOccurrenceStack,
        })
        foundCompletePath = true
      }
    } else if (prod instanceof NonTerminal) {
      // Entering a sub-rule: record it and schedule the exit sentinel.
      const newRuleStack = currRuleStack.slice()
      newRuleStack.push(prod.nonTerminalName)
      const newOccurrenceStack = currOccurrenceStack.slice()
      newOccurrenceStack.push(prod.idx)
      possiblePaths.push({
        idx: currIdx,
        def: concatDef(
          prod.definition as ProductionDef,
          EXIT_NON_TERMINAL_ARR as unknown[],
          currDef.slice(1),
        ),
        ruleStack: newRuleStack,
        occurrenceStack: newOccurrenceStack,
      })
    } else if (prod instanceof Option) {
      // Pushed first, so tried last: the path that skips the option.
      possiblePaths.push({
        idx: currIdx,
        def: currDef.slice(1),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack,
      })
      possiblePaths.push(EXIT_ALTERNATIVE)
      // Pushed last, so tried first: the path that takes the option.
      possiblePaths.push({
        idx: currIdx,
        def: concatDef(prod.definition as ProductionDef, currDef.slice(1)),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack,
      })
    } else if (prod instanceof RepetitionMandatory) {
      // One mandatory iteration followed by zero-or-more further iterations.
      const secondIteration = new Repetition({
        definition: prod.definition,
        idx: prod.idx,
      })
      possiblePaths.push({
        idx: currIdx,
        def: concatDef(
          prod.definition as ProductionDef,
          [secondIteration],
          currDef.slice(1),
        ),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack,
      })
    } else if (prod instanceof RepetitionMandatoryWithSeparator) {
      // As above, but every further iteration starts with the separator.
      const separatorGast = new Terminal({ terminalType: prod.separator })
      const secondIteration = new Repetition({
        definition: [separatorGast, ...prod.definition],
        idx: prod.idx,
      })
      possiblePaths.push({
        idx: currIdx,
        def: concatDef(
          prod.definition as ProductionDef,
          [secondIteration],
          currDef.slice(1),
        ),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack,
      })
    } else if (prod instanceof RepetitionWithSeparator) {
      // Zero iterations (tried last) ...
      possiblePaths.push({
        idx: currIdx,
        def: currDef.slice(1),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack,
      })
      possiblePaths.push(EXIT_ALTERNATIVE)
      // ... or one iteration plus separator-prefixed repeats (tried first).
      const separatorGast = new Terminal({ terminalType: prod.separator })
      const nthRepetition = new Repetition({
        definition: [separatorGast, ...prod.definition],
        idx: prod.idx,
      })
      possiblePaths.push({
        idx: currIdx,
        def: concatDef(
          prod.definition as ProductionDef,
          [nthRepetition],
          currDef.slice(1),
        ),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack,
      })
    } else if (prod instanceof Repetition) {
      // Zero iterations (tried last) ...
      possiblePaths.push({
        idx: currIdx,
        def: currDef.slice(1),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack,
      })
      possiblePaths.push(EXIT_ALTERNATIVE)
      // ... or one iteration followed by the repetition again (tried first).
      const nthRepetition = new Repetition({
        definition: prod.definition,
        idx: prod.idx,
      })
      possiblePaths.push({
        idx: currIdx,
        def: concatDef(
          prod.definition as ProductionDef,
          [nthRepetition],
          currDef.slice(1),
        ),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack,
      })
    } else if (prod instanceof Alternation) {
      // Push in reverse so the first alternative ends up on top of the stack.
      for (let i = prod.definition.length - 1; i >= 0; i--) {
        const currAlt = prod.definition[i] as Alternative
        possiblePaths.push({
          idx: currIdx,
          def: concatDef(currAlt.definition as ProductionDef, currDef.slice(1)),
          ruleStack: currRuleStack,
          occurrenceStack: currOccurrenceStack,
        })
        possiblePaths.push(EXIT_ALTERNATIVE)
      }
    } else if (prod instanceof Alternative) {
      // Inline the alternative's body in front of the remaining productions.
      possiblePaths.push({
        idx: currIdx,
        def: concatDef(prod.definition as ProductionDef, currDef.slice(1)),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack,
      })
    } else if (prod instanceof Rule) {
      // Top-level rule: expand its body; its occurrence is recorded as 1.
      const newRuleStack = currRuleStack.slice()
      newRuleStack.push(prod.name)
      const newCurrOccurrenceStack = currOccurrenceStack.slice()
      newCurrOccurrenceStack.push(1)
      possiblePaths.push({
        idx: currIdx,
        def: prod.definition as ProductionDef,
        ruleStack: newRuleStack,
        occurrenceStack: newCurrOccurrenceStack,
      })
    }
    // Any other production kind falls through and the path is dropped.
  }

  return { suggestions: result, aborted: false }
}
306+
307+
/**
 * Drop-in replacement for `parser.computeContentAssist` that aborts after a
 * fixed number of explored paths. Returns the partial set of suggestions
 * collected before abort (typically empty for inputs that trigger the
 * exponential blow-up) along with an `aborted` flag.
 *
 * @param ruleName Name of the grammar rule to start exploring from.
 * @param tokens Input tokens preceding the completion point.
 * @param maxPaths Path budget; defaults to `DEFAULT_MAX_PATHS`.
 * @returns Suggestions found plus whether the budget was exhausted.
 * @throws Error if `ruleName` is not a rule of this grammar.
 */
export function computeContentAssistBudgeted(
  ruleName: string,
  tokens: IToken[],
  maxPaths: number = DEFAULT_MAX_PATHS,
): BudgetedResult {
  const rules = parserInternals.gastProductionsCache
  // Own-property check: a plain bracket lookup would also find inherited
  // members such as "constructor" or "toString" and skip the error below.
  const gast = Object.prototype.hasOwnProperty.call(rules, ruleName)
    ? rules[ruleName]
    : undefined
  if (!gast) {
    throw new Error(`Rule ->${ruleName}<- does not exist in this grammar.`)
  }
  return nextPossibleTokensAfterBudgeted(
    [gast],
    tokens,
    parserInternals.tokenMatcher,
    parserInternals.maxLookahead,
    maxPaths,
  )
}

src/autocomplete/content-assist.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import { type ILexingError, IToken, TokenType } from "chevrotain"
2-
import { parser, parse as parseRaw } from "../parser/parser"
2+
import { parse as parseRaw } from "../parser/parser"
33
import { visitor } from "../parser/visitor"
44
import { QuestDBLexer } from "../parser/lexer"
55
import type { Statement } from "../parser/ast"
66
import {
77
IDENTIFIER_KEYWORD_TOKENS,
88
EXPRESSION_OPERATORS,
99
} from "./token-classification"
10+
import { computeContentAssistBudgeted } from "./budgeted-content-assist"
1011

1112
// =============================================================================
1213
// Constants
@@ -850,7 +851,9 @@ function computeSuggestions(tokens: IToken[]): ComputeResult {
850851
const ruleName = tokens.some((t) => t.tokenType.name === "Semicolon")
851852
? "statements"
852853
: "statement"
853-
const suggestions = parser.computeContentAssist(ruleName, tokens)
854+
// Budgeted: aborts after a fixed number of path-exploration steps.
855+
// The budget keeps autocomplete responsive; when it is hit, only the suggestions collected so far (typically none) are used.
856+
const { suggestions } = computeContentAssistBudgeted(ruleName, tokens)
854857
const result = suggestions.map((s) => s.nextTokenType)
855858

856859
// Walk every IdentifierKeyword path and union the category flags valid at
@@ -875,7 +878,10 @@ function computeSuggestions(tokens: IToken[]): ComputeResult {
875878
const collapsed = collapseTrailingQualifiedRef(tokens)
876879
if (collapsed) {
877880
try {
878-
const extra = parser.computeContentAssist(ruleName, collapsed)
881+
const extra = computeContentAssistBudgeted(
882+
ruleName,
883+
collapsed,
884+
).suggestions
879885
const seen = new Set(result.map((t) => t.name))
880886
for (const s of extra) {
881887
if (!seen.has(s.nextTokenType.name)) {

0 commit comments

Comments
 (0)