11import { ExternalTokenizer , InputStream , Stack } from "@lezer/lr" ;
2- import { negate , openParen , closeParen , word , or , ParenExpr } from "./parser.terms" ;
2+ import { negate , openParen , closeParen , word , or , Dialect_regex } from "./parser.terms" ;
33
44// Character codes
55const SPACE = 32 ;
@@ -243,9 +243,14 @@ function isInsideParenExpr(input: InputStream, stack: Stack): boolean {
243243 * This allows words like "(pr" or "func(arg)" to be parsed as single terms
244244 * while "(foo bar)" is parsed as a ParenExpr.
245245 */
246- export const parenToken = new ExternalTokenizer ( ( input ) => {
246+ export const parenToken = new ExternalTokenizer ( ( input , stack ) => {
247247 if ( input . next !== OPEN_PAREN ) return ;
248-
248+
249+ // In regex mode, parens are just word characters — don't emit openParen
250+ if ( stack . dialectEnabled ( Dialect_regex ) ) {
251+ return ;
252+ }
253+
249254 if ( hasBalancedParensAt ( input , 0 ) ) {
250255 // Found balanced parens - emit openParen (just the '(')
251256 input . advance ( ) ;
@@ -263,6 +268,11 @@ export const parenToken = new ExternalTokenizer((input) => {
263268export const closeParenToken = new ExternalTokenizer ( ( input , stack ) => {
264269 if ( input . next !== CLOSE_PAREN ) return ;
265270
271+ // In regex mode, parens are just word characters — don't emit closeParen
272+ if ( stack . dialectEnabled ( Dialect_regex ) ) {
273+ return ;
274+ }
275+
266276 // Check if we should emit closeParen (when inside a ParenExpr)
267277 if ( isInsideParenExpr ( input , stack ) ) {
268278 input . advance ( ) ;
@@ -312,7 +322,20 @@ export const wordToken = new ExternalTokenizer((input, stack) => {
312322 if ( startsWithPrefix ( input ) ) {
313323 return ;
314324 }
315-
325+
326+ // In regex mode: consume all non-whitespace characters as a single word.
327+ // Parens and | are valid regex metacharacters, not query syntax in this mode.
328+ if ( stack . dialectEnabled ( Dialect_regex ) ) {
329+ const startPos = input . pos ;
330+ while ( input . next !== EOF && ! isWhitespace ( input . next ) ) {
331+ input . advance ( ) ;
332+ }
333+ if ( input . pos > startPos ) {
334+ input . acceptToken ( word ) ;
335+ }
336+ return ;
337+ }
338+
316339 // If starts with '(' and has balanced parens, determine whether this is a
317340 // regex alternation value (e.g. file:(test|spec)) or a ParenExpr grouping.
318341 // We're in a value context when the immediately preceding non-whitespace char
@@ -419,24 +442,28 @@ export const orToken = new ExternalTokenizer((input) => {
419442 * External tokenizer for negation.
420443 * Only tokenizes `-` as negate when followed by a prefix keyword or, outside the regex dialect, a balanced `(`.
421444 */
422- export const negateToken = new ExternalTokenizer ( ( input ) => {
445+ export const negateToken = new ExternalTokenizer ( ( input , stack ) => {
423446 if ( input . next !== DASH ) return ;
424-
447+
425448 // Look ahead using peek to see what follows the dash (skipping whitespace)
426449 let offset = 1 ;
427450 while ( isWhitespace ( input . peek ( offset ) ) ) {
428451 offset ++ ;
429452 }
430-
453+
431454 const chAfterDash = input . peek ( offset ) ;
432-
433- // Check if followed by opening paren that starts a balanced ParenExpr
434- if ( chAfterDash === OPEN_PAREN && hasBalancedParensAt ( input , offset ) ) {
435- input . advance ( ) ;
436- input . acceptToken ( negate ) ;
437- return ;
455+
456+ // In normal mode: also check for balanced paren (negated group e.g. -(foo bar))
457+ // In regex mode: skip this — parens are not query grouping operators, so emitting
458+ // negate before a '(' would leave the parser without a matching ParenExpr to parse.
459+ if ( ! stack . dialectEnabled ( Dialect_regex ) ) {
460+ if ( chAfterDash === OPEN_PAREN && hasBalancedParensAt ( input , offset ) ) {
461+ input . advance ( ) ;
462+ input . acceptToken ( negate ) ;
463+ return ;
464+ }
438465 }
439-
466+
440467 // Check if followed by a prefix keyword (by checking for keyword followed by colon)
441468 let foundColon = false ;
442469 let peekOffset = offset ;
0 commit comments