Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Fixed Ask GitHub landing page chat box placement to be centered on the page instead of at the bottom. [#1046](https://github.com/sourcebot-dev/sourcebot/pull/1046)
- Fixed issue where local git connections (`file://`) would fail when matching a file instead of a directory. [#1049](https://github.com/sourcebot-dev/sourcebot/pull/1049)
- Fixed regex queries containing parentheses (e.g. `(test|render)<`) being incorrectly split into multiple search terms instead of being treated as a single regex pattern. [#1050](https://github.com/sourcebot-dev/sourcebot/pull/1050)

## [4.16.2] - 2026-03-25

Expand Down
3 changes: 2 additions & 1 deletion packages/queryLanguage/src/parser.terms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@ export const
RepoSetExpr = 16,
ParenExpr = 17,
QuotedTerm = 18,
Term = 19
Term = 19,
Dialect_regex = 0
1 change: 1 addition & 0 deletions packages/queryLanguage/src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export const parser = LRParser.deserialize({
tokenData: "/U~R_XY!QYZ!Qpq!Qrs!`#T#U$S#V#W%i#Y#Z'R#`#a(_#b#c(|#c#d)X#d#e)p#f#g+]#g#h,w#j#k-`#m#n.s~!VRm~XY!QYZ!Qpq!Q~!cWOY!`Zr!`rs!{s#O!`#O#P#Q#P;'S!`;'S;=`#|<%lO!`~#QOw~~#TRO;'S!`;'S;=`#^;=`O!`~#aXOY!`Zr!`rs!{s#O!`#O#P#Q#P;'S!`;'S;=`#|;=`<%l!`<%lO!`~$PP;=`<%l!`~$VQ#b#c$]#f#g$h~$`P#m#n$c~$hO!R~~$kP#V#W$n~$qP#[#]$t~$wP#]#^$z~$}P#j#k%Q~%TP#X#Y%W~%ZP#W#X%^~%aP![!]%d~%iOq~~%lQ![!]%r#c#d%w~%wOx~~%zP#b#c%}~&QP#h#i&T~&WP#X#Y&Z~&^Q#b#c&d#l#m&p~&gP#h#i&j~&mP![!]%r~&sP#h#i&v~&yP![!]&|~'ROy~~'UR![!]'_#]#^'d#c#d'v~'dOz~~'gP#`#a'j~'mP#X#Y'p~'sP![!]'_~'yP#f#g'|~(PP#_#`(S~(VP![!](Y~(_O{~~(bP#T#U(e~(hP#b#c(k~(nP#Z#[(q~(tP![!](w~(|O!T~~)PP#c#d)S~)XOs~~)[P#b#c)_~)bP#`#a)e~)hP#m#n)k~)pOt~~)sQ#f#g)y#i#j*n~)|P#]#^*P~*SP#j#k*V~*YP#T#U*]~*`P#h#i*c~*fP#X#Y*i~*nO!Q~~*qP#U#V*t~*wP#`#a*z~*}P#]#^+Q~+TP#V#W+W~+]O!P~~+`Q![!]+f#X#Y+k~+kO!S~~+nQ#d#e+t#j#k,l~+wP#c#d+z~+}Q![!]+f#g#h,T~,WP#X#Y,Z~,^P#h#i,a~,dP![!],g~,lO!V~~,oP![!],r~,wOu~~,zP#m#n,}~-QP#a#b-T~-WP![!]-Z~-`O!U~~-cP#]#^-f~-iP#g#h-l~-oP#]#^-r~-uP#U#V-x~-{P#]#^.O~.RP#`#a.U~.XP#]#^.[~._P#h#i.b~.eP#m#n.h~.kP![!].n~.sO}~~.vP#X#Y.y~.|P#g#h/P~/UOr~",
tokenizers: [negateToken, parenToken, wordToken, closeParenToken, orToken, 0],
topRules: {"Program":[0,1]},
dialects: {regex: 0},
tokenPrec: 200,
termNames: {"0":"⚠","1":"@top","2":"OrExpr","3":"AndExpr","4":"NegateExpr","5":"PrefixExpr","6":"ArchivedExpr","7":"RevisionExpr","8":"ContentExpr","9":"ContextExpr","10":"FileExpr","11":"ForkExpr","12":"VisibilityExpr","13":"RepoExpr","14":"LangExpr","15":"SymExpr","16":"RepoSetExpr","17":"ParenExpr","18":"QuotedTerm","19":"Term","20":"expr+","21":"(or andExpr)+","22":"␄","23":"negate","24":"openParen","25":"word","26":"closeParen","27":"or","28":"%mainskip","29":"space","30":"query","31":"andExpr","32":"expr","33":"archivedKw","34":"\"yes\"","35":"\"no\"","36":"\"only\"","37":"revisionKw","38":"value","39":"quotedString","40":"contentKw","41":"contextKw","42":"fileKw","43":"forkKw","44":"forkValue","45":"visibilityKw","46":"visibilityValue","47":"\"public\"","48":"\"private\"","49":"\"any\"","50":"repoKw","51":"langKw","52":"symKw","53":"reposetKw"}
})
2 changes: 2 additions & 0 deletions packages/queryLanguage/src/query.grammar
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
@external tokens closeParenToken from "./tokens" { closeParen }
@external tokens orToken from "./tokens" { or }

@dialects { regex }

@top Program { query }

@precedence {
Expand Down
55 changes: 41 additions & 14 deletions packages/queryLanguage/src/tokens.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { ExternalTokenizer, InputStream, Stack } from "@lezer/lr";
import { negate, openParen, closeParen, word, or, ParenExpr } from "./parser.terms";
import { negate, openParen, closeParen, word, or, Dialect_regex } from "./parser.terms";

// Character codes
const SPACE = 32;
Expand Down Expand Up @@ -243,9 +243,14 @@ function isInsideParenExpr(input: InputStream, stack: Stack): boolean {
* This allows words like "(pr" or "func(arg)" to be parsed as single terms
* while "(foo bar)" is parsed as a ParenExpr.
*/
export const parenToken = new ExternalTokenizer((input) => {
export const parenToken = new ExternalTokenizer((input, stack) => {
if (input.next !== OPEN_PAREN) return;


// In regex mode, parens are just word characters — don't emit openParen
if (stack.dialectEnabled(Dialect_regex)) {
return;
}

if (hasBalancedParensAt(input, 0)) {
// Found balanced parens - emit openParen (just the '(')
input.advance();
Expand All @@ -263,6 +268,11 @@ export const parenToken = new ExternalTokenizer((input) => {
export const closeParenToken = new ExternalTokenizer((input, stack) => {
if (input.next !== CLOSE_PAREN) return;

// In regex mode, parens are just word characters — don't emit closeParen
if (stack.dialectEnabled(Dialect_regex)) {
return;
}

// Check if we should emit closeParen (when inside a ParenExpr)
if (isInsideParenExpr(input, stack)) {
input.advance();
Expand Down Expand Up @@ -312,7 +322,20 @@ export const wordToken = new ExternalTokenizer((input, stack) => {
if (startsWithPrefix(input)) {
return;
}


// In regex mode: consume all non-whitespace characters as a single word.
// Parens and | are valid regex metacharacters, not query syntax in this mode.
if (stack.dialectEnabled(Dialect_regex)) {
const startPos = input.pos;
while (input.next !== EOF && !isWhitespace(input.next)) {
input.advance();
}
if (input.pos > startPos) {
input.acceptToken(word);
}
return;
}

// If starts with '(' and has balanced parens, determine whether this is a
// regex alternation value (e.g. file:(test|spec)) or a ParenExpr grouping.
// We're in a value context when the immediately preceding non-whitespace char
Expand Down Expand Up @@ -419,24 +442,28 @@ export const orToken = new ExternalTokenizer((input) => {
* External tokenizer for negation.
* Only tokenizes `-` as negate when followed by a prefix keyword or balanced `(`.
*/
export const negateToken = new ExternalTokenizer((input) => {
export const negateToken = new ExternalTokenizer((input, stack) => {
if (input.next !== DASH) return;

// Look ahead using peek to see what follows the dash (skipping whitespace)
let offset = 1;
while (isWhitespace(input.peek(offset))) {
offset++;
}

const chAfterDash = input.peek(offset);

// Check if followed by opening paren that starts a balanced ParenExpr
if (chAfterDash === OPEN_PAREN && hasBalancedParensAt(input, offset)) {
input.advance();
input.acceptToken(negate);
return;

// In normal mode: also check for balanced paren (negated group e.g. -(foo bar))
// In regex mode: skip this — parens are not query grouping operators, so emitting
// negate before a '(' would leave the parser without a matching ParenExpr to parse.
if (!stack.dialectEnabled(Dialect_regex)) {
if (chAfterDash === OPEN_PAREN && hasBalancedParensAt(input, offset)) {
input.advance();
input.acceptToken(negate);
return;
}
}

// Check if followed by a prefix keyword (by checking for keyword followed by colon)
let foundColon = false;
let peekOffset = offset;
Expand Down
15 changes: 15 additions & 0 deletions packages/queryLanguage/test/grammar.regex.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Grammar tests for the "regex" dialect, in which parens and `|` are
// plain word characters rather than query-grouping operators.
import { parser as _parser } from "../src/parser";
import { fileTests } from "@lezer/generator/dist/test";
import { describe, it } from "vitest";
import { fileURLToPath } from "url";
import * as fs from "fs";
import * as path from "path";

// Parser instance configured with the regex dialect enabled.
const regexParser = _parser.configure({ dialect: "regex" });
const caseDir = path.dirname(fileURLToPath(import.meta.url));

describe("regex", () => {
    // Load the fixture file and register one test per named case.
    const fixture = fs.readFileSync(path.join(caseDir, "regex.txt"), "utf8");
    const cases = fileTests(fixture, "regex.txt");
    cases.forEach(({ name, run }) => {
        it(name, () => run(regexParser));
    });
});
4 changes: 4 additions & 0 deletions packages/queryLanguage/test/grammar.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ for (const file of fs.readdirSync(caseDir)) {
if (!/\.txt$/.test(file)) {
continue;
}
// regex.txt is tested separately in grammar.regex.test.ts using the regex dialect parser
if (file === "regex.txt") {
continue;
}

let name = /^[^\.]*/.exec(file)?.[0];
describe(name ?? "unknown", () => {
Expand Down
84 changes: 84 additions & 0 deletions packages/queryLanguage/test/regex.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Regex alternation in parens followed by literal char
(test|render)<
==>
Program(Term)

# Pipe character in a term (no parens)
test|render
==>
Program(Term)

# Parens with alternation — no trailing char
(foo|bar)
==>
Program(Term)

# Parens with alternation and trailing chars
(foo|bar)baz
==>
Program(Term)

# Complex regex: quantifiers, anchors, character classes
^foo.*bar$
==>
Program(Term)

# Character class
[abc]+
==>
Program(Term)

# Regex with escaped paren
func\(arg\)
==>
Program(Term)

# Two regex terms joined with OR keyword
(test|render) or (foo|bar)
==>
Program(OrExpr(Term,Term))

# Two regex terms implicitly ANDed
(test|render) (foo|bar)
==>
Program(AndExpr(Term,Term))

# File prefix still works in regex mode
file:test.js
==>
Program(PrefixExpr(FileExpr))

# Prefix filter combined with regex term
file:test.js (test|render)<
==>
Program(AndExpr(PrefixExpr(FileExpr),Term))

# Negation of prefix still works in regex mode
-file:test.js
==>
Program(NegateExpr(PrefixExpr(FileExpr)))

# Quoted string still works in regex mode
"(test|render)"
==>
Program(QuotedTerm)

# Multiple prefix filters with regex term
file:test.js lang:TypeScript (render|mount)
==>
Program(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr),Term))

# Dash without prefix is a plain word (not negation)
-pattern
==>
Program(Term)

# 'or' at start of input is a plain word
or
==>
Program(Term)

# Regex with pipe at top level between prefix and term
repo:myorg (init|setup)
==>
Program(AndExpr(PrefixExpr(RepoExpr),Term))
11 changes: 10 additions & 1 deletion packages/web/src/features/search/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ const parser = _parser.configure({
strict: true,
});

// In regex mode, parens and | are regex metacharacters, not query grouping operators.
// The "regex" dialect makes the tokenizer treat them as plain word characters.
const regexParser = _parser.configure({
strict: true,
dialect: "regex",
});

type ArchivedValue = 'yes' | 'no' | 'only';
type VisibilityValue = 'public' | 'private' | 'any';
type ForkValue = 'yes' | 'no' | 'only';
Expand Down Expand Up @@ -82,7 +89,9 @@ export const parseQuerySyntaxIntoIR = async ({

try {
// First parse the query into a Lezer tree.
const tree = parser.parse(query);
// In regex mode, use the regex dialect so parens/| are treated as word characters.
const activeParser = (options.isRegexEnabled ?? false) ? regexParser : parser;
const tree = activeParser.parse(query);

// Then transform the tree into the intermediate representation.
return transformTreeToIR({
Expand Down
Loading