Skip to content

Commit 226fa9c

Browse files
committed
properly quote regexp filters in tool calls if it includes a parenthesis
1 parent 6f22ffa commit 226fa9c

6 files changed

Lines changed: 44 additions & 7 deletions

File tree

packages/mcp/src/index.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// Entry point for the MCP server
44
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
55
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
6+
import { preprocessRegexp } from '@sourcebot/shared';
67
import escapeStringRegexp from 'escape-string-regexp';
78
import { z } from 'zod';
89
import { listRepos, search, getFileSource } from './client.js';
@@ -25,7 +26,8 @@ server.tool(
2526
If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.
2627
If the \`includeCodeSnippets\` property is true, code snippets containing the matches will be included in the response. Only set this to true if the request requires code snippets (e.g., show me examples where library X is used).
2728
When referencing a file in your response, **ALWAYS** include the file's external URL as a link. This makes it easier for the user to view the file, even if they don't have it locally checked out.
28-
**ONLY USE** the \`filterByRepoIds\` property if the request requires searching a specific repo(s). Otherwise, leave it empty.`,
29+
**ONLY USE** the \`filterByRepoIds\` property if the request requires searching a specific repo(s). Otherwise, leave it empty.
30+
If the request is asking to search for a specific file or results for files in a specific file path, **YOU MUST** ensure that the \`filterByFile\` property is used.`,
2931
{
3032
query: z
3133
.string()
@@ -41,6 +43,10 @@ server.tool(
4143
.array(z.string())
4244
.describe(`Scope the search to the provided languages. The language MUST be formatted as a GitHub linguist language. Examples: Python, JavaScript, TypeScript, Java, C#, C++, PHP, Go, Rust, Ruby, Swift, Kotlin, Shell, C, Dart, HTML, CSS, PowerShell, SQL, R`)
4345
.optional(),
46+
filterByFile: z
47+
.array(z.string())
48+
.describe("Scope the search to results inside filepaths that match the provided regex expression. By default all files are searched, so **only use this filter if you need to filter on specific files**. **YOU MUST** ensure that this is a valid regex expression and any special characters are properly escaped. If the regex expresion includes a paranthesis **YOU MUST** wrap this value in quotes when passing it in.")
49+
.optional(),
4450
caseSensitive: z
4551
.boolean()
4652
.describe(`Whether the search should be case sensitive (default: false).`)
@@ -58,6 +64,7 @@ server.tool(
5864
query,
5965
filterByRepoIds: repoIds = [],
6066
filterByLanguages: languages = [],
67+
filterByFile: filePath = [],
6168
maxTokens = env.DEFAULT_MINIMUM_TOKENS,
6269
includeCodeSnippets = false,
6370
caseSensitive = false,
@@ -70,6 +77,11 @@ server.tool(
7077
query += ` ( lang:${languages.join(' or lang:')} )`;
7178
}
7279

80+
if (filePath.length > 0) {
81+
const quotedFilters = filePath.map(preprocessRegexp);
82+
query += ` ( file:${quotedFilters.join(' or file:')} )`;
83+
}
84+
7385
const response = await search({
7486
query,
7587
matches: env.DEFAULT_MATCHES,

packages/shared/src/index.client.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,7 @@ export {
44
} from "./env.client.js";
55
export {
66
SOURCEBOT_VERSION,
7-
} from "./version.js";
7+
} from "./version.js";
8+
export {
9+
preprocessRegexp,
10+
} from "./query.js";

packages/shared/src/index.server.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,7 @@ export {
4949
} from "./db.js";
5050
export {
5151
SOURCEBOT_VERSION,
52-
} from "./version.js";
52+
} from "./version.js";
53+
export {
54+
preprocessRegexp,
55+
} from "./query.js";

packages/shared/src/query.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/**
2+
* Wraps a value in double quotes if it contains parentheses and isn't already quoted; otherwise returns the value unchanged.
3+
*
4+
* This is needed because the query language does not allow values to include parentheses unless they're quoted. This is
5+
* due to the ParenExpr symbol, which parses on these parentheses. We instruct the agent to wrap the regexp in quotes,
6+
* but that is flaky, so we do it here as well, as a backup plan to ensure the parser doesn't fail.
7+
*/
8+
export const preprocessRegexp = (value: string): string => {
9+
const hasParentheses = value.includes('(') || value.includes(')');
10+
const isAlreadyQuoted = value.startsWith('"') && value.endsWith('"');
11+
if (hasParentheses && !isAlreadyQuoted) {
12+
return `"${value}"`;
13+
}
14+
return value;
15+
};

packages/web/src/features/chat/tools.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,8 @@ export type ReadFilesToolUIPart = ToolUIPart<{ [toolNames.readFiles]: ReadFilesT
140140

141141
export const createCodeSearchTool = (selectedRepos: string[]) => tool({
142142
description: `Fetches code that matches the provided regex pattern in \`query\`. This is NOT a semantic search.
143-
Results are returned as an array of matching files, with the file's URL, repository, and language.`,
143+
Results are returned as an array of matching files, with the file's URL, repository, and language.
144+
If the request is asking to search for a file, or asking to only search for results within a specific filepath, **YOU MUST** use the fileNamesFilterRegexp to properly fulfil this request.`,
144145
inputSchema: z.object({
145146
queryRegexp: z
146147
.string()
@@ -168,7 +169,7 @@ Multiple expressions can be or'd together with or, negated with -, or grouped wi
168169
.optional(),
169170
fileNamesFilterRegexp: z
170171
.array(z.string())
171-
.describe(`Filter results from filepaths that match the regex. When this option is not specified, all files are searched.`)
172+
.describe(`Filter results from filepaths that match the regex. When this option is not specified, all files are searched. If the regex expresion includes a paranthesis **YOU MUST** wrap this value in quotes when passing it in.`)
172173
.optional(),
173174
limit: z.number().default(10).describe("Maximum number of matches to return (default: 100)"),
174175
}),

packages/web/src/features/chat/utils.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { CreateUIMessage, TextUIPart, UIMessagePart } from "ai";
22
import { Descendant, Editor, Point, Range, Transforms } from "slate";
3+
import { preprocessRegexp } from "@sourcebot/shared/client";
34
import { ANSWER_TAG, FILE_REFERENCE_PREFIX, FILE_REFERENCE_REGEX } from "./constants";
45
import {
56
CustomEditor,
@@ -358,11 +359,13 @@ export const buildSearchQuery = (options: {
358359
}
359360

360361
if (fileNamesFilterRegexp && fileNamesFilterRegexp.length > 0) {
361-
query += ` ( file:${fileNamesFilterRegexp.join(' or file:')} )`;
362+
const quotedFilters = fileNamesFilterRegexp.map(preprocessRegexp);
363+
query += ` ( file:${quotedFilters.join(' or file:')} )`;
362364
}
363365

364366
if (repoNamesFilterRegexp && repoNamesFilterRegexp.length > 0) {
365-
query += ` ( repo:${repoNamesFilterRegexp.join(' or repo:')} )`;
367+
const quotedFilters = repoNamesFilterRegexp.map(preprocessRegexp);
368+
query += ` ( repo:${quotedFilters.join(' or repo:')} )`;
366369
}
367370

368371
return query;

0 commit comments

Comments
 (0)