Skip to content

Commit 83b5c2d

Browse files
wip: use external url for search as well
1 parent 3441ab7 commit 83b5c2d

8 files changed

Lines changed: 67 additions & 60 deletions

File tree

packages/mcp/src/index.ts

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,13 @@
33
// Entry point for the MCP server
44
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
55
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
6+
import _dedent from "dedent";
67
import escapeStringRegexp from 'escape-string-regexp';
78
import { z } from 'zod';
8-
import { getFileSource, listRepos, search, listCommits } from './client.js';
9+
import { getFileSource, listCommits, listRepos, search } from './client.js';
910
import { env, numberSchema } from './env.js';
1011
import { fileSourceRequestSchema, listCommitsQueryParamsSchema, listReposQueryParamsSchema } from './schemas.js';
1112
import { FileSourceRequest, ListCommitsQueryParamsSchema, ListReposQueryParams, TextContent } from './types.js';
12-
import _dedent from "dedent";
13-
import { addLineNumbers } from './utils.js';
1413

1514
const dedent = _dedent.withOptions({ alignValues: true });
1615

@@ -24,39 +23,41 @@ const server = new McpServer({
2423
server.tool(
2524
"search_code",
2625
dedent`
27-
Fetches code that matches the provided regex pattern in \`query\`.
28-
29-
Results are returned as an array of matching files, with the file's URL, repository, and language.
30-
31-
If the \`includeCodeSnippets\` property is true, code snippets containing the matches will be included in the response. Only set this to true if the request requires code snippets (e.g., show me examples where library X is used).
32-
When referencing a file in your response, **ALWAYS** include the file's external URL as a link. This makes it easier for the user to view the file, even if they don't have it locally checked out.
33-
**ONLY USE** the \`filterByRepoIds\` property if the request requires searching a specific repo(s). Otherwise, leave it empty.`,
26+
Searches for code that matches the provided search query as a substring by default, or as a regular expression if useRegex is true. Useful for exploring remote repositories by searching for exact symbols, functions, variables, or specific code patterns. To determine if a repository is indexed, use the \`list_repos\` tool. By default, searches are global and will search the default branch of all repositories. Searches can be scoped to specific repositories, languages, and branches. When referencing code outputted by this tool, always include the file's external URL as a link. This makes it easier for the user to view the file, even if they don't have it locally checked out.
27+
`,
3428
{
3529
query: z
3630
.string()
37-
.describe(`The regex pattern to search for. RULES:
38-
1. When a regex special character needs to be escaped, ALWAYS use a single backslash (\) (e.g., 'console\.log')
39-
2. **ALWAYS** escape spaces with a single backslash (\) (e.g., 'console\ log')
40-
`),
41-
filterByRepoIds: z
31+
.describe(`The search pattern to match against code contents. Do not escape quotes in your query.`)
32+
// Wrap in quotes so the query is treated as a literal phrase (like grep).
33+
.transform((val) => `"${val.replace(/"/g, '\\"')}"`),
34+
useRegex: z
35+
.boolean()
36+
.describe(`Whether to use regular expression matching to match the search query against code contents. When false, substring matching is used. (default: false)`)
37+
.optional(),
38+
filterByRepos: z
4239
.array(z.string())
43-
.describe(`Scope the search to the provided repositories to the Sourcebot compatible repository IDs. **DO NOT** use this property if you want to search all repositories. **YOU MUST** call 'list_repos' first to obtain the exact repository ID.`)
40+
.describe(`Scope the search to the provided repositories.`)
4441
.optional(),
4542
filterByLanguages: z
4643
.array(z.string())
47-
.describe(`Scope the search to the provided languages. The language MUST be formatted as a GitHub linguist language. Examples: Python, JavaScript, TypeScript, Java, C#, C++, PHP, Go, Rust, Ruby, Swift, Kotlin, Shell, C, Dart, HTML, CSS, PowerShell, SQL, R`)
44+
.describe(`Scope the search to the provided languages.`)
45+
.optional(),
46+
filterByFilepaths: z
47+
.array(z.string())
48+
.describe(`Scope the search to the provided filepaths. Interpreted as a regex.`)
4849
.optional(),
4950
caseSensitive: z
5051
.boolean()
5152
.describe(`Whether the search should be case sensitive (default: false).`)
5253
.optional(),
5354
includeCodeSnippets: z
5455
.boolean()
55-
.describe(`Whether to include the code snippets in the response (default: false). If false, only the file's URL, repository, and language will be returned. Set to false to get a more concise response.`)
56+
.describe(`Whether to include the code snippets in the response. If false, only the file's URL, repository, and language will be returned. (default: false)`)
5657
.optional(),
57-
gitRevision: z
58+
ref: z
5859
.string()
59-
.describe(`The git revision to search in (e.g., 'main', 'HEAD', 'v1.0.0', 'a1b2c3d'). If not provided, defaults to the default branch (usually 'main' or 'master').`)
60+
.describe(`Commit SHA, branch or tag name to search on. If not provided, defaults to the default branch (usually 'main' or 'master').`)
6061
.optional(),
6162
maxTokens: numberSchema
6263
.describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`)
@@ -65,30 +66,36 @@ server.tool(
6566
},
6667
async ({
6768
query,
68-
filterByRepoIds: repoIds = [],
69+
filterByRepos: repos = [],
6970
filterByLanguages: languages = [],
71+
filterByFilepaths: filepaths = [],
7072
maxTokens = env.DEFAULT_MINIMUM_TOKENS,
7173
includeCodeSnippets = false,
7274
caseSensitive = false,
73-
gitRevision,
75+
ref,
76+
useRegex = false,
7477
}) => {
75-
if (repoIds.length > 0) {
76-
query += ` ( repo:${repoIds.map(id => escapeStringRegexp(id)).join(' or repo:')} )`;
78+
if (repos.length > 0) {
79+
query += ` (repo:${repos.map(id => escapeStringRegexp(id)).join(' or repo:')})`;
7780
}
7881

7982
if (languages.length > 0) {
80-
query += ` ( lang:${languages.join(' or lang:')} )`;
83+
query += ` (lang:${languages.join(' or lang:')})`;
84+
}
85+
86+
if (filepaths.length > 0) {
87+
query += ` (file:${filepaths.map(filepath => escapeStringRegexp(filepath)).join(' or file:')})`;
8188
}
8289

83-
if (gitRevision) {
84-
query += ` ( rev:${gitRevision} )`;
90+
if (ref) {
91+
query += ` ( rev:${ref} )`;
8592
}
8693

8794
const response = await search({
8895
query,
8996
matches: env.DEFAULT_MATCHES,
9097
contextLines: env.DEFAULT_CONTEXT_LINES,
91-
isRegexEnabled: true,
98+
isRegexEnabled: useRegex,
9299
isCaseSensitivityEnabled: caseSensitive,
93100
source: 'mcp',
94101
});
@@ -111,11 +118,10 @@ server.tool(
111118
(acc, chunk) => acc + chunk.matchRanges.length,
112119
0,
113120
);
114-
const fileIdentifier = file.webUrl ?? file.fileName.text;
115121
let text = dedent`
116-
file: ${fileIdentifier}
122+
file: ${file.webUrl}
117123
num_matches: ${numMatches}
118-
repository: ${file.repository}
124+
repo: ${file.repository}
119125
language: ${file.language}
120126
`;
121127

@@ -219,7 +225,7 @@ server.tool(
219225
return {
220226
content: [{
221227
type: "text", text: JSON.stringify({
222-
source: addLineNumbers(response.source),
228+
source: response.source,
223229
language: response.language,
224230
path: response.path,
225231
url: response.webUrl,

packages/mcp/src/schemas.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ export const searchResponseSchema = z.object({
124124
// Any matching ranges
125125
matchRanges: z.array(rangeSchema),
126126
}),
127-
webUrl: z.string().optional(),
127+
webUrl: z.string(),
128+
externalWebUrl: z.string().optional(),
128129
repository: z.string(),
129130
repositoryId: z.number(),
130131
language: z.string(),

packages/mcp/src/utils.ts

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,3 @@ export class ServiceErrorException extends Error {
1414
super(JSON.stringify(serviceError));
1515
}
1616
}
17-
18-
export const addLineNumbers = (source: string, lineOffset = 1) => {
19-
return source.split('\n').map((line, index) => `${index + lineOffset}:${line}`).join('\n');
20-
}

packages/web/src/app/[domain]/search/components/codePreviewPanel/index.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ export const CodePreviewPanel = ({
4242
content: data.source,
4343
filepath: previewedFile.fileName.text,
4444
matches: previewedFile.chunks,
45-
link: previewedFile.webUrl,
45+
link: previewedFile.externalWebUrl,
4646
language: previewedFile.language,
4747
revision: branch ?? "HEAD",
4848
};

packages/web/src/features/codeNav/types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ export const findRelatedSymbolsResponseSchema = z.object({
2525
fileName: z.string(),
2626
repository: z.string(),
2727
repositoryId: z.number(),
28-
webUrl: z.string().optional(),
28+
webUrl: z.string(),
2929
language: z.string(),
3030
matches: z.array(z.object({
3131
lineContent: z.string(),

packages/web/src/features/search/fileSourceApi.ts

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,13 @@ import { sew } from "@/actions";
77
import { withOptionalAuthV2 } from "@/withAuthV2";
88
import { QueryIR } from './ir';
99
import escapeStringRegexp from "escape-string-regexp";
10-
import { SINGLE_TENANT_ORG_DOMAIN } from '@/lib/constants';
11-
import { getBrowsePath } from '@/app/[domain]/browse/hooks/utils';
12-
import { headers } from 'next/headers';
13-
import { getBaseUrl } from '@/lib/utils.server';
1410

1511
// @todo (bkellam) #574 : We should really be using `git show <hash>:<path>` to fetch file contents here.
1612
// This will allow us to support permalinks to files at a specific revision that may not be indexed
1713
// by zoekt. We should also refactor this out of the /search folder.
1814

1915
export const getFileSource = async ({ fileName, repository, branch }: FileSourceRequest): Promise<FileSourceResponse | ServiceError> => sew(() =>
2016
withOptionalAuthV2(async () => {
21-
const headersList = await headers();
22-
const baseUrl = getBaseUrl(headersList);
23-
2417
const query: QueryIR = {
2518
and: {
2619
children: [
@@ -85,14 +78,8 @@ export const getFileSource = async ({ fileName, repository, branch }: FileSource
8578
repoDisplayName: repoInfo.displayName,
8679
repoExternalWebUrl: repoInfo.webUrl,
8780
branch,
88-
webUrl: `${baseUrl}${getBrowsePath({
89-
repoName: repository,
90-
revisionName: branch,
91-
path: fileName,
92-
pathType: 'blob',
93-
domain: SINGLE_TENANT_ORG_DOMAIN,
94-
})}`,
95-
externalWebUrl: file.webUrl,
81+
webUrl: file.webUrl,
82+
externalWebUrl: file.externalWebUrl,
9683
} satisfies FileSourceResponse;
9784

9885
}));

packages/web/src/features/search/types.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ export const searchFileSchema = z.object({
6363
// Any matching ranges
6464
matchRanges: z.array(rangeSchema),
6565
}),
66-
webUrl: z.string().optional(),
66+
webUrl: z.string(),
67+
externalWebUrl: z.string().optional(),
6768
repository: z.string(),
6869
repositoryId: z.number(),
6970
language: z.string(),

packages/web/src/features/search/zoektSearcher.ts

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { getCodeHostBrowseFileAtBranchUrl } from "@/lib/utils";
2-
import { unexpectedError } from "@/lib/serviceError";
2+
import { ServiceErrorException, unexpectedError } from "@/lib/serviceError";
33
import type { ProtoGrpcType } from '@/proto/webserver';
44
import { FileMatch__Output as ZoektGrpcFileMatch } from "@/proto/zoekt/webserver/v1/FileMatch";
55
import { FlushReason as ZoektGrpcFlushReason } from "@/proto/zoekt/webserver/v1/FlushReason";
@@ -18,6 +18,10 @@ import path from 'path';
1818
import { isBranchQuery, QueryIR, someInQueryIR } from './ir';
1919
import { RepositoryInfo, SearchResponse, SearchResultFile, SearchStats, SourceRange, StreamedSearchErrorResponse, StreamedSearchResponse } from "./types";
2020
import { captureEvent } from "@/lib/posthog";
21+
import { getBrowsePath } from "@/app/[domain]/browse/hooks/utils";
22+
import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants";
23+
import { headers } from "next/headers";
24+
import { getBaseUrl } from "@/lib/utils.server";
2125

2226
const logger = createLogger("zoekt-searcher");
2327

@@ -123,7 +127,7 @@ export const zoektSearch = async (searchRequest: ZoektGrpcSearchRequest, prisma:
123127
return new Promise((resolve, reject) => {
124128
client.Search(searchRequest, metadata, (error, response) => {
125129
if (error || !response) {
126-
reject(error || new Error('No response received'));
130+
reject(new ServiceErrorException(unexpectedError(error?.details || 'No response received')))
127131
return;
128132
}
129133

@@ -379,6 +383,9 @@ const transformZoektSearchResponse = async (response: ZoektGrpcSearchResponse, r
379383
files: SearchResultFile[],
380384
repositoryInfo: RepositoryInfo[],
381385
}> => {
386+
const headersList = await headers();
387+
const baseUrl = getBaseUrl(headersList);
388+
382389
const files = response.files.map((file) => {
383390
const fileNameChunks = file.chunk_matches.filter((chunk) => chunk.file_name);
384391
const repoId = getRepoIdForFile(file);
@@ -413,6 +420,9 @@ const transformZoektSearchResponse = async (response: ZoektGrpcSearchResponse, r
413420
}
414421
})
415422

423+
// If a file has multiple branches, default to the first one.
424+
const branchName = file.branches.length > 0 ? file.branches[0] : undefined;
425+
416426
return {
417427
fileName: {
418428
text: fileName,
@@ -421,11 +431,17 @@ const transformZoektSearchResponse = async (response: ZoektGrpcSearchResponse, r
421431
repository: repo.name,
422432
repositoryId: repo.id,
423433
language: file.language,
424-
webUrl: getCodeHostBrowseFileAtBranchUrl({
434+
webUrl: `${baseUrl}${getBrowsePath({
435+
repoName: repo.name,
436+
path: fileName,
437+
pathType: 'blob',
438+
domain: SINGLE_TENANT_ORG_DOMAIN,
439+
revisionName: branchName,
440+
})}`,
441+
externalWebUrl: getCodeHostBrowseFileAtBranchUrl({
425442
webUrl: repo.webUrl,
426443
codeHostType: repo.external_codeHostType,
427-
// If a file has multiple branches, default to the first one.
428-
branchName: file.branches?.[0] ?? 'HEAD',
444+
branchName: branchName ?? 'HEAD',
429445
filePath: fileName,
430446
}),
431447
chunks: file.chunk_matches

0 commit comments

Comments
 (0)