-
Notifications
You must be signed in to change notification settings - Fork 260
Expand file tree
/
Copy pathserver.ts
More file actions
528 lines (470 loc) · 22.8 KB
/
server.ts
File metadata and controls
528 lines (470 loc) · 22.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
import { listRepos } from '@/app/api/(server)/repos/listReposApi';
import { getConfiguredLanguageModelsInfo } from "../chat/utils.server";
import { askCodebase } from '@/features/mcp/askCodebase';
import {
languageModelInfoSchema,
} from '@/features/chat/types';
import { getFileSource, getTree, listCommits } from '@/features/git';
import { search } from '@/features/search/searchApi';
import { isServiceError } from '@/lib/utils';
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { ChatVisibility } from '@sourcebot/db';
import { SOURCEBOT_VERSION } from '@sourcebot/shared';
import _dedent from 'dedent';
import escapeStringRegexp from 'escape-string-regexp';
import { z } from 'zod';
import {
ListTreeEntry,
TextContent,
} from './types';
import { buildTreeNodeIndex, joinTreePath, normalizeTreePath, sortTreeEntries } from './utils';
// Project-wide `dedent` tag, created from the imported `_dedent` with `alignValues` enabled.
// NOTE(review): `alignValues` presumably keeps multi-line interpolated values aligned with the
// indentation of the line they are inserted on — confirm against the installed dedent version.
const dedent = _dedent.withOptions({ alignValues: true });
// search_code: token budget. Serves both as the default for `maxTokens` and as the
// floor that smaller requested values are raised to.
const DEFAULT_MINIMUM_TOKENS = 10_000;
// search_code: value passed as the `matches` option of every search request.
const DEFAULT_MATCHES = 10_000;
// search_code: value passed as the `contextLines` option of every search request.
const DEFAULT_CONTEXT_LINES = 5;

// list_tree: default and upper bound for the `depth` input.
const DEFAULT_TREE_DEPTH = 1;
const MAX_TREE_DEPTH = 10;
// list_tree: default and upper bound for the `maxEntries` input.
const DEFAULT_MAX_TREE_ENTRIES = 1_000;
const MAX_MAX_TREE_ENTRIES = 10_000;
/**
 * Human/LLM-facing description text for each MCP tool, keyed by tool name.
 * Each value is passed as the `description` when the matching tool is registered.
 * The text inside the template literals is runtime content: edit it only when the
 * wording shown to clients should change.
 */
const TOOL_DESCRIPTIONS = {
// Code search; tells the client to check `list_repos` for indexing and to link file URLs.
search_code: dedent`
Searches for code that matches the provided search query as a substring by default, or as a regular expression if useRegex is true. Useful for exploring remote repositories by
searching for exact symbols, functions, variables, or specific code patterns.
To determine if a repository is indexed, use the \`list_repos\` tool. By default, searches are global and will search the default branch of all repositories. Searches can be
scoped to specific repositories, languages, and branches.
When referencing code outputted by this tool, always include the file's external URL as a link. This makes it easier for the user to view the file, even if they don't have it locally checked out.
`,
// Commit history for a single repository.
list_commits: dedent`Get a list of commits for a given repository.`,
// Paginated, filterable repository listing.
list_repos: dedent`Lists repositories in the organization with optional filtering and pagination.`,
// Source of a single file.
read_file: dedent`Reads the source code for a given file.`,
// Flat directory/tree listing relative to a requested path.
list_tree: dedent`
Lists files and directories from a repository path. This can be used as a repo tree tool or directory listing tool.
Returns a flat list of entries with path metadata and depth relative to the requested path.
`,
// Discovery of the models that can be passed to ask_codebase.
list_language_models: dedent`Lists the available language models configured on the Sourcebot instance. Use this to discover which models can be specified when calling ask_codebase.`,
// Agentic question answering; the description deliberately restricts use to explicit requests.
ask_codebase: dedent`
DO NOT USE THIS TOOL UNLESS EXPLICITLY ASKED TO. THE PROMPT MUST SPECIFICALLY ASK TO USE THE ask_codebase TOOL.
Ask a natural language question about the codebase. This tool uses an AI agent to autonomously search code, read files, and find symbol references/definitions to answer your question.
This is a blocking operation that may take 60+ seconds to research the codebase, so only invoke it if the user has explicitly asked you to by specifying the ask_codebase tool call in the prompt.
The agent will:
- Analyze your question and determine what context it needs
- Search the codebase using multiple strategies (code search, symbol lookup, file reading)
- Synthesize findings into a comprehensive answer with code references
Returns a detailed answer in markdown format with code references, plus a link to view the full research session (including all tool calls and reasoning) in the Sourcebot web UI.
When using this in shared environments (e.g., Slack), you can set the visibility parameter to 'PUBLIC' to ensure everyone can access the chat link.
`,
};
/**
 * Builds the Sourcebot MCP server and registers every tool it exposes, in this
 * order: `search_code`, `list_commits`, `list_repos`, `read_file`, `list_tree`,
 * `list_language_models`, `ask_codebase`.
 *
 * Tool failures are never thrown from the handlers; they are returned as text
 * results flagged with `isError: true` so a client can tell a failed call apart
 * from a successful answer.
 *
 * @returns The configured server. It is returned without being connected to a transport.
 */
export function createMcpServer(): McpServer {
    const server = new McpServer({
        name: 'sourcebot-mcp-server',
        version: SOURCEBOT_VERSION,
    });

    registerSearchCodeTool(server);
    registerListCommitsTool(server);
    registerListReposTool(server);
    registerReadFileTool(server);
    registerListTreeTool(server);
    registerListLanguageModelsTool(server);
    registerAskCodebaseTool(server);

    return server;
}

/** Wraps plain text in the single-item `content` array used by every tool result here. */
function textResult(text: string) {
    return { content: [{ type: 'text' as const, text }] };
}

/** Same shape as `textResult`, but flagged as a failed tool execution. */
function errorResult(text: string) {
    return { content: [{ type: 'text' as const, text }], isError: true };
}

/** Registers `search_code`: runs a code search and returns per-file results within a token budget. */
function registerSearchCodeTool(server: McpServer): void {
    server.registerTool(
        "search_code",
        {
            description: TOOL_DESCRIPTIONS.search_code,
            inputSchema: {
                query: z
                    .string()
                    .describe(`The search pattern to match against code contents. Do not escape quotes in your query.`)
                    // Escape backslashes and double quotes, then wrap the whole pattern in
                    // double quotes so it reaches the search backend as one quoted term.
                    .transform((val) => {
                        const escaped = val.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
                        return `"${escaped}"`;
                    }),
                useRegex: z
                    .boolean()
                    .describe(`Whether to use regular expression matching. When false, substring matching is used. (default: false)`)
                    .optional(),
                filterByRepos: z
                    .array(z.string())
                    .describe(`Scope the search to the provided repositories.`)
                    .optional(),
                filterByLanguages: z
                    .array(z.string())
                    .describe(`Scope the search to the provided languages.`)
                    .optional(),
                filterByFilepaths: z
                    .array(z.string())
                    .describe(`Scope the search to the provided filepaths. Each filepath is a regular expression matched against the full file path.`)
                    .optional(),
                caseSensitive: z
                    .boolean()
                    .describe(`Whether the search should be case sensitive (default: false).`)
                    .optional(),
                includeCodeSnippets: z
                    .boolean()
                    .describe(`Whether to include code snippets in the response. If false, only the file's URL, repository, and language will be returned. (default: false)`)
                    .optional(),
                ref: z
                    .string()
                    .describe(`Commit SHA, branch or tag name to search on. If not provided, defaults to the default branch.`)
                    .optional(),
                maxTokens: z
                    .number()
                    .describe(`The maximum number of tokens to return (default: ${DEFAULT_MINIMUM_TOKENS}).`)
                    // Requests below the floor are raised to it rather than rejected.
                    .transform((val) => (val < DEFAULT_MINIMUM_TOKENS ? DEFAULT_MINIMUM_TOKENS : val))
                    .optional(),
            },
        },
        async ({
            query,
            filterByRepos: repos = [],
            filterByLanguages: languages = [],
            filterByFilepaths: filepaths = [],
            maxTokens = DEFAULT_MINIMUM_TOKENS,
            includeCodeSnippets = false,
            caseSensitive = false,
            ref,
            useRegex = false,
        }: {
            query: string;
            useRegex?: boolean;
            filterByRepos?: string[];
            filterByLanguages?: string[];
            filterByFilepaths?: string[];
            caseSensitive?: boolean;
            includeCodeSnippets?: boolean;
            ref?: string;
            maxTokens?: number;
        }) => {
            // Append each requested scope to the quoted pattern as an or-joined filter group.
            let scopedQuery = query;
            if (repos.length > 0) {
                // Repository names are regex-escaped before being used in the `repo:` filter.
                scopedQuery += ` (repo:${repos.map(id => escapeStringRegexp(id)).join(' or repo:')})`;
            }
            if (languages.length > 0) {
                scopedQuery += ` (lang:${languages.join(' or lang:')})`;
            }
            if (filepaths.length > 0) {
                // File paths are passed through unescaped: the schema documents them as regular expressions.
                scopedQuery += ` (file:${filepaths.join(' or file:')})`;
            }
            if (ref) {
                scopedQuery += ` ( rev:${ref} )`;
            }

            const response = await search({
                queryType: 'string',
                query: scopedQuery,
                options: {
                    matches: DEFAULT_MATCHES,
                    contextLines: DEFAULT_CONTEXT_LINES,
                    isRegexEnabled: useRegex,
                    isCaseSensitivityEnabled: caseSensitive,
                },
                source: 'mcp',
            });

            if (isServiceError(response)) {
                return errorResult(`Search failed: ${response.message}`);
            }

            // An empty result set is a valid answer, not a failure.
            if (response.files.length === 0) {
                return textResult(`No results found for the query: ${scopedQuery}`);
            }

            const content: TextContent[] = [];
            let totalTokens = 0;
            let isResponseTruncated = false;

            for (const file of response.files) {
                const numMatches = file.chunks.reduce((acc, chunk) => acc + chunk.matchRanges.length, 0);
                let text = dedent`
                file: ${file.webUrl}
                num_matches: ${numMatches}
                repo: ${file.repository}
                language: ${file.language}
                `;

                if (includeCodeSnippets) {
                    const snippets = file.chunks.map(chunk => `\`\`\`\n${chunk.content}\n\`\`\``).join('\n');
                    text += `\n\n${snippets}`;
                }

                // Token usage is estimated as one token per four characters.
                const tokens = text.length / 4;

                if ((totalTokens + tokens) > maxTokens) {
                    // Emit a cut-down version of this file only when a meaningful
                    // amount of budget (more than 100 tokens) is still available.
                    const remainingTokens = maxTokens - totalTokens;
                    if (remainingTokens > 100) {
                        const maxLength = Math.floor(remainingTokens * 4);
                        content.push({
                            type: "text",
                            text: text.substring(0, maxLength) + "\n\n...[content truncated due to token limit]",
                        });
                        totalTokens += remainingTokens;
                    }
                    isResponseTruncated = true;
                    break;
                }

                totalTokens += tokens;
                content.push({ type: "text", text });
            }

            if (isResponseTruncated) {
                content.push({
                    type: "text",
                    text: `The response was truncated because the number of tokens exceeded the maximum limit of ${maxTokens}.`,
                });
            }

            return { content };
        }
    );
}

/** Registers `list_commits`: returns one page of a repository's commits as JSON. */
function registerListCommitsTool(server: McpServer): void {
    server.registerTool(
        "list_commits",
        {
            description: TOOL_DESCRIPTIONS.list_commits,
            inputSchema: z.object({
                repo: z.string().describe("The name of the repository to list commits for."),
                query: z.string().describe("Search query to filter commits by message content (case-insensitive).").optional(),
                since: z.string().describe("Show commits more recent than this date. Supports ISO 8601 or relative formats (e.g., '30 days ago').").optional(),
                until: z.string().describe("Show commits older than this date. Supports ISO 8601 or relative formats (e.g., 'yesterday').").optional(),
                author: z.string().describe("Filter commits by author name or email (case-insensitive).").optional(),
                ref: z.string().describe("Commit SHA, branch or tag name to list commits of. If not provided, uses the default branch.").optional(),
                page: z.number().int().positive().describe("Page number for pagination (min 1). Default: 1").optional().default(1),
                perPage: z.number().int().positive().max(100).describe("Results per page for pagination (min 1, max 100). Default: 50").optional().default(50),
            }),
        },
        async ({ repo, query, since, until, author, ref, page, perPage }) => {
            // Translate 1-based page/perPage into the skip/maxCount pair listCommits takes.
            const skip = (page - 1) * perPage;
            const result = await listCommits({
                repo,
                query,
                since,
                until,
                author,
                ref,
                maxCount: perPage,
                skip,
            });

            if (isServiceError(result)) {
                return errorResult(`Failed to list commits: ${result.message}`);
            }

            return textResult(JSON.stringify(result));
        }
    );
}

/** Registers `list_repos`: returns one page of repositories plus the total count as JSON. */
function registerListReposTool(server: McpServer): void {
    server.registerTool(
        "list_repos",
        {
            description: TOOL_DESCRIPTIONS.list_repos,
            inputSchema: z.object({
                query: z.string().describe("Filter repositories by name (case-insensitive)").optional(),
                page: z.number().int().positive().describe("Page number for pagination (min 1). Default: 1").optional().default(1),
                perPage: z.number().int().positive().max(100).describe("Results per page for pagination (min 1, max 100). Default: 30").optional().default(30),
                sort: z.enum(['name', 'pushed']).describe("Sort repositories by 'name' or 'pushed' (most recent commit). Default: 'name'").optional().default('name'),
                direction: z.enum(['asc', 'desc']).describe("Sort direction: 'asc' or 'desc'. Default: 'asc'").optional().default('asc'),
            })
        },
        async ({ query, page, perPage, sort, direction }) => {
            const result = await listRepos({ query, page, perPage, sort, direction, source: 'mcp' });

            if (isServiceError(result)) {
                return errorResult(`Failed to list repositories: ${result.message}`);
            }

            // Expose only the fields below, under client-facing names.
            return textResult(JSON.stringify({
                repos: result.data.map((repo) => ({
                    name: repo.repoName,
                    url: repo.webUrl,
                    pushedAt: repo.pushedAt,
                    defaultBranch: repo.defaultBranch,
                    isFork: repo.isFork,
                    isArchived: repo.isArchived,
                })),
                totalCount: result.totalCount,
            }));
        }
    );
}

/** Registers `read_file`: returns a file's source, language, path and web URL as JSON. */
function registerReadFileTool(server: McpServer): void {
    server.registerTool(
        "read_file",
        {
            description: TOOL_DESCRIPTIONS.read_file,
            inputSchema: {
                repo: z.string().describe("The repository name."),
                path: z.string().describe("The path to the file."),
                ref: z.string().optional().describe("Commit SHA, branch or tag name to fetch the source code for. If not provided, uses the default branch of the repository."),
            },
        },
        async ({ repo, path, ref }) => {
            const response = await getFileSource({ repo, path, ref }, { source: 'mcp' });

            if (isServiceError(response)) {
                return errorResult(`Failed to read file: ${response.message}`);
            }

            return textResult(JSON.stringify({
                source: response.source,
                language: response.language,
                path: response.path,
                url: response.webUrl,
            }));
        }
    );
}

/** Registers `list_tree`: walks a repository directory level by level and returns a flat, sorted listing. */
function registerListTreeTool(server: McpServer): void {
    server.registerTool(
        "list_tree",
        {
            description: TOOL_DESCRIPTIONS.list_tree,
            inputSchema: {
                repo: z.string().describe("The name of the repository to list files from."),
                path: z.string().describe("Directory path (relative to repo root). If omitted, the repo root is used.").optional().default(''),
                ref: z.string().describe("Commit SHA, branch or tag name to list files from. If not provided, uses the default branch.").optional().default('HEAD'),
                depth: z.number().int().positive().max(MAX_TREE_DEPTH).describe(`How many directory levels to traverse below \`path\` (min 1, max ${MAX_TREE_DEPTH}, default ${DEFAULT_TREE_DEPTH}).`).optional().default(DEFAULT_TREE_DEPTH),
                includeFiles: z.boolean().describe("Whether to include files in the output (default: true).").optional().default(true),
                includeDirectories: z.boolean().describe("Whether to include directories in the output (default: true).").optional().default(true),
                maxEntries: z.number().int().positive().max(MAX_MAX_TREE_ENTRIES).describe(`Maximum number of entries to return (min 1, max ${MAX_MAX_TREE_ENTRIES}, default ${DEFAULT_MAX_TREE_ENTRIES}).`).optional().default(DEFAULT_MAX_TREE_ENTRIES),
            },
        },
        async ({
            repo,
            path = '',
            ref = 'HEAD',
            depth = DEFAULT_TREE_DEPTH,
            includeFiles = true,
            includeDirectories = true,
            maxEntries = DEFAULT_MAX_TREE_ENTRIES,
        }: {
            repo: string;
            path?: string;
            ref?: string;
            depth?: number;
            includeFiles?: boolean;
            includeDirectories?: boolean;
            maxEntries?: number;
        }) => {
            const normalizedPath = normalizeTreePath(path);
            // Clamp again here so the limits hold even if the handler is reached with unvalidated values.
            const normalizedDepth = Math.min(depth, MAX_TREE_DEPTH);
            const normalizedMaxEntries = Math.min(maxEntries, MAX_MAX_TREE_ENTRIES);

            // Serialises the response payload; shared by the empty and the populated case.
            const listing = (listed: ListTreeEntry[], wasTruncated: boolean) => textResult(JSON.stringify({
                repo, ref, path: normalizedPath,
                entries: listed,
                totalReturned: listed.length,
                truncated: wasTruncated,
            }));

            // With both kinds excluded nothing can be returned, so skip the tree lookups entirely.
            if (!includeFiles && !includeDirectories) {
                return listing([] as ListTreeEntry[], false);
            }

            // Directories already scheduled for a visit, so none is fetched twice.
            const queuedPaths = new Set<string>([normalizedPath]);
            // `type:path` keys of entries already emitted.
            const seenEntries = new Set<string>();
            const entries: ListTreeEntry[] = [];
            let truncated = false;

            // Breadth-first walk: every directory of one depth level is fetched in a single getTree call.
            let currentLevelPaths: string[] = [normalizedPath];
            let currentDepth = 0;

            while (currentLevelPaths.length > 0 && !truncated) {
                const treeResult = await getTree({
                    repoName: repo,
                    revisionName: ref,
                    // The repo root is represented by '' and is dropped from the requested paths.
                    paths: currentLevelPaths.filter(Boolean),
                }, { source: 'mcp' });

                // Return at the failure site: the outcome no longer depends on the
                // error message being a non-empty string.
                if (isServiceError(treeResult)) {
                    return errorResult(`Failed to list tree: ${treeResult.message}`);
                }

                const treeNodeIndex = buildTreeNodeIndex(treeResult.tree);
                const nextLevelPaths: string[] = [];
                const childDepth = currentDepth + 1;

                for (const currentPath of currentLevelPaths) {
                    const currentNode = currentPath === '' ? treeResult.tree : treeNodeIndex.get(currentPath);
                    // Paths that are missing from the result or are not directories contribute nothing.
                    if (!currentNode || currentNode.type !== 'tree') continue;

                    for (const child of currentNode.children) {
                        if (child.type !== 'tree' && child.type !== 'blob') continue;

                        const childPath = joinTreePath(currentPath, child.name);

                        // Schedule sub-directories before filtering, so excluding directories
                        // from the output does not stop the walk from descending into them.
                        if (child.type === 'tree' && childDepth < normalizedDepth && !queuedPaths.has(childPath)) {
                            nextLevelPaths.push(childPath);
                            queuedPaths.add(childPath);
                        }

                        if ((child.type === 'blob' && !includeFiles) || (child.type === 'tree' && !includeDirectories)) {
                            continue;
                        }

                        const key = `${child.type}:${childPath}`;
                        if (seenEntries.has(key)) continue;
                        seenEntries.add(key);

                        // Only flag truncation when there is a further entry that does not fit.
                        if (entries.length >= normalizedMaxEntries) {
                            truncated = true;
                            break;
                        }

                        entries.push({
                            type: child.type as 'tree' | 'blob',
                            path: childPath,
                            name: child.name,
                            parentPath: currentPath,
                            depth: childDepth,
                        });
                    }

                    if (truncated) break;
                }

                currentLevelPaths = nextLevelPaths;
                currentDepth = childDepth;
            }

            return listing(sortTreeEntries(entries), truncated);
        }
    );
}

/** Registers `list_language_models`: returns the configured language models as JSON. */
function registerListLanguageModelsTool(server: McpServer): void {
    server.registerTool(
        "list_language_models",
        {
            description: TOOL_DESCRIPTIONS.list_language_models,
        },
        async () => {
            const models = await getConfiguredLanguageModelsInfo();
            return textResult(JSON.stringify(models));
        }
    );
}

/** Registers `ask_codebase`: forwards a question to the codebase agent and formats its answer as markdown. */
function registerAskCodebaseTool(server: McpServer): void {
    server.registerTool(
        "ask_codebase",
        {
            description: TOOL_DESCRIPTIONS.ask_codebase,
            inputSchema: z.object({
                query: z.string().describe("The query to ask about the codebase."),
                repos: z.array(z.string()).optional().describe("The repositories accessible to the agent. If not provided, all repositories are accessible."),
                languageModel: languageModelInfoSchema.optional().describe("The language model to use. If not provided, defaults to the first model in the config."),
                visibility: z.enum(['PRIVATE', 'PUBLIC']).optional().describe("The visibility of the chat session. Defaults to PRIVATE for authenticated users."),
            }),
        },
        async (request) => {
            const result = await askCodebase({
                query: request.query,
                repos: request.repos,
                languageModel: request.languageModel,
                visibility: request.visibility as ChatVisibility | undefined,
                source: 'mcp',
            });

            if (isServiceError(result)) {
                return errorResult(`Failed to ask codebase: ${result.message}`);
            }

            // The blank line before `---` matters: directly under text, `---` is a
            // setext heading underline and would turn the answer's last line into a heading.
            const formattedResponse = dedent`
            ${result.answer}

            ---
            **View full research session:** ${result.chatUrl}
            **Model used:** ${result.languageModel.model}
            `;

            return textResult(formattedResponse);
        }
    );
}