-
Notifications
You must be signed in to change notification settings - Fork 264
Expand file tree
/
Copy pathindex.ts
More file actions
284 lines (246 loc) · 10.3 KB
/
index.ts
File metadata and controls
284 lines (246 loc) · 10.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
#!/usr/bin/env node
// Entry point for the MCP server
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import _dedent from "dedent";
import escapeStringRegexp from 'escape-string-regexp';
import { z } from 'zod';
import { askCodebase, getFileSource, listCommits, listRepos, search } from './client.js';
import { env, numberSchema } from './env.js';
import { askCodebaseRequestSchema, fileSourceRequestSchema, listCommitsQueryParamsSchema, listReposQueryParamsSchema } from './schemas.js';
import { AskCodebaseRequest, FileSourceRequest, ListCommitsQueryParamsSchema, ListReposQueryParams, TextContent } from './types.js';
// Shared `dedent` template tag, created with the `alignValues: true` option.
// It is used below for the tool descriptions and for formatting tool output.
// NOTE(review): per dedent's documentation, `alignValues` keeps the continuation
// lines of multi-line interpolated values aligned with their placeholder —
// confirm against the installed dedent version.
const dedent = _dedent.withOptions({ alignValues: true });
// Create the MCP server instance. Every tool below is registered on it via
// `server.tool(...)`, and `runServer` connects it to a stdio transport.
// NOTE(review): `name` and `version` are presumably what the server reports
// to connecting clients — confirm against the MCP SDK.
const server = new McpServer({
name: 'sourcebot-mcp-server',
version: '0.1.0',
});
// Tool: search_code
// Builds a scoped search query from the validated arguments, runs it through
// `search`, and returns one text block per matching file. Output is capped by
// a rough token budget (characters / 4); the file that would overflow the
// budget is truncated (or dropped if too little budget remains) and a final
// notice is appended.
server.tool(
    "search_code",
    dedent`
    Searches for code that matches the provided search query as a substring by default, or as a regular expression if useRegex is true. Useful for exploring remote repositories by searching for exact symbols, functions, variables, or specific code patterns. To determine if a repository is indexed, use the \`list_repos\` tool. By default, searches are global and will search the default branch of all repositories. Searches can be scoped to specific repositories, languages, and branches. When referencing code outputted by this tool, always include the file's external URL as a link. This makes it easier for the user to view the file, even if they don't have it locally checked out.
    `,
    {
        query: z
            .string()
            .describe(`The search pattern to match against code contents. Do not escape quotes in your query.`)
            // Turn the raw pattern into a double-quoted literal phrase (like grep):
            // backslashes are doubled first, then double quotes are escaped.
            .transform((rawPattern) => {
                const backslashesEscaped = rawPattern.replace(/\\/g, '\\\\');
                const quotesEscaped = backslashesEscaped.replace(/"/g, '\\"');
                return `"${quotesEscaped}"`;
            }),
        useRegex: z
            .boolean()
            .describe(`Whether to use regular expression matching to match the search query against code contents. When false, substring matching is used. (default: false)`)
            .optional(),
        filterByRepos: z
            .array(z.string())
            .describe(`Scope the search to the provided repositories.`)
            .optional(),
        filterByLanguages: z
            .array(z.string())
            .describe(`Scope the search to the provided languages.`)
            .optional(),
        filterByFilepaths: z
            .array(z.string())
            .describe(`Scope the search to the provided filepaths.`)
            .optional(),
        caseSensitive: z
            .boolean()
            .describe(`Whether the search should be case sensitive (default: false).`)
            .optional(),
        includeCodeSnippets: z
            .boolean()
            .describe(`Whether to include the code snippets in the response. If false, only the file's URL, repository, and language will be returned. (default: false)`)
            .optional(),
        ref: z
            .string()
            .describe(`Commit SHA, branch or tag name to search on. If not provided, defaults to the default branch (usually 'main' or 'master').`)
            .optional(),
        maxTokens: numberSchema
            .describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`)
            // Requests below the configured minimum are raised to that minimum.
            .transform((requested) => {
                if (requested < env.DEFAULT_MINIMUM_TOKENS) {
                    return env.DEFAULT_MINIMUM_TOKENS;
                }
                return requested;
            })
            .optional(),
    },
    async ({
        query: quotedPattern,
        filterByRepos: repoFilters = [],
        filterByLanguages: languageFilters = [],
        filterByFilepaths: pathFilters = [],
        maxTokens: tokenBudget = env.DEFAULT_MINIMUM_TOKENS,
        includeCodeSnippets: withSnippets = false,
        caseSensitive: matchCase = false,
        ref: revision,
        useRegex: regexMode = false,
    }) => {
        // Collect the quoted pattern plus one parenthesised clause per active
        // filter; the clauses are joined with single spaces.
        const clauses: string[] = [quotedPattern];
        if (repoFilters.length > 0) {
            const repoTerms = repoFilters.map((repoName) => `repo:${escapeStringRegexp(repoName)}`);
            clauses.push(`(${repoTerms.join(' or ')})`);
        }
        if (languageFilters.length > 0) {
            const languageTerms = languageFilters.map((languageName) => `lang:${languageName}`);
            clauses.push(`(${languageTerms.join(' or ')})`);
        }
        if (pathFilters.length > 0) {
            const pathTerms = pathFilters.map((pathPattern) => `file:${escapeStringRegexp(pathPattern)}`);
            clauses.push(`(${pathTerms.join(' or ')})`);
        }
        if (revision) {
            clauses.push(`( rev:${revision} )`);
        }
        const scopedQuery = clauses.join(' ');

        const searchResult = await search({
            query: scopedQuery,
            matches: env.DEFAULT_MATCHES,
            contextLines: env.DEFAULT_CONTEXT_LINES,
            isRegexEnabled: regexMode,
            isCaseSensitivityEnabled: matchCase,
        });

        if (searchResult.files.length === 0) {
            return {
                content: [{
                    type: "text",
                    text: `No results found for the query: ${scopedQuery}`,
                }],
            };
        }

        const blocks: TextContent[] = [];
        let usedTokens = 0;
        let wasTruncated = false;

        for (const hit of searchResult.files) {
            // Total number of match ranges across every chunk of this file.
            let matchCount = 0;
            for (const chunk of hit.chunks) {
                matchCount += chunk.matchRanges.length;
            }

            let entry = dedent`
                file: ${hit.webUrl}
                num_matches: ${matchCount}
                repo: ${hit.repository}
                language: ${hit.language}
            `;

            if (withSnippets) {
                // Each chunk becomes its own fenced block, one after another.
                const fencedChunks = hit.chunks.map((chunk) => `\`\`\`\n${chunk.content}\n\`\`\``);
                entry += `\n\n${fencedChunks.join('\n')}`;
            }

            // Rough estimate of the number of tokens in the text
            // @see: https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
            const estimatedTokens = entry.length / 4;
            const exceedsBudget = (usedTokens + estimatedTokens) > tokenBudget;

            if (!exceedsBudget) {
                usedTokens += estimatedTokens;
                blocks.push({
                    type: "text",
                    text: entry,
                });
                continue;
            }

            // This entry does not fit. Emit a cut-down version only when a
            // meaningful amount of budget (more than 100 tokens) is left,
            // then stop processing further files.
            const tokensLeft = tokenBudget - usedTokens;
            if (tokensLeft > 100) {
                // tokens ≈ chars / 4, so the character allowance is tokens * 4.
                const charLimit = Math.floor(tokensLeft * 4);
                blocks.push({
                    type: "text",
                    text: entry.substring(0, charLimit) + "\n\n...[content truncated due to token limit]",
                });
                usedTokens += tokensLeft;
            }
            wasTruncated = true;
            break;
        }

        if (wasTruncated) {
            blocks.push({
                type: "text",
                text: `The response was truncated because the number of tokens exceeded the maximum limit of ${tokenBudget}.`,
            });
        }

        return {
            content: blocks,
        };
    }
);
// Tool: list_commits
// Passes the validated query parameters to `listCommits` and returns the
// result serialized as a single JSON text block.
server.tool(
    "list_commits",
    dedent`Get a list of commits for a given repository.`,
    listCommitsQueryParamsSchema.shape,
    async (params: ListCommitsQueryParamsSchema) => {
        const commitListing = await listCommits(params);
        const payload = JSON.stringify(commitListing);
        return {
            content: [{ type: "text", text: payload }],
        };
    }
);
// Tool: list_repos
// Calls `listRepos` with the validated parameters and returns a JSON text
// block containing a trimmed view of each repository (name, url, pushedAt)
// together with the total count.
server.tool(
    "list_repos",
    dedent`Lists repositories in the organization with optional filtering and pagination.`,
    listReposQueryParamsSchema.shape,
    async (params: ListReposQueryParams) => {
        const { repos, totalCount } = await listRepos(params);
        const summaries = repos.map(({ repoName, webUrl, pushedAt }) => ({
            name: repoName,
            url: webUrl,
            pushedAt,
        }));
        const payload = JSON.stringify({ repos: summaries, totalCount });
        return {
            content: [{ type: "text", text: payload }],
        };
    }
);
// Tool: read_file
// Fetches a file through `getFileSource` and returns its source, language,
// path and web URL as a single JSON text block.
server.tool(
    "read_file",
    dedent`Reads the source code for a given file.`,
    fileSourceRequestSchema.shape,
    async (params: FileSourceRequest) => {
        const { source, language, path, webUrl } = await getFileSource(params);
        const payload = JSON.stringify({ source, language, path, url: webUrl });
        return {
            content: [{ type: "text", text: payload }],
        };
    }
);
// Tool: ask_codebase
// Forwards a natural-language question to `askCodebase` and returns the
// agent's markdown answer followed by a link to the full research session.
server.tool(
    "ask_codebase",
    dedent`
    Ask a natural language question about the codebase. This tool uses an AI agent to autonomously search code, read files, and find symbol references/definitions to answer your question.
    The agent will:
    - Analyze your question and determine what context it needs
    - Search the codebase using multiple strategies (code search, symbol lookup, file reading)
    - Synthesize findings into a comprehensive answer with code references
    Returns a detailed answer in markdown format with code references, plus a link to view the full research session (including all tool calls and reasoning) in the Sourcebot web UI.
    This is a blocking operation that may take 30-60+ seconds for complex questions as the agent researches the codebase.
    `,
    askCodebaseRequestSchema.shape,
    async (request: AskCodebaseRequest) => {
        const response = await askCodebase(request);

        // Format the response with the answer and a link to the chat.
        //
        // The answer is free-form markdown produced by the agent, so it is
        // assembled with plain string joins rather than the `dedent` tag:
        // dedent post-processes the whole interpolated string (it strips the
        // smallest common indentation and rewrites literal "\n" escape
        // sequences), which can corrupt code samples embedded in the answer.
        //
        // The parts are separated by blank lines so that `---` is rendered as
        // a horizontal rule; placed directly under the answer's last line it
        // would be parsed as a setext heading underline instead.
        // The outer trim matches what dedent applied previously.
        const formattedResponse = [
            response.answer,
            '---',
            `**View full research session:** ${response.chatUrl}`,
        ].join('\n\n').trim();

        return {
            content: [{
                type: "text",
                text: formattedResponse,
            }],
        };
    }
);
/**
 * Connects the MCP server to a newly created stdio transport.
 * The returned promise settles when `server.connect` does.
 */
async function runServer(): Promise<void> {
    const stdioTransport = new StdioServerTransport();
    await server.connect(stdioTransport);
}
// Start the server. If startup rejects, log the failure to stderr and
// terminate the process with exit code 1.
const exitOnStartupFailure = (reason: unknown): never => {
    console.error('Failed to start MCP server:', reason);
    process.exit(1);
};

runServer().catch(exitOnStartupFailure);