Skip to content

Commit 46e7a42

Browse files
authored
feat(mcp): forward file_pattern in semantic_search to scope results (#1149)
* feat(mcp): forward file_pattern in semantic_search to scope results

The MCP semantic_search wrapper silently dropped any file-scoping argument because file_pattern was not declared on its args interface, even though the underlying search core has supported it all along (the CLI exposes it via --file). In monorepos with multiple large subtrees, this made MCP semantic_search effectively unusable: the top-K results were dominated by the larger, less-relevant tree, with no signal to the caller that the filter was being ignored.

Add file_pattern (string | string[]) to the handler args, forward it as filePattern into searchOpts for hybrid/semantic/keyword modes, and declare it in the tool input schema. Unit tests assert that the argument reaches each search backend.

Closes #1143

* docs(mcp): mention glob support in semantic_search file_pattern schema (#1149)
1 parent 3857349 commit 46e7a42

3 files changed

Lines changed: 79 additions & 0 deletions

File tree

src/mcp/tool-registry.ts

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -322,6 +322,11 @@ const BASE_TOOLS: ToolSchema[] = [
322322
description:
323323
'Search mode: hybrid (BM25 + semantic, default), semantic (embeddings only), keyword (BM25 only)',
324324
},
325+
file_pattern: {
326+
oneOf: [{ type: 'string' }, { type: 'array', items: { type: 'string' } }],
327+
description:
328+
'Restrict results to files matching one or more glob or substring patterns (e.g. "db/", "src/**/*.ts", or ["db/", "src/"])',
329+
},
325330
...PAGINATION_PROPS,
326331
},
327332
required: ['query'],

src/mcp/tools/semantic-search.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ interface SemanticSearchArgs {
99
limit?: number;
1010
offset?: number;
1111
min_score?: number;
12+
file_pattern?: string | string[];
1213
}
1314

1415
export async function handler(args: SemanticSearchArgs, ctx: McpToolContext): Promise<unknown> {
@@ -17,6 +18,7 @@ export async function handler(args: SemanticSearchArgs, ctx: McpToolContext): Pr
1718
limit: Math.min(args.limit ?? MCP_DEFAULTS.semantic_search ?? 100, ctx.MCP_MAX_LIMIT),
1819
offset: effectiveOffset(args),
1920
minScore: args.min_score,
21+
filePattern: args.file_pattern,
2022
};
2123

2224
if (mode === 'keyword') {

tests/unit/mcp.test.ts

Lines changed: 72 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -161,6 +161,7 @@ describe('TOOLS', () => {
161161
expect(ss.inputSchema.required).toContain('query');
162162
expect(ss.inputSchema.properties).toHaveProperty('limit');
163163
expect(ss.inputSchema.properties).toHaveProperty('min_score');
164+
expect(ss.inputSchema.properties).toHaveProperty('file_pattern');
164165
});
165166

166167
it('export_graph requires format parameter with enum', () => {
@@ -1233,4 +1234,75 @@ describe('startMCPServer handler dispatch', () => {
12331234
kind: 'function',
12341235
});
12351236
});
1237+
1238+
it('dispatches semantic_search and forwards file_pattern as filePattern', async () => {
1239+
const handlers = {};
1240+
1241+
vi.doMock('@modelcontextprotocol/sdk/server/index.js', () => ({
1242+
Server: class MockServer {
1243+
setRequestHandler(name, handler) {
1244+
handlers[name] = handler;
1245+
}
1246+
async connect() {}
1247+
},
1248+
}));
1249+
vi.doMock('@modelcontextprotocol/sdk/server/stdio.js', () => ({
1250+
StdioServerTransport: class MockTransport {},
1251+
}));
1252+
vi.doMock('@modelcontextprotocol/sdk/types.js', () => ({
1253+
ListToolsRequestSchema: 'tools/list',
1254+
CallToolRequestSchema: 'tools/call',
1255+
}));
1256+
1257+
const hybridSearchMock = vi.fn(async () => ({ results: [] }));
1258+
const ftsSearchMock = vi.fn(() => ({ results: [] }));
1259+
const searchDataMock = vi.fn(async () => ({ results: [] }));
1260+
vi.doMock('../../src/domain/search/index.js', () => ({
1261+
hybridSearchData: hybridSearchMock,
1262+
ftsSearchData: ftsSearchMock,
1263+
searchData: searchDataMock,
1264+
}));
1265+
1266+
const { startMCPServer } = await import('../../src/mcp/index.js');
1267+
await startMCPServer('/tmp/test.db');
1268+
1269+
// hybrid (default): forwards filePattern as array
1270+
await handlers['tools/call']({
1271+
params: {
1272+
name: 'semantic_search',
1273+
arguments: { query: 'GUC variable', file_pattern: ['db/'], limit: 5 },
1274+
},
1275+
});
1276+
expect(hybridSearchMock).toHaveBeenCalledWith(
1277+
'GUC variable',
1278+
'/tmp/test.db',
1279+
expect.objectContaining({ filePattern: ['db/'], limit: 5 }),
1280+
);
1281+
1282+
// semantic mode: forwards filePattern as string
1283+
await handlers['tools/call']({
1284+
params: {
1285+
name: 'semantic_search',
1286+
arguments: { query: 'q', mode: 'semantic', file_pattern: 'src/mcp/' },
1287+
},
1288+
});
1289+
expect(searchDataMock).toHaveBeenCalledWith(
1290+
'q',
1291+
'/tmp/test.db',
1292+
expect.objectContaining({ filePattern: 'src/mcp/' }),
1293+
);
1294+
1295+
// keyword mode: forwards filePattern
1296+
await handlers['tools/call']({
1297+
params: {
1298+
name: 'semantic_search',
1299+
arguments: { query: 'q', mode: 'keyword', file_pattern: ['tests/'] },
1300+
},
1301+
});
1302+
expect(ftsSearchMock).toHaveBeenCalledWith(
1303+
'q',
1304+
'/tmp/test.db',
1305+
expect.objectContaining({ filePattern: ['tests/'] }),
1306+
);
1307+
});
12361308
});

0 commit comments

Comments
 (0)