Skip to content

Commit b4ce314

Browse files
feedback
1 parent 73889f1 commit b4ce314

17 files changed

Lines changed: 305 additions & 671 deletions

File tree

packages/backend/src/repoIndexManager.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import * as Sentry from '@sentry/node';
22
import { PrismaClient, Repo, RepoIndexingJobStatus, RepoIndexingJobType } from "@sourcebot/db";
33
import { createLogger, Logger } from "@sourcebot/shared";
4-
import { env, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata, repoMetadataSchema } from '@sourcebot/shared';
4+
import { env, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata, repoMetadataSchema, getRepoPath } from '@sourcebot/shared';
55
import { existsSync } from 'fs';
66
import { readdir, rm } from 'fs/promises';
77
import { Job, Queue, ReservedJob, Worker } from "groupmq";
@@ -12,7 +12,7 @@ import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName,
1212
import { captureEvent } from './posthog.js';
1313
import { PromClient } from './promClient.js';
1414
import { RepoWithConnections, Settings } from "./types.js";
15-
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js';
15+
import { getAuthCredentialsForRepo, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js';
1616
import { indexGitRepository } from './zoekt.js';
1717

1818
const LOG_TAG = 'repo-index-manager';

packages/backend/src/utils.ts

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -53,25 +53,6 @@ export const arraysEqualShallow = <T>(a?: readonly T[], b?: readonly T[]) => {
5353
return true;
5454
}
5555

56-
// @note: this function is duplicated in `packages/web/src/features/fileTree/actions.ts`.
57-
// @todo: we should move this to a shared package.
58-
export const getRepoPath = (repo: Repo): { path: string, isReadOnly: boolean } => {
59-
// If we are dealing with a local repository, then use that as the path.
60-
// Mark as read-only since we aren't guaranteed to have write access to the local filesystem.
61-
const cloneUrl = new URL(repo.cloneUrl);
62-
if (repo.external_codeHostType === 'genericGitHost' && cloneUrl.protocol === 'file:') {
63-
return {
64-
path: cloneUrl.pathname,
65-
isReadOnly: true,
66-
}
67-
}
68-
69-
return {
70-
path: path.join(REPOS_CACHE_DIR, repo.id.toString()),
71-
isReadOnly: false,
72-
}
73-
}
74-
7556
export const getShardPrefix = (orgId: number, repoId: number) => {
7657
return `${orgId}_${repoId}`;
7758
}

packages/backend/src/zoekt.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
import { Repo } from "@sourcebot/db";
2-
import { createLogger, env } from "@sourcebot/shared";
2+
import { createLogger, env, getRepoPath } from "@sourcebot/shared";
33
import { exec } from "child_process";
44
import { INDEX_CACHE_DIR } from "./constants.js";
55
import { Settings } from "./types.js";
6-
import { getRepoPath, getShardPrefix } from "./utils.js";
6+
import { getShardPrefix } from "./utils.js";
77

88
const logger = createLogger('zoekt');
99

packages/mcp/src/client.ts

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -20,14 +20,8 @@ export const search = async (request: SearchRequest): Promise<SearchResponse | S
2020
return searchResponseSchema.parse(result);
2121
}
2222

23-
export const listRepos = async (params?: { activeAfter?: string, activeBefore?: string }): Promise<ListRepositoriesResponse | ServiceError> => {
23+
export const listRepos = async (): Promise<ListRepositoriesResponse | ServiceError> => {
2424
const url = new URL(`${env.SOURCEBOT_HOST}/api/repos`);
25-
if (params?.activeAfter) {
26-
url.searchParams.append('activeAfter', params.activeAfter);
27-
}
28-
if (params?.activeBefore) {
29-
url.searchParams.append('activeBefore', params.activeBefore);
30-
}
3125

3226
const result = await fetch(url.toString(), {
3327
method: 'GET',

packages/mcp/src/index.ts

Lines changed: 14 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
55
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
66
import escapeStringRegexp from 'escape-string-regexp';
77
import { z } from 'zod';
8-
import { listRepos, search, getFileSource, searchCommits } from './client.js';
8+
import { getFileSource, listRepos, search, searchCommits } from './client.js';
99
import { env, numberSchema } from './env.js';
1010
import { listReposRequestSchema } from './schemas.js';
1111
import { TextContent } from './types.js';
@@ -53,14 +53,6 @@ server.tool(
5353
.string()
5454
.describe(`The git revision to search in (e.g., 'main', 'HEAD', 'v1.0.0', 'a1b2c3d'). If not provided, defaults to the default branch (usually 'main' or 'master').`)
5555
.optional(),
56-
since: z
57-
.string()
58-
.describe(`Filter repositories by when they were last indexed by Sourcebot (NOT by commit time). Only searches in repos indexed after this date. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week', 'yesterday').`)
59-
.optional(),
60-
until: z
61-
.string()
62-
.describe(`Filter repositories by when they were last indexed by Sourcebot (NOT by commit time). Only searches in repos indexed before this date. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').`)
63-
.optional(),
6456
maxTokens: numberSchema
6557
.describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`)
6658
.transform((val) => (val < env.DEFAULT_MINIMUM_TOKENS ? env.DEFAULT_MINIMUM_TOKENS : val))
@@ -74,8 +66,6 @@ server.tool(
7466
includeCodeSnippets = false,
7567
caseSensitive = false,
7668
gitRevision,
77-
since,
78-
until,
7969
}) => {
8070
if (repoIds.length > 0) {
8171
query += ` ( repo:${repoIds.map(id => escapeStringRegexp(id)).join(' or repo:')} )`;
@@ -85,16 +75,17 @@ server.tool(
8575
query += ` ( lang:${languages.join(' or lang:')} )`;
8676
}
8777

78+
if (gitRevision) {
79+
query += ` ( rev:${gitRevision} )`;
80+
}
81+
8882
const response = await search({
8983
query,
9084
matches: env.DEFAULT_MATCHES,
9185
contextLines: env.DEFAULT_CONTEXT_LINES,
9286
isRegexEnabled: true,
9387
isCaseSensitivityEnabled: caseSensitive,
9488
source: 'mcp',
95-
gitRevision,
96-
since,
97-
until,
9889
});
9990

10091
if (isServiceError(response)) {
@@ -182,21 +173,9 @@ server.tool(
182173

183174
server.tool(
184175
"search_commits",
185-
`Searches for commits in a specific repository based on actual commit time (NOT index time).
186-
187-
**Requirements**: The repository must be cloned on the Sourcebot server disk. Sourcebot automatically clones repositories during indexing, but the cloning process may not be finished when this query is executed. If the repository is not found on the server disk, an error will be returned asking you to try again later.
188-
189-
**Date Formats**: Supports ISO 8601 (e.g., "2024-01-01") or relative formats (e.g., "30 days ago", "last week", "yesterday").
190-
191-
**YOU MUST** call 'list_repos' first to obtain the exact repository ID.
192-
193-
If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`,
176+
`Searches for commits in a specific repository based on actual commit time. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`,
194177
{
195-
repoId: z.union([z.number(), z.string()]).describe(`Repository identifier. Can be either:
196-
- Numeric database ID (e.g., 123)
197-
- Full repository name (e.g., "github.com/owner/repo") as returned by 'list_repos'
198-
199-
**YOU MUST** call 'list_repos' first to obtain the repository identifier.`),
178+
repoId: z.string().describe(`The repository to search commits in. This is the Sourcebot compatible repository ID as returned by 'list_repos'.`),
200179
query: z.string().describe(`Search query to filter commits by message content (case-insensitive).`).optional(),
201180
since: z.string().describe(`Show commits more recent than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week').`).optional(),
202181
until: z.string().describe(`Show commits older than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').`).optional(),
@@ -205,7 +184,7 @@ server.tool(
205184
},
206185
async ({ repoId, query, since, until, author, maxCount }) => {
207186
const result = await searchCommits({
208-
repoId,
187+
repository: repoId,
209188
query,
210189
since,
211190
until,
@@ -228,47 +207,14 @@ server.tool(
228207

229208
server.tool(
230209
"list_repos",
231-
`Lists repositories in the organization with optional filtering and pagination.
232-
233-
**Temporal Filtering**: When using 'activeAfter' or 'activeBefore', only repositories indexed within the specified timeframe are returned. This filters by when Sourcebot last indexed the repository (indexedAt), NOT by git commit dates. For commit-time filtering, use 'search_commits'. When temporal filters are applied, the output includes a 'lastIndexed' field showing when each repository was last indexed.
234-
235-
**Date Formats**: Supports ISO 8601 (e.g., "2024-01-01") and relative dates (e.g., "30 days ago", "last week", "yesterday").
236-
237-
If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`,
238-
{
239-
query: z
240-
.string()
241-
.describe("Filter repositories by name (case-insensitive).")
242-
.optional(),
243-
pageNumber: z
244-
.number()
245-
.int()
246-
.positive()
247-
.describe("Page number (1-indexed, default: 1)")
248-
.default(1),
249-
limit: z
250-
.number()
251-
.int()
252-
.positive()
253-
.describe("Number of repositories per page (default: 50)")
254-
.default(50),
255-
activeAfter: z
256-
.string()
257-
.describe("Only return repositories indexed after this date (filters by indexedAt). Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week').")
258-
.optional(),
259-
activeBefore: z
260-
.string()
261-
.describe("Only return repositories indexed before this date (filters by indexedAt). Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').")
262-
.optional(),
263-
},
264-
async ({ query, pageNumber = 1, limit = 50, activeAfter, activeBefore }: {
210+
`Lists repositories in the organization with optional filtering and pagination. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`,
211+
listReposRequestSchema.shape,
212+
async ({ query, pageNumber = 1, limit = 50 }: {
265213
query?: string;
266214
pageNumber?: number;
267215
limit?: number;
268-
activeAfter?: string;
269-
activeBefore?: string;
270216
}) => {
271-
const response = await listRepos({ activeAfter, activeBefore });
217+
const response = await listRepos();
272218
if (isServiceError(response)) {
273219
return {
274220
content: [{
@@ -298,17 +244,10 @@ server.tool(
298244

299245
// Format output
300246
const content: TextContent[] = paginated.map(repo => {
301-
const repoUrl = repo.webUrl ?? repo.repoCloneUrl;
302-
let output = `id: ${repo.repoName}\nurl: ${repoUrl}`;
303-
304-
// Include indexedAt when temporal filtering is used
305-
if ((activeAfter || activeBefore) && repo.indexedAt) {
306-
output += `\nlastIndexed: ${repo.indexedAt.toISOString()}`;
307-
}
308-
247+
const repoUrl = repo.webUrl ?? repo.repoCloneUrl;
309248
return {
310249
type: "text",
311-
text: output,
250+
text: `id: ${repo.repoName}\nurl: ${repoUrl}`,
312251
}
313252
});
314253

packages/mcp/src/schemas.ts

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -27,9 +27,6 @@ export const searchOptionsSchema = z.object({
2727
whole: z.boolean().optional(), // Whether to return the whole file as part of the response.
2828
isRegexEnabled: z.boolean().optional(), // Whether to enable regular expression search.
2929
isCaseSensitivityEnabled: z.boolean().optional(), // Whether to enable case sensitivity.
30-
gitRevision: z.string().optional(), // Filter by git branch/revision.
31-
since: z.string().optional(), // Filter repositories by indexed date (start). Filters by when the repo was last indexed by Sourcebot, not by commit time.
32-
until: z.string().optional(), // Filter repositories by indexed date (end). Filters by when the repo was last indexed by Sourcebot, not by commit time.
3330
});
3431

3532
export const searchRequestSchema = z.object({
@@ -146,7 +143,6 @@ export const searchResponseSchema = z.object({
146143
})),
147144
repositoryInfo: z.array(repositoryInfoSchema),
148145
isSearchExhaustive: z.boolean(),
149-
isBranchFilteringEnabled: z.boolean().optional(), // Whether branch filtering is enabled for this search.
150146
});
151147

152148
export const repositoryQuerySchema = z.object({
@@ -199,7 +195,7 @@ export const serviceErrorSchema = z.object({
199195
});
200196

201197
export const searchCommitsRequestSchema = z.object({
202-
repoId: z.union([z.number(), z.string()]),
198+
repository: z.string(),
203199
query: z.string().optional(),
204200
since: z.string().optional(),
205201
until: z.string().optional(),

packages/shared/src/constants.server.ts

Lines changed: 0 additions & 7 deletions
This file was deleted.

packages/shared/src/env.server.ts

Lines changed: 70 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,12 @@
1+
import { indexSchema } from "@sourcebot/schemas/v3/index.schema";
2+
import { SourcebotConfig } from "@sourcebot/schemas/v3/index.type";
13
import { createEnv } from "@t3-oss/env-core";
4+
import { Ajv } from "ajv";
5+
import { readFile } from 'fs/promises';
6+
import stripJsonComments from "strip-json-comments";
27
import { z } from "zod";
3-
import { loadConfig } from "./utils.js";
4-
import { tenancyModeSchema } from "./types.js";
5-
import { SourcebotConfig } from "@sourcebot/schemas/v3/index.type";
68
import { getTokenFromConfig } from "./crypto.js";
9+
import { tenancyModeSchema } from "./types.js";
710

811
// Booleans are specified as 'true' or 'false' strings.
912
const booleanSchema = z.enum(["true", "false"]);
@@ -13,6 +16,10 @@ const booleanSchema = z.enum(["true", "false"]);
1316
// @see: https://zod.dev/?id=coercion-for-primitives
1417
const numberSchema = z.coerce.number();
1518

19+
const ajv = new Ajv({
20+
validateFormats: false,
21+
});
22+
1623
export const resolveEnvironmentVariableOverridesFromConfig = async (config: SourcebotConfig): Promise<Record<string, string>> => {
1724
if (!config.environmentOverrides) {
1825
return {};
@@ -45,6 +52,66 @@ export const resolveEnvironmentVariableOverridesFromConfig = async (config: Sour
4552
return resolved;
4653
}
4754

55+
export const isRemotePath = (path: string) => {
56+
return path.startsWith('https://') || path.startsWith('http://');
57+
}
58+
59+
export const loadConfig = async (configPath?: string): Promise<SourcebotConfig> => {
60+
if (!configPath) {
61+
throw new Error('CONFIG_PATH is required but not provided');
62+
}
63+
64+
const configContent = await (async () => {
65+
if (isRemotePath(configPath)) {
66+
const response = await fetch(configPath);
67+
if (!response.ok) {
68+
throw new Error(`Failed to fetch config file ${configPath}: ${response.statusText}`);
69+
}
70+
return response.text();
71+
} else {
72+
// Retry logic for handling race conditions with mounted volumes
73+
const maxAttempts = 5;
74+
const retryDelayMs = 2000;
75+
let lastError: Error | null = null;
76+
77+
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
78+
try {
79+
return await readFile(configPath, {
80+
encoding: 'utf-8',
81+
});
82+
} catch (error) {
83+
lastError = error as Error;
84+
85+
// Only retry on ENOENT errors (file not found)
86+
if ((error as NodeJS.ErrnoException)?.code !== 'ENOENT') {
87+
throw error; // Throw immediately for non-ENOENT errors
88+
}
89+
90+
// Log warning before retry (except on the last attempt)
91+
if (attempt < maxAttempts) {
92+
console.warn(`Config file not found, retrying in 2s... (Attempt ${attempt}/${maxAttempts})`);
93+
await new Promise(resolve => setTimeout(resolve, retryDelayMs));
94+
}
95+
}
96+
}
97+
98+
// If we've exhausted all retries, throw the last ENOENT error
99+
if (lastError) {
100+
throw lastError;
101+
}
102+
103+
throw new Error('Failed to load config after all retry attempts');
104+
}
105+
})();
106+
107+
const config = JSON.parse(stripJsonComments(configContent)) as SourcebotConfig;
108+
const isValidConfig = ajv.validate(indexSchema, config);
109+
if (!isValidConfig) {
110+
throw new Error(`Config file '${configPath}' is invalid: ${ajv.errorsText(ajv.errors)}`);
111+
}
112+
return config;
113+
}
114+
48115
// Merge process.env with environment variables resolved from config.json
49116
const runtimeEnv = await (async () => {
50117
const configPath = process.env.CONFIG_PATH;

packages/shared/src/index.server.ts

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -20,19 +20,16 @@ export {
2020
} from "./types.js";
2121
export {
2222
base64Decode,
23-
loadConfig,
2423
loadJsonFile,
25-
isRemotePath,
2624
getConfigSettings,
25+
getRepoPath,
2726
} from "./utils.js";
2827
export * from "./constants.js";
29-
export {
30-
REPOS_CACHE_DIR,
31-
INDEX_CACHE_DIR,
32-
} from "./constants.server.js";
3328
export {
3429
env,
3530
resolveEnvironmentVariableOverridesFromConfig,
31+
loadConfig,
32+
isRemotePath,
3633
} from "./env.server.js";
3734
export {
3835
createLogger,

0 commit comments

Comments
 (0)