Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [4.10.0] - 2025-11-24

### Added
- Added temporal filtering to search and repository APIs with support for git branch/revision filtering and repository index date filtering (since/until parameters). Supports both ISO 8601 and relative date formats (e.g., "30 days ago", "last week").
- Added support for streaming code search results. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Added buttons to toggle case sensitivity and regex patterns. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623)
- Added counts to members, requests, and invites tabs in the members settings. [#621](https://github.com/sourcebot-dev/sourcebot/pull/621)
Expand Down
4 changes: 2 additions & 2 deletions packages/backend/src/repoIndexManager.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import * as Sentry from '@sentry/node';
import { PrismaClient, Repo, RepoIndexingJobStatus, RepoIndexingJobType } from "@sourcebot/db";
import { createLogger, Logger } from "@sourcebot/shared";
import { env, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata, repoMetadataSchema } from '@sourcebot/shared';
import { env, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata, repoMetadataSchema, getRepoPath } from '@sourcebot/shared';
import { existsSync } from 'fs';
import { readdir, rm } from 'fs/promises';
import { Job, Queue, ReservedJob, Worker } from "groupmq";
Expand All @@ -12,7 +12,7 @@ import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName,
import { captureEvent } from './posthog.js';
import { PromClient } from './promClient.js';
import { RepoWithConnections, Settings } from "./types.js";
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js';
import { getAuthCredentialsForRepo, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js';
import { indexGitRepository } from './zoekt.js';

const LOG_TAG = 'repo-index-manager';
Expand Down
19 changes: 0 additions & 19 deletions packages/backend/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,25 +53,6 @@ export const arraysEqualShallow = <T>(a?: readonly T[], b?: readonly T[]) => {
return true;
}

// @note: this function is duplicated in `packages/web/src/features/fileTree/actions.ts`.
// @todo: we should move this to a shared package.
/**
 * Resolves the on-disk location of a repository and whether that location
 * must be treated as read-only.
 *
 * @param repo - The repository record; `cloneUrl` must be a parseable URL
 *               (the `URL` constructor throws otherwise).
 * @returns `path` and `isReadOnly`. For a `genericGitHost` repo whose clone URL
 *          uses the `file:` protocol, `path` is the URL's pathname and the repo
 *          is read-only. Otherwise `path` is `<REPOS_CACHE_DIR>/<repo.id>` and
 *          the repo is writable.
 */
export const getRepoPath = (repo: Repo): { path: string, isReadOnly: boolean } => {
    const { protocol, pathname } = new URL(repo.cloneUrl);
    const isLocalRepo = repo.external_codeHostType === 'genericGitHost' && protocol === 'file:';

    // Local repositories are used in place. They are flagged read-only because
    // write access to the local filesystem is not guaranteed.
    return isLocalRepo
        ? { path: pathname, isReadOnly: true }
        : { path: path.join(REPOS_CACHE_DIR, repo.id.toString()), isReadOnly: false };
}

/**
 * Builds the shard prefix for a repository: the organization id and the
 * repository id joined by an underscore (e.g. `1_42`).
 *
 * @param orgId - Numeric organization id.
 * @param repoId - Numeric repository id.
 * @returns The string `<orgId>_<repoId>`.
 */
export const getShardPrefix = (orgId: number, repoId: number) => {
    const orgPart = String(orgId);
    const repoPart = String(repoId);
    return orgPart + '_' + repoPart;
}
Expand Down
4 changes: 2 additions & 2 deletions packages/backend/src/zoekt.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { Repo } from "@sourcebot/db";
import { createLogger, env } from "@sourcebot/shared";
import { createLogger, env, getRepoPath } from "@sourcebot/shared";
import { exec } from "child_process";
import { INDEX_CACHE_DIR } from "./constants.js";
import { Settings } from "./types.js";
import { getRepoPath, getShardPrefix } from "./utils.js";
import { getShardPrefix } from "./utils.js";

const logger = createLogger('zoekt');

Expand Down
16 changes: 16 additions & 0 deletions packages/mcp/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- Added comprehensive relative date support for all temporal parameters (e.g., "30 days ago", "last week", "yesterday")
- Added `search_commits` tool to search commits by actual commit time with full temporal filtering. Accepts both numeric database IDs (e.g., 123) and string repository names (e.g., "github.com/owner/repo") for the `repoId` parameter, allowing direct use of repository names from `list_repos` output
- Added `since`/`until` parameters to `search_code` (filters by index time - when Sourcebot indexed the repo)
- Added `gitRevision` parameter to `search_code`
- Added `activeAfter`/`activeBefore` parameters to `list_repos` (filters by index time - when Sourcebot indexed the repo)
- Added date range validation to prevent invalid date ranges (since > until)
- Added 30-second timeout for git operations to handle large repositories
- Added enhanced error messages for git operations (timeout, repository not found, invalid git repository, ambiguous arguments)
- Added clarification that repositories must be cloned on the Sourcebot server's disk for `search_commits` to work
- Added comprehensive temporal parameter documentation to README with clear distinction between index time and commit time filtering
- Added comprehensive unit tests for date parsing utilities (90+ test cases)
- Added unit tests for git commit search functionality with mocking
- Added integration tests for temporal parameter validation
- Added unit tests for repository identifier resolution (both string and number types)

## [1.0.12] - 2026-01-13

### Fixed
Expand Down
52 changes: 47 additions & 5 deletions packages/mcp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ For a more detailed guide, checkout [the docs](https://docs.sourcebot.dev/docs/f

Fetches code that matches the provided regex pattern in `query`.

**Temporal Filtering**: Use `since` and `until` to filter by repository index time (when Sourcebot last indexed the repo). This is different from commit time. See `search_commits` for commit-time filtering.

<details>
<summary>Parameters</summary>

Expand All @@ -176,6 +178,9 @@ Fetches code that matches the provided regex pattern in `query`.
| `filterByLanguages` | no | Restrict search to specific languages (GitHub linguist format, e.g., Python, JavaScript). |
| `caseSensitive` | no | Case sensitive search (default: false). |
| `includeCodeSnippets` | no | Include code snippets in results (default: false). |
| `gitRevision` | no | Git revision to search (e.g., 'main', 'develop', 'v1.0.0'). Defaults to HEAD. |
| `since` | no | Only search repos indexed after this date. Supports ISO 8601 or relative (e.g., "30 days ago"). |
| `until` | no | Only search repos indexed before this date. Supports ISO 8601 or relative (e.g., "yesterday"). |
| `maxTokens` | no | Max tokens to return (default: env.DEFAULT_MINIMUM_TOKENS). |
</details>

Expand All @@ -184,14 +189,18 @@ Fetches code that matches the provided regex pattern in `query`.

Lists repositories indexed by Sourcebot with optional filtering and pagination.

**Temporal Filtering**: Use `activeAfter` and `activeBefore` to filter by repository index time (when Sourcebot last indexed the repo). This is the same filtering behavior as `search_code`'s `since`/`until` parameters.

<details>
<summary>Parameters</summary>

| Name | Required | Description |
|:-------------|:---------|:--------------------------------------------------------------------|
| `query` | no | Filter repositories by name (case-insensitive). |
| `pageNumber` | no | Page number (1-indexed, default: 1). |
| `limit` | no | Number of repositories per page (default: 50). |
| Name | Required | Description |
|:----------------|:---------|:-----------------------------------------------------------------------------------------------|
| `query` | no | Filter repositories by name (case-insensitive). |
| `pageNumber` | no | Page number (1-indexed, default: 1). |
| `limit` | no | Number of repositories per page (default: 50). |
| `activeAfter` | no | Only return repos indexed after this date. Supports ISO 8601 or relative (e.g., "30 days ago"). |
| `activeBefore` | no | Only return repos indexed before this date. Supports ISO 8601 or relative (e.g., "yesterday"). |

</details>

Expand All @@ -208,6 +217,39 @@ Fetches the source code for a given file.
| `repoId` | yes | The Sourcebot repository ID. |
</details>

### search_commits

Searches for commits in a specific repository based on actual commit time (NOT index time).

**Requirements**: Repository must be cloned on the Sourcebot server disk. Sourcebot automatically clones repositories during indexing, but the cloning process may not be finished when this query is executed. Use `list_repos` first to get the repository ID.

**Date Formats**: Supports ISO 8601 dates (e.g., "2024-01-01") and relative formats (e.g., "30 days ago", "last week", "yesterday").

<details>
<summary>Parameters</summary>

| Name | Required | Description |
|:-----------|:---------|:-----------------------------------------------------------------------------------------------|
| `repoId` | yes | Repository identifier: either numeric database ID (e.g., 123) or full repository name (e.g., "github.com/owner/repo") as returned by `list_repos`. |
| `query` | no | Search query to filter commits by message (case-insensitive). |
| `since` | no | Show commits after this date (by commit time). Supports ISO 8601 or relative formats. |
| `until` | no | Show commits before this date (by commit time). Supports ISO 8601 or relative formats. |
| `author` | no | Filter by author name or email (supports partial matches). |
| `maxCount` | no | Maximum number of commits to return (default: 50). |

</details>

## Date Format Examples

All temporal parameters support:
- **ISO 8601**: `"2024-01-01"`, `"2024-12-31T23:59:59Z"`
- **Relative dates**: `"30 days ago"`, `"1 week ago"`, `"last month"`, `"yesterday"`

**Important**: Different tools filter by different time dimensions:
- `search_code` `since`/`until`: Filters by **index time** (when Sourcebot indexed the repo)
- `list_repos` `activeAfter`/`activeBefore`: Filters by **index time** (when Sourcebot indexed the repo)
- `search_commits` `since`/`until`: Filters by **commit time** (actual git commit dates)


## Supported Code Hosts
Sourcebot supports the following code hosts:
Expand Down
26 changes: 23 additions & 3 deletions packages/mcp/src/client.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { env } from './env.js';
import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema } from './schemas.js';
import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError } from './types.js';
import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema, searchCommitsResponseSchema } from './schemas.js';
import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError, SearchCommitsRequest, SearchCommitsResponse } from './types.js';
import { isServiceError } from './utils.js';

export const search = async (request: SearchRequest): Promise<SearchResponse | ServiceError> => {
Expand All @@ -21,7 +21,9 @@ export const search = async (request: SearchRequest): Promise<SearchResponse | S
}

export const listRepos = async (): Promise<ListRepositoriesResponse | ServiceError> => {
const result = await fetch(`${env.SOURCEBOT_HOST}/api/repos`, {
const url = new URL(`${env.SOURCEBOT_HOST}/api/repos`);

const result = await fetch(url.toString(), {
method: 'GET',
headers: {
'Content-Type': 'application/json',
Expand Down Expand Up @@ -52,3 +54,21 @@ export const getFileSource = async (request: FileSourceRequest): Promise<FileSou

return fileSourceResponseSchema.parse(result);
}

/**
 * Searches commits through the Sourcebot `/api/commits` endpoint.
 *
 * POSTs `request` as a JSON body to `${env.SOURCEBOT_HOST}/api/commits`. The
 * `X-Sourcebot-Api-Key` header is attached only when `env.SOURCEBOT_API_KEY`
 * is set.
 *
 * @param request - The commit search request to send.
 * @returns The decoded body unchanged when it is a service error; otherwise the
 *          body validated against `searchCommitsResponseSchema`.
 * @throws When the schema validation (`parse`) rejects the response body.
 */
export const searchCommits = async (request: SearchCommitsRequest): Promise<SearchCommitsResponse | ServiceError> => {
    const response = await fetch(`${env.SOURCEBOT_HOST}/api/commits`, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'X-Org-Domain': '~',
            ...(env.SOURCEBOT_API_KEY ? { 'X-Sourcebot-Api-Key': env.SOURCEBOT_API_KEY } : {})
        },
        body: JSON.stringify(request)
    });
    const payload = await response.json();

    // Service errors are passed back to the caller as values rather than thrown.
    return isServiceError(payload)
        ? payload
        : searchCommitsResponseSchema.parse(payload);
}
49 changes: 46 additions & 3 deletions packages/mcp/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import escapeStringRegexp from 'escape-string-regexp';
import { z } from 'zod';
import { listRepos, search, getFileSource } from './client.js';
import { getFileSource, listRepos, search, searchCommits } from './client.js';
import { env, numberSchema } from './env.js';
import { listReposRequestSchema } from './schemas.js';
import { TextContent } from './types.js';
Expand Down Expand Up @@ -49,6 +49,10 @@ server.tool(
.boolean()
.describe(`Whether to include the code snippets in the response (default: false). If false, only the file's URL, repository, and language will be returned. Set to false to get a more concise response.`)
.optional(),
gitRevision: z
.string()
.describe(`The git revision to search in (e.g., 'main', 'HEAD', 'v1.0.0', 'a1b2c3d'). If not provided, defaults to the default branch (usually 'main' or 'master').`)
.optional(),
maxTokens: numberSchema
.describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`)
.transform((val) => (val < env.DEFAULT_MINIMUM_TOKENS ? env.DEFAULT_MINIMUM_TOKENS : val))
Expand All @@ -61,6 +65,7 @@ server.tool(
maxTokens = env.DEFAULT_MINIMUM_TOKENS,
includeCodeSnippets = false,
caseSensitive = false,
gitRevision,
}) => {
if (repoIds.length > 0) {
query += ` ( repo:${repoIds.map(id => escapeStringRegexp(id)).join(' or repo:')} )`;
Expand All @@ -70,13 +75,17 @@ server.tool(
query += ` ( lang:${languages.join(' or lang:')} )`;
}

if (gitRevision) {
query += ` ( rev:${gitRevision} )`;
}
Comment thread
brendan-kellam marked this conversation as resolved.

const response = await search({
query,
matches: env.DEFAULT_MATCHES,
contextLines: env.DEFAULT_CONTEXT_LINES,
isRegexEnabled: true,
isCaseSensitivityEnabled: caseSensitive,
source: 'mcp'
source: 'mcp',
});

if (isServiceError(response)) {
Expand Down Expand Up @@ -162,9 +171,43 @@ server.tool(
}
);

// Registers the `search_commits` MCP tool. The handler forwards the validated
// arguments to `searchCommits` and returns the outcome as a single text content item.
server.tool(
"search_commits",
`Searches for commits in a specific repository based on actual commit time. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`,
{
// Input schema: only `repoId` is required; `maxCount` falls back to 50 when omitted.
repoId: z.string().describe(`The repository to search commits in. This is the Sourcebot compatible repository ID as returned by 'list_repos'.`),
query: z.string().describe(`Search query to filter commits by message content (case-insensitive).`).optional(),
since: z.string().describe(`Show commits more recent than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week').`).optional(),
until: z.string().describe(`Show commits older than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').`).optional(),
author: z.string().describe(`Filter commits by author name or email (supports partial matches and patterns).`).optional(),
maxCount: z.number().int().positive().default(50).describe(`Maximum number of commits to return (default: 50).`),
},
async ({ repoId, query, since, until, author, maxCount }) => {
const result = await searchCommits({
// The tool exposes this argument as `repoId`; the request sent to `searchCommits` names it `repository`.
repository: repoId,
query,
since,
until,
author,
maxCount,
});

// Report service errors as an MCP error result (`isError: true`) instead of throwing.
if (isServiceError(result)) {
return {
content: [{ type: "text", text: `Error: ${result.message}` }],
isError: true,
};
}

// On success, return the result serialized as pretty-printed JSON (2-space indent).
return {
content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
};
}
);

server.tool(
"list_repos",
"Lists repositories in the organization with optional filtering and pagination. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.",
`Lists repositories in the organization with optional filtering and pagination. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`,
listReposRequestSchema.shape,
async ({ query, pageNumber = 1, limit = 50 }: {
query?: string;
Expand Down
23 changes: 22 additions & 1 deletion packages/mcp/src/schemas.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// @NOTE : Please keep this file in sync with @sourcebot/web/src/features/search/schemas.ts
// @NOTE : Please keep this file in sync with @sourcebot/web/src/features/search/types.ts
// At some point, we should move these to a shared package...
import { z } from "zod";

Expand Down Expand Up @@ -193,3 +193,24 @@ export const serviceErrorSchema = z.object({
errorCode: z.string(),
message: z.string(),
});

/**
 * Request body for a commit search.
 *
 * Only `repository` is required. `since` and `until` are accepted as plain
 * strings here; this schema performs no date parsing or range validation.
 * When provided, `maxCount` must be a positive integer no greater than 500.
 */
export const searchCommitsRequestSchema = z.object({
repository: z.string(),
query: z.string().optional(),
since: z.string().optional(),
until: z.string().optional(),
author: z.string().optional(),
maxCount: z.number().int().positive().max(500).optional(),
});

/**
 * A single commit entry in a commit search response. Every field is a
 * required string; `date` is not parsed or format-checked by this schema.
 *
 * NOTE(review): the snake_case `author_name` / `author_email` keys presumably
 * mirror the field names produced by the server's git log output — confirm
 * against the web package before renaming.
 */
export const commitSchema = z.object({
hash: z.string(),
date: z.string(),
message: z.string(),
refs: z.string(),
body: z.string(),
author_name: z.string(),
author_email: z.string(),
});

/** Successful commit search response: an array of `commitSchema` entries. */
export const searchCommitsResponseSchema = z.array(commitSchema);
5 changes: 5 additions & 0 deletions packages/mcp/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import {
fileSourceRequestSchema,
symbolSchema,
serviceErrorSchema,
searchCommitsRequestSchema,
searchCommitsResponseSchema,
} from "./schemas.js";
import { z } from "zod";

Expand All @@ -29,3 +31,6 @@ export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>;
export type TextContent = { type: "text", text: string };

export type ServiceError = z.infer<typeof serviceErrorSchema>;

// Commit search request/response types, inferred from their zod schemas so the
// static types cannot drift from the runtime validation.
export type SearchCommitsRequest = z.infer<typeof searchCommitsRequestSchema>;
export type SearchCommitsResponse = z.infer<typeof searchCommitsResponseSchema>;
Loading