Skip to content

Commit cbbe225

Browse files
committed
feat: add ref (commit SHA) to search and symbol navigation results
1 parent 4ec87e1 commit cbbe225

11 files changed

Lines changed: 151 additions & 18 deletions

File tree

docs/api-reference/sourcebot-public.openapi.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,10 @@
338338
"type": "string"
339339
}
340340
},
341+
"ref": {
342+
"type": "string",
343+
"description": "The git ref/commit SHA of the file."
344+
},
341345
"content": {
342346
"type": "string"
343347
}
@@ -829,6 +833,10 @@
829833
"language": {
830834
"type": "string"
831835
},
836+
"ref": {
837+
"type": "string",
838+
"description": "The git ref/commit SHA of the file."
839+
},
832840
"matches": {
833841
"type": "array",
834842
"items": {

packages/backend/src/repoCompileUtils.ts

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfi
1515
import { ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js";
1616
import path from 'path';
1717
import fs from 'fs/promises';
18+
import { fileURLToPath } from 'node:url';
1819
import { glob } from 'glob';
1920
import { getLocalDefaultBranch, getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js';
2021
import assert from 'assert';
@@ -106,7 +107,7 @@ export const createGitHubRepoRecord = ({
106107
.replace(/^https?:\/\//, '');
107108

108109
const repoDisplayName = repo.full_name;
109-
const repoName = path.join(repoNameRoot, repoDisplayName);
110+
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
110111
const cloneUrl = new URL(repo.clone_url!);
111112
const isPublic = repo.private === false;
112113

@@ -184,7 +185,7 @@ export const compileGitlabConfig = async (
184185
project.visibility === 'public' ||
185186
project.visibility === 'internal';
186187
const repoDisplayName = project.path_with_namespace;
187-
const repoName = path.join(repoNameRoot, repoDisplayName);
188+
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
188189
// project.avatar_url is not directly accessible with tokens; use the avatar API endpoint if available
189190
const avatarUrl = project.avatar_url
190191
? new URL(`/api/v4/projects/${project.id}/avatar`, hostUrl).toString()
@@ -263,7 +264,7 @@ export const compileGiteaConfig = async (
263264
const cloneUrl = new URL(repo.clone_url!);
264265
cloneUrl.host = configUrl.host
265266
const repoDisplayName = repo.full_name!;
266-
const repoName = path.join(repoNameRoot, repoDisplayName);
267+
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
267268
const isPublic = repo.internal === false && repo.private === false;
268269

269270
logger.debug(`Found gitea repo ${repoDisplayName} with webUrl: ${repo.html_url}`);
@@ -326,9 +327,9 @@ export const compileGerritConfig = async (
326327
.replace(/^https?:\/\//, '');
327328

328329
const repos = gerritRepos.map((project) => {
329-
const cloneUrl = new URL(path.join(hostUrl, encodeURIComponent(project.name)));
330+
const cloneUrl = new URL(path.posix.join(hostUrl, encodeURIComponent(project.name)));
330331
const repoDisplayName = project.name;
331-
const repoName = path.join(repoNameRoot, repoDisplayName);
332+
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
332333

333334
const webUrl = (() => {
334335
if (!project.web_links || project.web_links.length === 0) {
@@ -344,7 +345,7 @@ export const compileGerritConfig = async (
344345
// https://github.com/GerritCodeReview/plugins_gitiles/blob/5ee7f57/src/main/java/com/googlesource/gerrit/plugins/gitiles/GitilesWeblinks.java#L50
345346
if (webUrl.startsWith('/plugins/gitiles/')) {
346347
logger.debug(`WebUrl is a gitiles path, joining with hostUrl: ${webUrl}`);
347-
return new URL(path.join(hostUrl, webUrl)).toString();
348+
return new URL(path.posix.join(hostUrl, webUrl)).toString();
348349
} else {
349350
logger.debug(`WebUrl is not a gitiles path, returning as is: ${webUrl}`);
350351
return webUrl;
@@ -499,7 +500,7 @@ export const compileBitbucketConfig = async (
499500
: (repo as BitbucketCloudRepository).is_private === false;
500501
const isArchived = isServer ? (repo as BitbucketServerRepository).archived === true : false;
501502
const isFork = isServer ? (repo as BitbucketServerRepository).origin !== undefined : (repo as BitbucketCloudRepository).parent !== undefined;
502-
const repoName = path.join(repoNameRoot, displayName);
503+
const repoName = path.posix.join(repoNameRoot, displayName);
503504
const cloneUrl = getCloneUrl(repo);
504505
const webUrl = getWebUrl(repo);
505506
const defaultBranch = isServer ? (repo as BitbucketServerRepository).defaultBranch : (repo as BitbucketCloudRepository).mainbranch?.name;
@@ -590,8 +591,15 @@ export const compileGenericGitHostConfig_file = async (
590591
const configUrl = new URL(config.url);
591592
assert(configUrl.protocol === 'file:', 'config.url must be a file:// URL');
592593

594+
let folderPath: string;
595+
try {
596+
folderPath = fileURLToPath(configUrl).replace(/\\/g, '/');
597+
} catch {
598+
folderPath = configUrl.pathname;
599+
}
600+
593601
// Resolve the glob pattern to a list of repo-paths
594-
const repoPaths = await glob(configUrl.pathname, {
602+
const repoPaths = await glob(folderPath, {
595603
absolute: true,
596604
});
597605

@@ -600,7 +608,7 @@ export const compileGenericGitHostConfig_file = async (
600608

601609
// Warn if the glob pattern matched no paths at all
602610
if (repoPaths.length === 0) {
603-
const warning = `No paths matched the pattern '${configUrl.pathname}'. Please verify the path exists and is accessible.`;
611+
const warning = `No paths matched the pattern '${folderPath}'. Please verify the path exists and is accessible.`;
604612
logger.warn(warning);
605613
warnings.push(warning);
606614
return {
@@ -609,7 +617,7 @@ export const compileGenericGitHostConfig_file = async (
609617
};
610618
}
611619

612-
logger.debug(`Found ${repoPaths.length} path(s) matching pattern '${configUrl.pathname}'`);
620+
logger.debug(`Found ${repoPaths.length} path(s) matching pattern '${folderPath}'`);
613621

614622
await Promise.all(repoPaths.map((repoPath) => gitOperationLimit(async () => {
615623
const stat = await fs.stat(repoPath).catch(() => null);
@@ -651,7 +659,7 @@ export const compileGenericGitHostConfig_file = async (
651659
const hostWithPort = extractHostWithPort(origin) ?? remoteUrl.host;
652660
// Decode URL-encoded characters (e.g., %20 -> space) to ensure consistent repo names
653661
const decodedPathname = decodeURIComponent(remoteUrl.pathname);
654-
const repoName = path.join(hostWithPort, decodedPathname.replace(/\.git$/, ''));
662+
const repoName = path.posix.join(hostWithPort, decodedPathname.replace(/\.git$/, ''));
655663

656664
const repo: RepoData = {
657665
external_codeHostType: 'genericGitHost',
@@ -723,7 +731,7 @@ export const compileGenericGitHostConfig_url = async (
723731

724732
// @note: matches the naming here:
725733
// https://github.com/sourcebot-dev/zoekt/blob/main/gitindex/index.go#L293
726-
const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, ''));
734+
const repoName = path.posix.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, ''));
727735

728736
const repo: RepoData = {
729737
external_codeHostType: 'genericGitHost',
@@ -787,7 +795,7 @@ export const compileAzureDevOpsConfig = async (
787795
}
788796

789797
const repoDisplayName = `${repo.project.name}/${repo.name}`;
790-
const repoName = path.join(repoNameRoot, repoDisplayName);
798+
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
791799
const isPublic = repo.project.visibility === ProjectVisibility.Public;
792800

793801
if (!repo.remoteUrl) {

packages/backend/src/zoekt.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, revisio
1717
const command = [
1818
'zoekt-git-index',
1919
'-allow_missing_branches',
20-
`-index ${INDEX_CACHE_DIR}`,
20+
`-index "${INDEX_CACHE_DIR}"`,
2121
`-max_trigram_count ${settings.maxTrigramCount}`,
2222
`-file_limit ${settings.maxFileSize}`,
2323
`-branches "${revisions.join(',')}"`,
2424
`-tenant_id ${repo.orgId}`,
2525
`-repo_id ${repo.id}`,
2626
`-shard_prefix_override ${shardPrefix}`,
2727
...largeFileGlobPatterns.map((pattern) => `-large_file "${pattern}"`),
28-
repoPath
28+
`"${repoPath}"`
2929
].join(' ');
3030

3131
return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {

packages/shared/src/utils.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { readFile } from 'fs/promises';
22
import stripJsonComments from 'strip-json-comments';
33
import { z } from "zod";
4+
import { fileURLToPath } from 'node:url';
45
import { DEFAULT_CONFIG_SETTINGS } from "./constants.js";
56
import { ConfigSettings } from "./types.js";
67
import { Org, Repo } from "@sourcebot/db";
@@ -108,7 +109,7 @@ export const getRepoPath = (repo: Repo): { path: string, isReadOnly: boolean } =
108109
const cloneUrl = new URL(repo.cloneUrl);
109110
if (repo.external_codeHostType === 'genericGitHost' && cloneUrl.protocol === 'file:') {
110111
return {
111-
path: cloneUrl.pathname,
112+
path: fileURLToPath(cloneUrl),
112113
isReadOnly: true,
113114
}
114115
}
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import { describe, it, expect, vi, beforeEach } from 'vitest';
2+
3+
vi.mock('server-only', () => ({}));
4+
5+
import { findSearchBasedSymbolReferences, findSearchBasedSymbolDefinitions } from './api';
6+
import { search } from '@/features/search';
7+
8+
// Replace the search feature module with a stub whose `search` export is a
// bare vitest mock function.
vi.mock('@/features/search', () => ({
9+
search: vi.fn(),
10+
}));
11+
12+
// Stub `withOptionalAuth` so it immediately invokes the callback it is given,
// with no arguments, and returns whatever the callback returns.
vi.mock('@/middleware/withAuth', () => ({
13+
withOptionalAuth: (fn: any) => fn(),
14+
}));
15+
16+
// Stub `sew` the same way: call the wrapped function directly with no
// arguments and pass its result through.
vi.mock('@/middleware/sew', () => ({
17+
sew: (fn: any) => fn(),
18+
}));
19+
20+
// Canned search response fixture: a `stats` object, exactly one entry in
// `files`, an empty `repositoryInfo` list and `isSearchExhaustive: true`.
// The single file carries `ref: 'abcdef1234567890'` and one chunk with one
// match range.
const MOCK_SEARCH_RESPONSE = {
21+
stats: {
22+
actualMatchCount: 1,
23+
totalMatchCount: 1,
24+
duration: 100,
25+
fileCount: 1,
26+
filesSkipped: 0,
27+
contentBytesLoaded: 100,
28+
indexBytesLoaded: 100,
29+
crashes: 0,
30+
shardFilesConsidered: 1,
31+
filesConsidered: 1,
32+
filesLoaded: 1,
33+
shardsScanned: 1,
34+
shardsSkipped: 0,
35+
shardsSkippedFilter: 0,
36+
ngramMatches: 1,
37+
ngramLookups: 1,
38+
wait: 0,
39+
matchTreeConstruction: 10,
40+
matchTreeSearch: 90,
41+
regexpsConsidered: 0,
42+
flushReason: 'FLUSH_REASON_FINAL_FLUSH',
43+
},
44+
files: [
45+
{
46+
fileName: {
47+
text: 'src/index.ts',
48+
matchRanges: [],
49+
},
50+
repository: 'github.com/owner/repo',
51+
repositoryId: 123,
52+
webUrl: 'https://sourcebot.example.com/browse/github.com/owner/repo/blob/main/src/index.ts',
53+
language: 'TypeScript',
54+
ref: 'abcdef1234567890',
55+
chunks: [
56+
{
57+
content: 'const a = 1;',
58+
matchRanges: [
59+
{
60+
start: { byteOffset: 0, lineNumber: 1, column: 1 },
61+
end: { byteOffset: 12, lineNumber: 1, column: 13 },
62+
}
63+
],
64+
}
65+
],
66+
branches: ['main'],
67+
}
68+
],
69+
repositoryInfo: [],
70+
isSearchExhaustive: true,
71+
};
72+
73+
// Test suite for the two search-based code-navigation functions.
// Before each test, all mocks are cleared and the mocked `search` is set to
// resolve with MOCK_SEARCH_RESPONSE. Each test calls one function with the same
// symbol/repo/revision arguments and checks that the result is not a service
// error, contains exactly one file, and that the file's `ref` equals the `ref`
// value from the fixture.
describe('CodeNav Search-Based APIs', () => {
74+
beforeEach(() => {
75+
vi.clearAllMocks();
76+
vi.mocked(search).mockResolvedValue(MOCK_SEARCH_RESPONSE as any);
77+
});
78+
79+
describe('findSearchBasedSymbolReferences', () => {
80+
it('includes the ref (commit SHA) in the returned file results', async () => {
81+
const result = await findSearchBasedSymbolReferences({
82+
symbolName: 'mySymbol',
83+
repoName: 'github.com/owner/repo',
84+
revisionName: 'HEAD',
85+
});
86+
87+
expect(isServiceError(result)).toBe(false);
88+
const response = result as any;
89+
expect(response.files).toHaveLength(1);
90+
expect(response.files[0].ref).toBe('abcdef1234567890');
91+
});
92+
});
93+
94+
describe('findSearchBasedSymbolDefinitions', () => {
95+
it('includes the ref (commit SHA) in the returned file results', async () => {
96+
const result = await findSearchBasedSymbolDefinitions({
97+
symbolName: 'mySymbol',
98+
repoName: 'github.com/owner/repo',
99+
revisionName: 'HEAD',
100+
});
101+
102+
expect(isServiceError(result)).toBe(false);
103+
const response = result as any;
104+
expect(response.files).toHaveLength(1);
105+
expect(response.files[0].ref).toBe('abcdef1234567890');
106+
});
107+
});
108+
});
109+
110+
/**
 * Reports whether a value looks like a service error, i.e. it is a non-null
 * object that has an `errorCode` property (own or inherited).
 *
 * Always returns a real boolean. The previous `obj && ...` form returned the
 * falsy operand itself (`null`, `undefined`, `0`, `''`), which breaks strict
 * comparisons such as `expect(isServiceError(x)).toBe(false)`.
 *
 * @param obj - Any value, typically the result of an API call.
 * @returns `true` when `obj` is an object with an `errorCode` key, otherwise `false`.
 */
function isServiceError(obj: unknown): boolean {
    // `typeof null === 'object'`, so null has to be excluded explicitly before
    // the `in` check (which would throw on a non-object).
    return typeof obj === 'object' && obj !== null && 'errorCode' in obj;
}

packages/web/src/features/codeNav/api.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse): FindRe
138138
repositoryId: file.repositoryId,
139139
webUrl: file.webUrl,
140140
language: file.language,
141+
ref: file.ref,
141142
matches: chunks.flatMap((chunk) => {
142143
return chunk.matchRanges.map((range) => ({
143144
lineContent: chunk.content,

packages/web/src/features/codeNav/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ export const findRelatedSymbolsResponseSchema = z.object({
2727
repositoryId: z.number(),
2828
webUrl: z.string(),
2929
language: z.string(),
30+
ref: z.string().optional().describe('The git ref/commit SHA of the file.'),
3031
matches: z.array(z.object({
3132
lineContent: z.string(),
3233
range: rangeSchema,

packages/web/src/features/search/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ export const searchFileSchema = z.object({
7878
})).optional(),
7979
})),
8080
branches: z.array(z.string()).optional(),
81+
ref: z.string().optional().describe('The git ref/commit SHA of the file.'),
8182
// Set if `whole` is true.
8283
content: z.string().optional(),
8384
});

packages/web/src/features/search/zoektSearcher.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,7 @@ const transformZoektSearchResponse = async (response: ZoektGrpcSearchResponse, r
465465
}
466466
}),
467467
branches: file.branches,
468+
ref: file.version || undefined,
468469
content: file.content ? file.content.toString('utf-8') : undefined,
469470
}
470471
}).filter(file => file !== undefined);

packages/web/src/features/tools/findSymbolDefinitions.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ export const findSymbolDefinitionsDefinition: ToolDefinition<
7171
files: response.files.map((file) => ({
7272
fileName: file.fileName,
7373
repo: file.repository,
74-
revision,
74+
revision: file.ref ?? revision,
7575
})),
7676
};
7777

0 commit comments

Comments
 (0)