Skip to content

Commit 22700d9

Browse files
committed
feat: add ref (commit SHA) to search and symbol navigation results
1 parent 4ec87e1 commit 22700d9

11 files changed

Lines changed: 217 additions & 18 deletions

File tree

docs/api-reference/sourcebot-public.openapi.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,10 @@
338338
"type": "string"
339339
}
340340
},
341+
"ref": {
342+
"type": "string",
343+
"description": "The git ref/commit SHA of the file."
344+
},
341345
"content": {
342346
"type": "string"
343347
}
@@ -829,6 +833,10 @@
829833
"language": {
830834
"type": "string"
831835
},
836+
"ref": {
837+
"type": "string",
838+
"description": "The git ref/commit SHA of the file."
839+
},
832840
"matches": {
833841
"type": "array",
834842
"items": {

packages/backend/src/repoCompileUtils.ts

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfi
1515
import { ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js";
1616
import path from 'path';
1717
import fs from 'fs/promises';
18+
import { fileURLToPath } from 'node:url';
1819
import { glob } from 'glob';
1920
import { getLocalDefaultBranch, getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js';
2021
import assert from 'assert';
@@ -106,7 +107,7 @@ export const createGitHubRepoRecord = ({
106107
.replace(/^https?:\/\//, '');
107108

108109
const repoDisplayName = repo.full_name;
109-
const repoName = path.join(repoNameRoot, repoDisplayName);
110+
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
110111
const cloneUrl = new URL(repo.clone_url!);
111112
const isPublic = repo.private === false;
112113

@@ -184,7 +185,7 @@ export const compileGitlabConfig = async (
184185
project.visibility === 'public' ||
185186
project.visibility === 'internal';
186187
const repoDisplayName = project.path_with_namespace;
187-
const repoName = path.join(repoNameRoot, repoDisplayName);
188+
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
188189
// project.avatar_url is not directly accessible with tokens; use the avatar API endpoint if available
189190
const avatarUrl = project.avatar_url
190191
? new URL(`/api/v4/projects/${project.id}/avatar`, hostUrl).toString()
@@ -263,7 +264,7 @@ export const compileGiteaConfig = async (
263264
const cloneUrl = new URL(repo.clone_url!);
264265
cloneUrl.host = configUrl.host
265266
const repoDisplayName = repo.full_name!;
266-
const repoName = path.join(repoNameRoot, repoDisplayName);
267+
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
267268
const isPublic = repo.internal === false && repo.private === false;
268269

269270
logger.debug(`Found gitea repo ${repoDisplayName} with webUrl: ${repo.html_url}`);
@@ -326,9 +327,9 @@ export const compileGerritConfig = async (
326327
.replace(/^https?:\/\//, '');
327328

328329
const repos = gerritRepos.map((project) => {
329-
const cloneUrl = new URL(path.join(hostUrl, encodeURIComponent(project.name)));
330+
const cloneUrl = new URL(path.posix.join(hostUrl, encodeURIComponent(project.name)));
330331
const repoDisplayName = project.name;
331-
const repoName = path.join(repoNameRoot, repoDisplayName);
332+
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
332333

333334
const webUrl = (() => {
334335
if (!project.web_links || project.web_links.length === 0) {
@@ -344,7 +345,7 @@ export const compileGerritConfig = async (
344345
// https://github.com/GerritCodeReview/plugins_gitiles/blob/5ee7f57/src/main/java/com/googlesource/gerrit/plugins/gitiles/GitilesWeblinks.java#L50
345346
if (webUrl.startsWith('/plugins/gitiles/')) {
346347
logger.debug(`WebUrl is a gitiles path, joining with hostUrl: ${webUrl}`);
347-
return new URL(path.join(hostUrl, webUrl)).toString();
348+
return new URL(path.posix.join(hostUrl, webUrl)).toString();
348349
} else {
349350
logger.debug(`WebUrl is not a gitiles path, returning as is: ${webUrl}`);
350351
return webUrl;
@@ -499,7 +500,7 @@ export const compileBitbucketConfig = async (
499500
: (repo as BitbucketCloudRepository).is_private === false;
500501
const isArchived = isServer ? (repo as BitbucketServerRepository).archived === true : false;
501502
const isFork = isServer ? (repo as BitbucketServerRepository).origin !== undefined : (repo as BitbucketCloudRepository).parent !== undefined;
502-
const repoName = path.join(repoNameRoot, displayName);
503+
const repoName = path.posix.join(repoNameRoot, displayName);
503504
const cloneUrl = getCloneUrl(repo);
504505
const webUrl = getWebUrl(repo);
505506
const defaultBranch = isServer ? (repo as BitbucketServerRepository).defaultBranch : (repo as BitbucketCloudRepository).mainbranch?.name;
@@ -583,15 +584,32 @@ export const compileGenericGitHostConfig = async (
583584
}
584585
}
585586

587+
/**
588+
* Compiles a generic git host configuration backed by a local `file://` URL.
589+
* Resolves the file URL to a filesystem path using `fileURLToPath`, rewriting backslashes to `/` (and falling back to the raw URL pathname if that conversion throws),
590+
* applies glob matching, validates each matched path as a git repository root, and produces
591+
* repository records with POSIX-normalized names.
592+
*
593+
* @param config - The generic git host connection configuration with a `file://` URL.
594+
* @param connectionId - The database ID of the connection record.
595+
* @returns A CompileResult containing the resolved repository data and any warnings.
596+
*/
586597
export const compileGenericGitHostConfig_file = async (
587598
config: GenericGitHostConnectionConfig,
588599
connectionId: number,
589600
): Promise<CompileResult> => {
590601
const configUrl = new URL(config.url);
591602
assert(configUrl.protocol === 'file:', 'config.url must be a file:// URL');
592603

604+
let folderPath: string;
605+
try {
606+
folderPath = fileURLToPath(configUrl).replace(/\\/g, '/');
607+
} catch {
608+
folderPath = configUrl.pathname;
609+
}
610+
593611
// Resolve the glob pattern to a list of repo-paths
594-
const repoPaths = await glob(configUrl.pathname, {
612+
const repoPaths = await glob(folderPath, {
595613
absolute: true,
596614
});
597615

@@ -600,7 +618,7 @@ export const compileGenericGitHostConfig_file = async (
600618

601619
// Warn if the glob pattern matched no paths at all
602620
if (repoPaths.length === 0) {
603-
const warning = `No paths matched the pattern '${configUrl.pathname}'. Please verify the path exists and is accessible.`;
621+
const warning = `No paths matched the pattern '${folderPath}'. Please verify the path exists and is accessible.`;
604622
logger.warn(warning);
605623
warnings.push(warning);
606624
return {
@@ -609,7 +627,7 @@ export const compileGenericGitHostConfig_file = async (
609627
};
610628
}
611629

612-
logger.debug(`Found ${repoPaths.length} path(s) matching pattern '${configUrl.pathname}'`);
630+
logger.debug(`Found ${repoPaths.length} path(s) matching pattern '${folderPath}'`);
613631

614632
await Promise.all(repoPaths.map((repoPath) => gitOperationLimit(async () => {
615633
const stat = await fs.stat(repoPath).catch(() => null);
@@ -651,7 +669,7 @@ export const compileGenericGitHostConfig_file = async (
651669
const hostWithPort = extractHostWithPort(origin) ?? remoteUrl.host;
652670
// Decode URL-encoded characters (e.g., %20 -> space) to ensure consistent repo names
653671
const decodedPathname = decodeURIComponent(remoteUrl.pathname);
654-
const repoName = path.join(hostWithPort, decodedPathname.replace(/\.git$/, ''));
672+
const repoName = path.posix.join(hostWithPort, decodedPathname.replace(/\.git$/, ''));
655673

656674
const repo: RepoData = {
657675
external_codeHostType: 'genericGitHost',
@@ -723,7 +741,7 @@ export const compileGenericGitHostConfig_url = async (
723741

724742
// @note: matches the naming here:
725743
// https://github.com/sourcebot-dev/zoekt/blob/main/gitindex/index.go#L293
726-
const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, ''));
744+
const repoName = path.posix.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, ''));
727745

728746
const repo: RepoData = {
729747
external_codeHostType: 'genericGitHost',
@@ -787,7 +805,7 @@ export const compileAzureDevOpsConfig = async (
787805
}
788806

789807
const repoDisplayName = `${repo.project.name}/${repo.name}`;
790-
const repoName = path.join(repoNameRoot, repoDisplayName);
808+
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
791809
const isPublic = repo.project.visibility === ProjectVisibility.Public;
792810

793811
if (!repo.remoteUrl) {

packages/backend/src/zoekt.ts

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,16 @@ import { getShardPrefix } from "./utils.js";
88

99
const logger = createLogger('zoekt');
1010

11+
/**
12+
* Indexes a git repository using Zoekt (zoekt-git-index CLI).
13+
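* Wraps the index directory and the repository path in double quotes so that paths containing spaces (e.g. on Windows) stay intact when the arguments are joined into a single command string.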
14+
*
15+
* @param repo - The repository database record to index.
16+
* @param settings - The global or custom configuration settings for the indexing limits.
17+
* @param revisions - The git branch/revision references to index.
18+
* @param signal - Optional AbortSignal to cancel the indexing process.
19+
* @returns A promise that resolves to stdout/stderr of the zoekt process.
20+
*/
1121
export const indexGitRepository = async (repo: Repo, settings: Settings, revisions: string[], signal?: AbortSignal) => {
1222
const { path: repoPath } = getRepoPath(repo);
1323
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
@@ -17,15 +27,15 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, revisio
1727
const command = [
1828
'zoekt-git-index',
1929
'-allow_missing_branches',
20-
`-index ${INDEX_CACHE_DIR}`,
30+
`-index "${INDEX_CACHE_DIR}"`,
2131
`-max_trigram_count ${settings.maxTrigramCount}`,
2232
`-file_limit ${settings.maxFileSize}`,
2333
`-branches "${revisions.join(',')}"`,
2434
`-tenant_id ${repo.orgId}`,
2535
`-repo_id ${repo.id}`,
2636
`-shard_prefix_override ${shardPrefix}`,
2737
...largeFileGlobPatterns.map((pattern) => `-large_file "${pattern}"`),
28-
repoPath
38+
`"${repoPath}"`
2939
].join(' ');
3040

3141
return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {

packages/shared/src/utils.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { readFile } from 'fs/promises';
22
import stripJsonComments from 'strip-json-comments';
33
import { z } from "zod";
4+
import { fileURLToPath } from 'node:url';
45
import { DEFAULT_CONFIG_SETTINGS } from "./constants.js";
56
import { ConfigSettings } from "./types.js";
67
import { Org, Repo } from "@sourcebot/db";
@@ -102,13 +103,20 @@ export const getRepoIdFromPath = (repoPath: string): number | undefined => {
102103
return isNaN(id) ? undefined : id;
103104
}
104105

106+
/**
107+
* Resolves the filesystem path for a given repository.
108+
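* For a `genericGitHost` repository whose clone URL uses the `file:` protocol, the URL is converted to a filesystem path with `fileURLToPath` (which also percent-decodes it) and the result is flagged read-only.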
109+
*
110+
* @param repo - The repository record from the database.
111+
* @returns An object containing the absolute path to the repository and whether it should be treated as read-only.
112+
*/
105113
export const getRepoPath = (repo: Repo): { path: string, isReadOnly: boolean } => {
106114
// If we are dealing with a local repository, then use that as the path.
107115
// Mark as read-only since we aren't guaranteed to have write access to the local filesystem.
108116
const cloneUrl = new URL(repo.cloneUrl);
109117
if (repo.external_codeHostType === 'genericGitHost' && cloneUrl.protocol === 'file:') {
110118
return {
111-
path: cloneUrl.pathname,
119+
path: fileURLToPath(cloneUrl),
112120
isReadOnly: true,
113121
}
114122
}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import { describe, it, expect, vi, beforeEach } from 'vitest';
2+
3+
vi.mock('server-only', () => ({}));
4+
5+
import { findSearchBasedSymbolReferences, findSearchBasedSymbolDefinitions } from './api';
6+
import { search } from '@/features/search';
7+
8+
vi.mock('@/features/search', () => ({
9+
search: vi.fn(),
10+
}));
11+
12+
vi.mock('@/middleware/withAuth', () => ({
13+
withOptionalAuth: (fn: any) => fn(),
14+
}));
15+
16+
vi.mock('@/middleware/sew', () => ({
17+
sew: (fn: any) => fn(),
18+
}));
19+
20+
const MOCK_SEARCH_RESPONSE = {
21+
stats: {
22+
actualMatchCount: 1,
23+
totalMatchCount: 1,
24+
duration: 100,
25+
fileCount: 1,
26+
filesSkipped: 0,
27+
contentBytesLoaded: 100,
28+
indexBytesLoaded: 100,
29+
crashes: 0,
30+
shardFilesConsidered: 1,
31+
filesConsidered: 1,
32+
filesLoaded: 1,
33+
shardsScanned: 1,
34+
shardsSkipped: 0,
35+
shardsSkippedFilter: 0,
36+
ngramMatches: 1,
37+
ngramLookups: 1,
38+
wait: 0,
39+
matchTreeConstruction: 10,
40+
matchTreeSearch: 90,
41+
regexpsConsidered: 0,
42+
flushReason: 'FLUSH_REASON_FINAL_FLUSH',
43+
},
44+
files: [
45+
{
46+
fileName: {
47+
text: 'src/index.ts',
48+
matchRanges: [],
49+
},
50+
repository: 'github.com/owner/repo',
51+
repositoryId: 123,
52+
webUrl: 'https://sourcebot.example.com/browse/github.com/owner/repo/blob/main/src/index.ts',
53+
language: 'TypeScript',
54+
ref: 'abcdef1234567890',
55+
chunks: [
56+
{
57+
content: 'const a = 1;',
58+
matchRanges: [
59+
{
60+
start: { byteOffset: 0, lineNumber: 1, column: 1 },
61+
end: { byteOffset: 12, lineNumber: 1, column: 13 },
62+
}
63+
],
64+
}
65+
],
66+
branches: ['main'],
67+
}
68+
],
69+
repositoryInfo: [],
70+
isSearchExhaustive: true,
71+
};
72+
73+
describe('CodeNav Search-Based APIs', () => {
74+
beforeEach(() => {
75+
vi.clearAllMocks();
76+
vi.mocked(search).mockResolvedValue(MOCK_SEARCH_RESPONSE as any);
77+
});
78+
79+
describe('findSearchBasedSymbolReferences', () => {
80+
it('includes the ref (commit SHA) in the returned file results', async () => {
81+
const result = await findSearchBasedSymbolReferences({
82+
symbolName: 'mySymbol',
83+
repoName: 'github.com/owner/repo',
84+
revisionName: 'HEAD',
85+
});
86+
87+
expect(isServiceError(result)).toBe(false);
88+
const response = result as any;
89+
expect(response.files).toHaveLength(1);
90+
expect(response.files[0].ref).toBe('abcdef1234567890');
91+
});
92+
});
93+
94+
describe('findSearchBasedSymbolDefinitions', () => {
95+
it('includes the ref (commit SHA) in the returned file results', async () => {
96+
const result = await findSearchBasedSymbolDefinitions({
97+
symbolName: 'mySymbol',
98+
repoName: 'github.com/owner/repo',
99+
revisionName: 'HEAD',
100+
});
101+
102+
expect(isServiceError(result)).toBe(false);
103+
const response = result as any;
104+
expect(response.files).toHaveLength(1);
105+
expect(response.files[0].ref).toBe('abcdef1234567890');
106+
});
107+
});
108+
});
109+
110+
/**
 * Reports whether a value looks like a ServiceError, i.e. is a non-null object
 * carrying an `errorCode` property.
 *
 * Always returns a real boolean: falsy inputs such as `null`, `undefined`, `0`
 * or `''` yield `false` instead of being passed through by `&&`, so
 * `expect(isServiceError(x)).toBe(false)` holds for them as well.
 *
 * @param obj - The value to inspect.
 * @returns True if `obj` is a non-null object with an `errorCode` key.
 */
function isServiceError(obj: unknown): boolean {
    // `typeof null === 'object'`, so null must be excluded explicitly before `in`.
    return typeof obj === 'object' && obj !== null && 'errorCode' in obj;
}

0 commit comments

Comments
 (0)