Skip to content

Commit bd3c915

Browse files
chore(web): use git show for fetching file contents (#829)
1 parent 0bd1c3a commit bd3c915

File tree

12 files changed

+135
-70
lines changed

12 files changed

+135
-70
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Changed
11+
- Changed the `/api/source` API to support fetching source code for any revision, not just revisions that are indexed by zoekt. [#829](https://github.com/sourcebot-dev/sourcebot/pull/829)
12+
1013
## [4.10.20] - 2026-01-28
1114

1215
### Fixed

packages/backend/src/github.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ export type OctokitRepository = {
2828
stargazers_count?: number,
2929
watchers_count?: number,
3030
subscribers_count?: number,
31+
default_branch?: string,
3132
forks_count?: number,
3233
archived?: boolean,
3334
topics?: string[],

packages/backend/src/repoCompileUtils.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ vi.mock('./git.js', () => ({
66
isPathAValidGitRepoRoot: vi.fn(),
77
getOriginUrl: vi.fn(),
88
isUrlAValidGitRepo: vi.fn(),
9+
getLocalDefaultBranch: vi.fn(),
910
}));
1011

1112
// Mock the glob module

packages/backend/src/repoCompileUtils.ts

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfi
1515
import { ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js";
1616
import path from 'path';
1717
import { glob } from 'glob';
18-
import { getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js';
18+
import { getLocalDefaultBranch, getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js';
1919
import assert from 'assert';
2020
import GitUrlParse from 'git-url-parse';
2121
import { RepoMetadata } from '@sourcebot/shared';
@@ -118,6 +118,7 @@ export const createGitHubRepoRecord = ({
118118
cloneUrl: cloneUrl.toString(),
119119
webUrl: repo.html_url,
120120
name: repoName,
121+
defaultBranch: repo.default_branch,
121122
displayName: repoDisplayName,
122123
imageUrl: repo.owner.avatar_url,
123124
isFork: repo.fork,
@@ -185,6 +186,7 @@ export const compileGitlabConfig = async (
185186
cloneUrl: cloneUrl.toString(),
186187
webUrl: projectUrl,
187188
name: repoName,
189+
defaultBranch: project.default_branch,
188190
displayName: repoDisplayName,
189191
imageUrl: avatarUrl,
190192
isFork: isFork,
@@ -257,6 +259,7 @@ export const compileGiteaConfig = async (
257259
webUrl: repo.html_url,
258260
name: repoName,
259261
displayName: repoDisplayName,
262+
defaultBranch: repo.default_branch,
260263
imageUrl: repo.owner?.avatar_url,
261264
isFork: repo.fork!,
262265
isPublic: isPublic,
@@ -339,6 +342,10 @@ export const compileGerritConfig = async (
339342
webUrl: webUrl,
340343
name: repoName,
341344
displayName: repoDisplayName,
345+
// @note: the gerrit api doesn't return the default branch (without a separate query).
346+
// Instead, the default branch will be set once the repo is cloned.
347+
// @see: repoIndexManager.ts
348+
defaultBranch: undefined,
342349
isFork: false,
343350
isArchived: false,
344351
org: {
@@ -444,6 +451,7 @@ export const compileBitbucketConfig = async (
444451
const repoName = path.join(repoNameRoot, displayName);
445452
const cloneUrl = getCloneUrl(repo);
446453
const webUrl = getWebUrl(repo);
454+
const defaultBranch = isServer ? (repo as BitbucketServerRepository).defaultBranch : (repo as BitbucketCloudRepository).mainbranch?.name;
447455

448456
const record: RepoData = {
449457
external_id: externalId,
@@ -453,6 +461,7 @@ export const compileBitbucketConfig = async (
453461
webUrl: webUrl,
454462
name: repoName,
455463
displayName: displayName,
464+
defaultBranch,
456465
isFork: isFork,
457466
isPublic: isPublic,
458467
isArchived: isArchived,
@@ -557,6 +566,8 @@ export const compileGenericGitHostConfig_file = async (
557566

558567
const remoteUrl = GitUrlParse(origin);
559568

569+
const defaultBranch = await getLocalDefaultBranch({ path: repoPath });
570+
560571
// @note: matches the naming here:
561572
// https://github.com/sourcebot-dev/zoekt/blob/main/gitindex/index.go#L293
562573
// Go's url.URL.Host includes the port if present (even default ports like 443),
@@ -573,6 +584,7 @@ export const compileGenericGitHostConfig_file = async (
573584
cloneUrl: `file://${repoPath}`,
574585
name: repoName,
575586
displayName: repoName,
587+
defaultBranch,
576588
isFork: false,
577589
isArchived: false,
578590
org: {
@@ -612,7 +624,6 @@ export const compileGenericGitHostConfig_file = async (
612624
}
613625
}
614626

615-
616627
export const compileGenericGitHostConfig_url = async (
617628
config: GenericGitHostConnectionConfig,
618629
connectionId: number,
@@ -645,6 +656,10 @@ export const compileGenericGitHostConfig_url = async (
645656
cloneUrl: remoteUrl.toString(),
646657
name: repoName,
647658
displayName: repoName,
659+
// @note: we can't determine the default branch from the remote url.
660+
// Instead, the default branch will be set once the repo is cloned.
661+
// @see: repoIndexManager.ts
662+
defaultBranch: undefined,
648663
isFork: false,
649664
isArchived: false,
650665
org: {
@@ -719,6 +734,7 @@ export const compileAzureDevOpsConfig = async (
719734
webUrl: webUrl,
720735
name: repoName,
721736
displayName: repoDisplayName,
737+
defaultBranch: repo.defaultBranch,
722738
imageUrl: null,
723739
isFork: !!repo.isFork,
724740
isArchived: false,

packages/backend/src/repoIndexManager.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,7 @@ export class RepoIndexManager {
498498
});
499499

500500
const pushedAt = await getLatestCommitTimestamp({ path: repoPath });
501+
const defaultBranch = await getLocalDefaultBranch({ path: repoPath });
501502

502503
const jobMetadata = repoIndexingJobMetadataSchema.parse(jobData.metadata);
503504

@@ -511,6 +512,13 @@ export class RepoIndexManager {
511512
...(jobData.repo.metadata as RepoMetadata),
512513
indexedRevisions: jobMetadata.indexedRevisions,
513514
} satisfies RepoMetadata,
515+
// @note: always update the default branch. While this field can be set
516+
// during connection syncing, by setting it here we ensure that a) the
517+
// default branch is as up to date as possible (since repo indexing happens
518+
// more frequently than connection syncing) and b) for hosts where it is
519+
// impossible to determine the default branch from the host's API
520+
// (e.g., generic git url), we still set the default branch here.
521+
defaultBranch: defaultBranch,
514522
}
515523
});
516524

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- AlterTable
2+
ALTER TABLE "Repo" ADD COLUMN "defaultBranch" TEXT;

packages/db/prisma/schema.prisma

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ model Repo {
5959
webUrl String?
6060
connections RepoToConnection[]
6161
imageUrl String?
62+
defaultBranch String?
6263
6364
permittedAccounts AccountToRepoPermission[]
6465
permissionSyncJobs RepoPermissionSyncJob[]

packages/web/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@
145145
"input-otp": "^1.4.2",
146146
"langfuse": "^3.38.4",
147147
"langfuse-vercel": "^3.38.4",
148+
"linguist-languages": "^9.3.1",
148149
"lucide-react": "^0.517.0",
149150
"micromatch": "^4.0.8",
150151
"next": "15.5.9",
Lines changed: 52 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,85 +1,70 @@
11
import 'server-only';
2-
import { fileNotFound, ServiceError, unexpectedError } from "../../lib/serviceError";
2+
import { fileNotFound, notFound, ServiceError, unexpectedError } from "../../lib/serviceError";
33
import { FileSourceRequest, FileSourceResponse } from "./types";
4-
import { isServiceError } from "../../lib/utils";
5-
import { search } from "./searchApi";
64
import { sew } from "@/actions";
75
import { withOptionalAuthV2 } from "@/withAuthV2";
8-
import { QueryIR } from './ir';
9-
import escapeStringRegexp from "escape-string-regexp";
6+
import { getRepoPath } from '@sourcebot/shared';
7+
import { simpleGit } from 'simple-git';
8+
import { detectLanguageFromFilename } from "@/lib/languageDetection";
9+
import { getBrowsePath } from "@/app/[domain]/browse/hooks/utils";
10+
import { getCodeHostBrowseFileAtBranchUrl } from "@/lib/utils";
11+
import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants";
1012

11-
// @todo (bkellam) #574 : We should really be using `git show <hash>:<path>` to fetch file contents here.
12-
// This will allow us to support permalinks to files at a specific revision that may not be indexed
13-
// by zoekt. We should also refactor this out of the /search folder.
14-
15-
export const getFileSource = async ({ path, repo, ref }: FileSourceRequest): Promise<FileSourceResponse | ServiceError> => sew(() =>
16-
withOptionalAuthV2(async () => {
17-
const query: QueryIR = {
18-
and: {
19-
children: [
20-
{
21-
repo: {
22-
regexp: `^${escapeStringRegexp(repo)}$`,
23-
},
24-
},
25-
{
26-
substring: {
27-
pattern: path,
28-
case_sensitive: true,
29-
file_name: true,
30-
content: false,
31-
}
32-
},
33-
...(ref ? [{
34-
branch: {
35-
pattern: ref,
36-
exact: true,
37-
},
38-
}]: [])
39-
]
40-
}
41-
}
42-
43-
const searchResponse = await search({
44-
queryType: 'ir',
45-
query,
46-
options: {
47-
matches: 1,
48-
whole: true,
49-
}
13+
export const getFileSource = async ({ path: filePath, repo: repoName, ref }: FileSourceRequest): Promise<FileSourceResponse | ServiceError> => sew(() =>
14+
withOptionalAuthV2(async ({ org, prisma }) => {
15+
const repo = await prisma.repo.findFirst({
16+
where: { name: repoName, orgId: org.id },
5017
});
51-
52-
if (isServiceError(searchResponse)) {
53-
return searchResponse;
18+
if (!repo) {
19+
return notFound(`Repository "${repoName}" not found.`);
5420
}
5521

56-
const files = searchResponse.files;
57-
58-
if (!files || files.length === 0) {
59-
return fileNotFound(path, repo);
60-
}
22+
const { path: repoPath } = getRepoPath(repo);
23+
const git = simpleGit().cwd(repoPath);
6124

62-
const file = files[0];
63-
const source = file.content ?? '';
64-
const language = file.language;
25+
const gitRef = ref ??
26+
repo.defaultBranch ??
27+
'HEAD';
6528

66-
const repoInfo = searchResponse.repositoryInfo.find((repo) => repo.id === file.repositoryId);
67-
if (!repoInfo) {
68-
// This should never happen.
69-
return unexpectedError("Repository info not found");
29+
let source: string;
30+
try {
31+
source = await git.raw(['show', `${gitRef}:${filePath}`]);
32+
} catch (error: unknown) {
33+
const errorMessage = error instanceof Error ? error.message : String(error);
34+
if (errorMessage.includes('does not exist') || errorMessage.includes('fatal: path')) {
35+
return fileNotFound(filePath, repoName);
36+
}
37+
if (errorMessage.includes('unknown revision') || errorMessage.includes('bad revision') || errorMessage.includes('invalid object name')) {
38+
return unexpectedError(`Invalid git reference: ${gitRef}`);
39+
}
40+
throw error;
7041
}
7142

43+
const language = detectLanguageFromFilename(filePath);
44+
const webUrl = getBrowsePath({
45+
repoName: repo.name,
46+
revisionName: ref,
47+
path: filePath,
48+
pathType: 'blob',
49+
domain: SINGLE_TENANT_ORG_DOMAIN,
50+
});
51+
const externalWebUrl = getCodeHostBrowseFileAtBranchUrl({
52+
webUrl: repo.webUrl,
53+
codeHostType: repo.external_codeHostType,
54+
branchName: gitRef,
55+
filePath,
56+
});
57+
7258
return {
7359
source,
7460
language,
75-
path,
76-
repo,
77-
repoCodeHostType: repoInfo.codeHostType,
78-
repoDisplayName: repoInfo.displayName,
79-
repoExternalWebUrl: repoInfo.webUrl,
61+
path: filePath,
62+
repo: repoName,
63+
repoCodeHostType: repo.external_codeHostType,
64+
repoDisplayName: repo.displayName ?? undefined,
65+
repoExternalWebUrl: repo.webUrl ?? undefined,
8066
branch: ref,
81-
webUrl: file.webUrl,
82-
externalWebUrl: file.externalWebUrl,
67+
webUrl,
68+
externalWebUrl,
8369
} satisfies FileSourceResponse;
84-
8570
}));
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import * as linguistLanguages from 'linguist-languages';
2+
import path from 'path';
3+
4+
const extensionToLanguage = new Map<string, string>();
5+
6+
for (const [languageName, languageData] of Object.entries(linguistLanguages)) {
7+
if ('extensions' in languageData && languageData.extensions) {
8+
for (const ext of languageData.extensions) {
9+
const normalizedExt = ext.toLowerCase();
10+
if (!extensionToLanguage.has(normalizedExt)) {
11+
extensionToLanguage.set(normalizedExt, languageName);
12+
}
13+
}
14+
}
15+
if ('filenames' in languageData && languageData.filenames) {
16+
for (const filename of languageData.filenames) {
17+
if (!extensionToLanguage.has(filename)) {
18+
extensionToLanguage.set(filename, languageName);
19+
}
20+
}
21+
}
22+
}
23+
24+
export const detectLanguageFromFilename = (filename: string): string => {
25+
const basename = path.basename(filename);
26+
27+
// Check for exact filename match (e.g., Makefile, Dockerfile)
28+
if (extensionToLanguage.has(basename)) {
29+
return extensionToLanguage.get(basename)!;
30+
}
31+
32+
// Check for extension match
33+
const ext = path.extname(filename).toLowerCase();
34+
if (ext && extensionToLanguage.has(ext)) {
35+
return extensionToLanguage.get(ext)!;
36+
}
37+
38+
return '';
39+
};

0 commit comments

Comments
 (0)