diff --git a/CHANGELOG.md b/CHANGELOG.md index d3887f16c..d2913a6c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `/api/commits/authors` to the public API to allow fetching a list of authors for a given path and revision. [#1150](https://github.com/sourcebot-dev/sourcebot/pull/1150) - Added optional `path` query parameter to the `/api/diff` endpoint and `get_diff` MCP tool to restrict diffs to changes touching a specific file. [#1154](https://github.com/sourcebot-dev/sourcebot/pull/1154) - Added collapsible file diffs in the commit diff panel. [#1157](https://github.com/sourcebot-dev/sourcebot/pull/1157) +- Added `/api/blame` to the public API to fetch per-line blame information for a file at a given revision. [#1158](https://github.com/sourcebot-dev/sourcebot/pull/1158) ### Fixed - Bumped `postcss` to `8.5.10`. [#1155](https://github.com/sourcebot-dev/sourcebot/pull/1155) diff --git a/docs/api-reference/sourcebot-public.openapi.json b/docs/api-reference/sourcebot-public.openapi.json index df27ed331..8a42e942a 100644 --- a/docs/api-reference/sourcebot-public.openapi.json +++ b/docs/api-reference/sourcebot-public.openapi.json @@ -587,6 +587,97 @@ "webUrl" ] }, + "PublicFileBlameResponse": { + "type": "object", + "properties": { + "ranges": { + "type": "array", + "items": { + "type": "object", + "properties": { + "hash": { + "type": "string", + "description": "The hash of the commit that last modified the lines in this range." + }, + "startLine": { + "type": "integer", + "minimum": 0, + "exclusiveMinimum": true, + "description": "The 1-based line number where the range begins (inclusive)." + }, + "lineCount": { + "type": "integer", + "minimum": 0, + "exclusiveMinimum": true, + "description": "The number of contiguous lines in this range." 
+ } + }, + "required": [ + "hash", + "startLine", + "lineCount" + ] + }, + "description": "Contiguous, non-overlapping line ranges ordered by startLine. Each range is attributed to a single commit." + }, + "commits": { + "type": "object", + "additionalProperties": { + "type": "object", + "properties": { + "hash": { + "type": "string", + "description": "The full commit SHA." + }, + "date": { + "type": "string", + "description": "The commit date in ISO 8601 format." + }, + "message": { + "type": "string", + "description": "The commit subject line." + }, + "authorName": { + "type": "string" + }, + "authorEmail": { + "type": "string" + }, + "previous": { + "type": "object", + "properties": { + "hash": { + "type": "string", + "description": "The hash of the commit that previously affected these lines (i.e., the next step backwards in the blame walk)." + }, + "path": { + "type": "string", + "description": "The file path as it existed at the previous commit. May differ from the current path due to renames." + } + }, + "required": [ + "hash", + "path" + ], + "description": "Pointer to the previous commit that affected these lines, with the file path as it existed there. Absent when the commit introduced the lines (no earlier history to walk to)." + } + }, + "required": [ + "hash", + "date", + "message", + "authorName", + "authorEmail" + ] + }, + "description": "Commit metadata keyed by hash, deduplicated across ranges." + } + }, + "required": [ + "ranges", + "commits" + ] + }, "PublicGetTreeRequest": { "type": "object", "properties": { @@ -1476,6 +1567,90 @@ } } }, + "/api/blame": { + "get": { + "operationId": "getFileBlame", + "tags": [ + "Git" + ], + "summary": "Get file blame", + "description": "Returns blame information for a file at a given repository path and optional git ref.\n\nThe response is split into two parts:\n- `ranges`: contiguous, non-overlapping line ranges, each attributed to a single commit. 
Ordered by `startLine`.\n- `commits`: commit metadata (hash, date, message, author, optional `previous` pointer for walking back through history) keyed by hash, deduplicated across ranges.\n\nWhole-file renames are followed automatically. Cross-file line moves and copies are not.", + "parameters": [ + { + "schema": { + "type": "string", + "description": "The file path to blame, relative to the repository root." + }, + "required": true, + "description": "The file path to blame, relative to the repository root.", + "name": "path", + "in": "query" + }, + { + "schema": { + "type": "string", + "description": "The fully-qualified repository name." + }, + "required": true, + "description": "The fully-qualified repository name.", + "name": "repo", + "in": "query" + }, + { + "schema": { + "type": "string", + "description": "The git ref (branch, tag, or commit SHA) to blame at. Defaults to the repository's default branch." + }, + "required": false, + "description": "The git ref (branch, tag, or commit SHA) to blame at. 
Defaults to the repository's default branch.", + "name": "ref", + "in": "query" + } + ], + "responses": { + "200": { + "description": "Blame ranges and deduplicated commit metadata.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PublicFileBlameResponse" + } + } + } + }, + "400": { + "description": "Invalid query parameters or git ref.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PublicApiServiceError" + } + } + } + }, + "404": { + "description": "Repository or file not found.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PublicApiServiceError" + } + } + } + }, + "500": { + "description": "Unexpected blame retrieval failure.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PublicApiServiceError" + } + } + } + } + } + } + }, "/api/tree": { "post": { "operationId": "getFileTree", diff --git a/docs/docs.json b/docs/docs.json index 7dbe333ed..d70056533 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -177,6 +177,7 @@ "GET /api/commits", "GET /api/commits/authors", "GET /api/source", + "GET /api/blame", "POST /api/tree" ] }, diff --git a/docs/docs/configuration/audit-logs.mdx b/docs/docs/configuration/audit-logs.mdx index 4551b24c9..bd3308ee7 100644 --- a/docs/docs/configuration/audit-logs.mdx +++ b/docs/docs/configuration/audit-logs.mdx @@ -125,6 +125,7 @@ curl --request GET '$SOURCEBOT_URL/api/ee/audit' \ | `user.created_ask_chat` | `user` | `org` | | `user.creation_failed` | `user` | `user` | | `user.delete` | `user` | `user` | +| `user.fetched_file_blame` | `user` | `org` | | `user.fetched_file_source` | `user` | `org` | | `user.fetched_file_tree` | `user` | `org` | | `user.invite_accept_failed` | `user` | `invite` | diff --git a/packages/web/src/app/api/(server)/blame/route.ts b/packages/web/src/app/api/(server)/blame/route.ts new file mode 100644 index 000000000..a7deef2a7 --- /dev/null +++ 
b/packages/web/src/app/api/(server)/blame/route.ts @@ -0,0 +1,37 @@ +'use server'; + +import { getFileBlame } from '@/features/git'; +import { fileBlameRequestSchema } from '@/features/git/schemas'; +import { apiHandler } from "@/lib/apiHandler"; +import { queryParamsSchemaValidationError, serviceErrorResponse } from "@/lib/serviceError"; +import { isServiceError } from "@/lib/utils"; +import { NextRequest } from "next/server"; + +export const GET = apiHandler(async (request: NextRequest) => { + const rawParams = Object.fromEntries( + Object.keys(fileBlameRequestSchema.shape).map(key => [ + key, + request.nextUrl.searchParams.get(key) ?? undefined + ]) + ); + const parsed = fileBlameRequestSchema.safeParse(rawParams); + + if (!parsed.success) { + return serviceErrorResponse( + queryParamsSchemaValidationError(parsed.error) + ); + } + + const { repo, path, ref } = parsed.data; + const response = await getFileBlame({ + path, + repo, + ref, + }); + + if (isServiceError(response)) { + return serviceErrorResponse(response); + } + + return Response.json(response); +}); diff --git a/packages/web/src/features/git/getFileBlameApi.ts b/packages/web/src/features/git/getFileBlameApi.ts new file mode 100644 index 000000000..ae12a3a0d --- /dev/null +++ b/packages/web/src/features/git/getFileBlameApi.ts @@ -0,0 +1,190 @@ +import { sew } from "@/middleware/sew"; +import { getAuditService } from '@/ee/features/audit/factory'; +import { ServiceError, notFound, fileNotFound, invalidGitRef, unresolvedGitRef, unexpectedError } from '@/lib/serviceError'; +import { withOptionalAuth } from '@/middleware/withAuth'; +import { getRepoPath } from '@sourcebot/shared'; +import { headers } from 'next/headers'; +import simpleGit from 'simple-git'; +import type z from 'zod'; +import { isGitRefValid, isPathValid } from './utils'; +import { fileBlameRequestSchema, fileBlameResponseSchema } from './schemas'; + +export { fileBlameRequestSchema, fileBlameResponseSchema } from './schemas'; +export type 
FileBlameRequest = z.infer<typeof fileBlameRequestSchema>; +export type FileBlameResponse = z.infer<typeof fileBlameResponseSchema>; + +type CommitMeta = FileBlameResponse['commits'][string]; + +/** + * Parses `git blame --porcelain` output into ranges and deduplicated commit metadata. + * + * Format reference: each blamed line produces an entry of the form + * + * <hash> <orig-line> <final-line> [<num-lines>] (4-field header → first line of a group) + * [author <name> (metadata block, only on first + * author-mail <<email>> appearance of a commit globally) + * author-time <unix-seconds> + * author-tz <+/-HHMM> + * committer ... + * summary <subject> + * previous <hash> <path> (optional) + * filename <path>] + * \t<content> + * + * Within a contiguous group of lines from the same commit, only the first line's + * header carries `<num-lines>`; subsequent lines have a 3-field header. We detect + * group boundaries via the presence of `<num-lines>` and emit one range per group. + */ +const parsePorcelainBlame = (output: string): FileBlameResponse => { + const ranges: FileBlameResponse['ranges'] = []; + const commits: Record<string, CommitMeta> = {}; + + if (output.length === 0) { + return { ranges, commits }; + } + + const rawLines = output.split('\n'); + let i = 0; + + while (i < rawLines.length) { + const headerLine = rawLines[i]; + if (headerLine.length === 0) { + i++; + continue; + } + + const headerParts = headerLine.split(' '); + const hash = headerParts[0]; + const finalLine = parseInt(headerParts[2], 10); + if (!hash || Number.isNaN(finalLine)) { + throw new Error(`Malformed git blame porcelain header: "${headerLine}"`); + } + + // Group-start headers carry a 4th field with the group size; intra-group + // continuation headers have only 3 fields and don't start a new range. + const isGroupStart = headerParts.length >= 4; + const lineCount = isGroupStart ? parseInt(headerParts[3], 10) : NaN; + if (isGroupStart && Number.isNaN(lineCount)) { + throw new Error(`Malformed git blame porcelain header (bad num-lines): "${headerLine}"`); + } + + i++; + + // Metadata lines may appear after any header but only the first time we + // see a given commit.
Accumulate whatever's there until the "\t" + // sentinel; for continuation lines this loop usually exits immediately. + let authorName: string | undefined; + let authorMail: string | undefined; + let date: string | undefined; + let message: string | undefined; + let previous: CommitMeta['previous'] | undefined; + + while (i < rawLines.length && !rawLines[i].startsWith('\t')) { + const fieldLine = rawLines[i]; + const spaceIdx = fieldLine.indexOf(' '); + const key = spaceIdx === -1 ? fieldLine : fieldLine.substring(0, spaceIdx); + const value = spaceIdx === -1 ? '' : fieldLine.substring(spaceIdx + 1); + + if (key === 'author') { + authorName = value; + } else if (key === 'author-mail') { + authorMail = value.replace(/^<|>$/g, ''); + } else if (key === 'author-time') { + date = new Date(parseInt(value, 10) * 1000).toISOString(); + } else if (key === 'summary') { + message = value; + } else if (key === 'previous') { + // "previous <hash> <path>" — path may contain spaces, so split once. + const sep = value.indexOf(' '); + if (sep !== -1) { + previous = { + hash: value.substring(0, sep), + path: value.substring(sep + 1), + }; + } + } + // committer*, filename, boundary are intentionally ignored. + + i++; + } + + // Skip the "\t" sentinel; the file source is fetched separately. + if (i < rawLines.length && rawLines[i].startsWith('\t')) { + i++; + } + + if (!commits[hash] && authorName !== undefined && authorMail !== undefined && date !== undefined && message !== undefined) { + commits[hash] = { + hash, + date, + message, + authorName, + authorEmail: authorMail, + ...(previous ?
{ previous } : {}), + }; + } + + if (isGroupStart) { + ranges.push({ hash, startLine: finalLine, lineCount }); + } + } + + return { ranges, commits }; +}; + +export const getFileBlame = async ({ path: filePath, repo: repoName, ref }: FileBlameRequest, { source }: { source?: string } = {}): Promise<FileBlameResponse | ServiceError> => + sew(() => + withOptionalAuth(async ({ org, prisma, user }) => { + if (user) { + const resolvedSource = source ?? (await headers()).get('X-Sourcebot-Client-Source') ?? undefined; + await getAuditService().createAudit({ + action: 'user.fetched_file_blame', + actor: { id: user.id, type: 'user' }, + target: { id: org.id.toString(), type: 'org' }, + orgId: org.id, + metadata: { source: resolvedSource }, + }); + } + + const repo = await prisma.repo.findFirst({ + where: { name: repoName, orgId: org.id }, + }); + if (!repo) { + return notFound(`Repository "${repoName}" not found.`); + } + + if (!isPathValid(filePath)) { + return fileNotFound(filePath, repoName); + } + + if (ref !== undefined && !isGitRefValid(ref)) { + return invalidGitRef(ref); + } + + const { path: repoPath } = getRepoPath(repo); + const git = simpleGit().cwd(repoPath); + + const gitRef = ref ?? repo.defaultBranch ?? 'HEAD'; + + let porcelain: string; + try { + porcelain = await git.raw(['blame', '--porcelain', gitRef, '--', filePath]); + } catch (error: unknown) { + const errorMessage = error instanceof Error ?
error.message : String(error); + if (errorMessage.includes('no such path') || errorMessage.includes('does not exist') || errorMessage.includes('fatal: path') || errorMessage.includes('no such file')) { + return fileNotFound(filePath, repoName); + } + if (errorMessage.includes('unknown revision') || errorMessage.includes('bad revision') || errorMessage.includes('invalid object name')) { + return unresolvedGitRef(gitRef); + } + return unexpectedError(errorMessage); + } + + try { + return parsePorcelainBlame(porcelain) satisfies FileBlameResponse; + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + return unexpectedError(`Failed to parse git blame output: ${errorMessage}`); + } + }) + ); diff --git a/packages/web/src/features/git/index.ts b/packages/web/src/features/git/index.ts index 19d5b7f2f..896058701 100644 --- a/packages/web/src/features/git/index.ts +++ b/packages/web/src/features/git/index.ts @@ -4,6 +4,7 @@ export * from './getFilesApi'; export * from './getFolderContentsApi'; export * from './getTreeApi'; export * from './getFileSourceApi'; +export * from './getFileBlameApi'; export * from './listCommitsApi'; export * from './listCommitAuthorsApi'; export * from './getPathTypeApi'; diff --git a/packages/web/src/features/git/schemas.ts b/packages/web/src/features/git/schemas.ts index bae52e8ab..de1ed9ec3 100644 --- a/packages/web/src/features/git/schemas.ts +++ b/packages/web/src/features/git/schemas.ts @@ -110,3 +110,34 @@ export const commitAuthorSchema = z.object({ email: z.string(), commitCount: z.number().int().nonnegative(), }); + +export const fileBlameRequestSchema = z.object({ + path: z.string().describe('The file path to blame, relative to the repository root.'), + repo: z.string().describe('The fully-qualified repository name.'), + ref: z.string().optional().describe('The git ref (branch, tag, or commit SHA) to blame at. 
Defaults to the repository\'s default branch.'), +}); + +export const blameRangeSchema = z.object({ + hash: z.string().describe('The hash of the commit that last modified the lines in this range.'), + startLine: z.number().int().positive().describe('The 1-based line number where the range begins (inclusive).'), + lineCount: z.number().int().positive().describe('The number of contiguous lines in this range.'), +}); + +export const blamePreviousSchema = z.object({ + hash: z.string().describe('The hash of the commit that previously affected these lines (i.e., the next step backwards in the blame walk).'), + path: z.string().describe('The file path as it existed at the previous commit. May differ from the current path due to renames.'), +}); + +export const blameCommitSchema = z.object({ + hash: z.string().describe('The full commit SHA.'), + date: z.string().describe('The commit date in ISO 8601 format.'), + message: z.string().describe('The commit subject line.'), + authorName: z.string(), + authorEmail: z.string(), + previous: blamePreviousSchema.optional().describe('Pointer to the previous commit that affected these lines, with the file path as it existed there. Absent when the commit introduced the lines (no earlier history to walk to).'), +}); + +export const fileBlameResponseSchema = z.object({ + ranges: z.array(blameRangeSchema).describe('Contiguous, non-overlapping line ranges ordered by startLine. 
Each range is attributed to a single commit.'), + commits: z.record(z.string(), blameCommitSchema).describe('Commit metadata keyed by hash, deduplicated across ranges.'), +}); diff --git a/packages/web/src/openapi/publicApiDocument.ts b/packages/web/src/openapi/publicApiDocument.ts index 61b2c1c2c..fbfaf42fc 100644 --- a/packages/web/src/openapi/publicApiDocument.ts +++ b/packages/web/src/openapi/publicApiDocument.ts @@ -8,6 +8,8 @@ import { publicEeDeleteUserResponseSchema, publicEeUserSchema, publicEeUsersResponseSchema, + publicFileBlameRequestSchema, + publicFileBlameResponseSchema, publicFileSourceRequestSchema, publicFileSourceResponseSchema, publicFindSymbolsRequestSchema, @@ -220,6 +222,35 @@ export function createPublicOpenApiDocument(version: string) { }, }); + registry.registerPath({ + method: 'get', + path: '/api/blame', + operationId: 'getFileBlame', + tags: [gitTag.name], + summary: 'Get file blame', + description: dedent` + Returns blame information for a file at a given repository path and optional git ref. + + The response is split into two parts: + - \`ranges\`: contiguous, non-overlapping line ranges, each attributed to a single commit. Ordered by \`startLine\`. + - \`commits\`: commit metadata (hash, date, message, author, optional \`previous\` pointer for walking back through history) keyed by hash, deduplicated across ranges. + + Whole-file renames are followed automatically. Cross-file line moves and copies are not. 
+ `, + request: { + query: publicFileBlameRequestSchema, + }, + responses: { + 200: { + description: 'Blame ranges and deduplicated commit metadata.', + content: jsonContent(publicFileBlameResponseSchema), + }, + 400: errorJson('Invalid query parameters or git ref.'), + 404: errorJson('Repository or file not found.'), + 500: errorJson('Unexpected blame retrieval failure.'), + }, + }); + registry.registerPath({ method: 'post', path: '/api/tree', diff --git a/packages/web/src/openapi/publicApiSchemas.ts b/packages/web/src/openapi/publicApiSchemas.ts index daea50e39..b55c3f3cb 100644 --- a/packages/web/src/openapi/publicApiSchemas.ts +++ b/packages/web/src/openapi/publicApiSchemas.ts @@ -8,6 +8,8 @@ import { commitAuthorSchema, commitDetailSchema, commitSchema, + fileBlameRequestSchema, + fileBlameResponseSchema, fileSourceRequestSchema, fileSourceResponseSchema, getCommitQueryParamsSchema, @@ -40,6 +42,8 @@ export const publicSearchResponseSchema = searchResponseSchema.openapi('PublicSe export const publicGetTreeRequestSchema = getTreeRequestSchema.openapi('PublicGetTreeRequest'); export const publicFileSourceRequestSchema = fileSourceRequestSchema.openapi('PublicFileSourceRequest'); export const publicFileSourceResponseSchema = fileSourceResponseSchema.openapi('PublicFileSourceResponse'); +export const publicFileBlameRequestSchema = fileBlameRequestSchema.openapi('PublicFileBlameRequest'); +export const publicFileBlameResponseSchema = fileBlameResponseSchema.openapi('PublicFileBlameResponse'); export const publicVersionResponseSchema = getVersionResponseSchema.openapi('PublicVersionResponse'); export const publicListReposQueryParamsSchema = listReposQueryParamsSchema.openapi('PublicListReposQuery'); export const publicListReposResponseSchema = listReposResponseSchema.openapi('PublicListReposResponse');