Skip to content

Commit f7bcc98

Browse files
committed
feat(analysis): add PR metadata ingestion and workflow style detection
- Implement pull request fetching and storage in new `pull_requests` table
- Add PR parsing utilities for issue linking, checklists, and template markers
- Compute merge method distribution (merge/squash/rebase) from commit parent analysis
- Add artifact traceability signals to distinguish orchestrator vs conductor workflows
- Display workflow style badge and metrics in analysis UI (PR coverage, issue linking, structured PRs)
1 parent aee3728 commit f7bcc98

15 files changed

Lines changed: 2680 additions & 63 deletions

apps/web/src/app/analysis/[jobId]/AnalysisClient.tsx

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1212,6 +1212,84 @@ export default function AnalysisClient({ jobId }: { jobId: string }) {
12121212
</div>
12131213
</div>
12141214

1215+
{/* Artifact Traceability Section */}
1216+
{wrapped.artifact_traceability ? (
1217+
<div className="mt-6 rounded-2xl border border-black/5 bg-white/60 p-5 backdrop-blur">
1218+
<div className="flex flex-col gap-2 sm:flex-row sm:items-center sm:justify-between">
1219+
<p className="text-xs font-semibold uppercase tracking-[0.25em] text-zinc-500">
1220+
Workflow Style
1221+
</p>
1222+
<span
1223+
className={`inline-flex items-center rounded-full px-2.5 py-0.5 text-xs font-medium ${
1224+
wrapped.artifact_traceability.workflow_style === "orchestrator"
1225+
? "bg-cyan-100 text-cyan-800"
1226+
: wrapped.artifact_traceability.workflow_style === "conductor"
1227+
? "bg-amber-100 text-amber-800"
1228+
: "bg-violet-100 text-violet-800"
1229+
}`}
1230+
>
1231+
{wrapped.artifact_traceability.workflow_style === "orchestrator"
1232+
? "Orchestrator"
1233+
: wrapped.artifact_traceability.workflow_style === "conductor"
1234+
? "Conductor"
1235+
: "Hybrid"}
1236+
</span>
1237+
</div>
1238+
<p className="mt-2 text-sm text-zinc-600">
1239+
{wrapped.artifact_traceability.workflow_style === "orchestrator"
1240+
? "Durable git trail with PRs, issue links, and structured collaboration — typical of autonomous agent workflows."
1241+
: wrapped.artifact_traceability.workflow_style === "conductor"
1242+
? "More ephemeral, IDE-chat style workflow with fewer artifacts — typical of interactive AI pair programming."
1243+
: "Mix of orchestrator and conductor patterns — balancing structured PRs with interactive development."}
1244+
</p>
1245+
<div className="mt-4 grid gap-3 sm:grid-cols-2 lg:grid-cols-4">
1246+
{wrapped.artifact_traceability.pr_coverage_rate !== null ? (
1247+
<div className="text-center">
1248+
<p className="text-xs font-semibold uppercase tracking-[0.2em] text-zinc-400">PR Coverage</p>
1249+
<p className="mt-1 text-xl font-semibold text-zinc-900">
1250+
{Math.round(wrapped.artifact_traceability.pr_coverage_rate * 100)}%
1251+
</p>
1252+
</div>
1253+
) : null}
1254+
{wrapped.artifact_traceability.issue_link_rate !== null ? (
1255+
<div className="text-center">
1256+
<p className="text-xs font-semibold uppercase tracking-[0.2em] text-zinc-400">Issue Linking</p>
1257+
<p className="mt-1 text-xl font-semibold text-zinc-900">
1258+
{Math.round(wrapped.artifact_traceability.issue_link_rate * 100)}%
1259+
</p>
1260+
</div>
1261+
) : null}
1262+
{wrapped.artifact_traceability.structured_pr_rate !== null ? (
1263+
<div className="text-center">
1264+
<p className="text-xs font-semibold uppercase tracking-[0.2em] text-zinc-400">Structured PRs</p>
1265+
<p className="mt-1 text-xl font-semibold text-zinc-900">
1266+
{Math.round(wrapped.artifact_traceability.structured_pr_rate * 100)}%
1267+
</p>
1268+
</div>
1269+
) : null}
1270+
{wrapped.artifact_traceability.dominant_merge_method ? (
1271+
<div className="text-center">
1272+
<p className="text-xs font-semibold uppercase tracking-[0.2em] text-zinc-400">Merge Style</p>
1273+
<p className="mt-1 text-xl font-semibold capitalize text-zinc-900">
1274+
{wrapped.artifact_traceability.dominant_merge_method}
1275+
</p>
1276+
</div>
1277+
) : null}
1278+
</div>
1279+
{wrapped.artifact_traceability.scores ? (
1280+
<div className="mt-4 flex items-center justify-center gap-4 text-xs text-zinc-500">
1281+
<span>
1282+
Orchestrator: <span className="font-semibold text-cyan-700">{wrapped.artifact_traceability.scores.orchestrator_score}</span>
1283+
</span>
1284+
<span>vs</span>
1285+
<span>
1286+
Conductor: <span className="font-semibold text-amber-700">{wrapped.artifact_traceability.scores.conductor_score}</span>
1287+
</span>
1288+
</div>
1289+
) : null}
1290+
</div>
1291+
) : null}
1292+
12151293
<div className="mt-6 rounded-2xl border border-black/5 bg-white/60 p-5 backdrop-blur">
12161294
<div className="flex flex-col gap-2 sm:flex-row sm:items-center sm:justify-between">
12171295
<p className="text-xs font-semibold uppercase tracking-[0.25em] text-zinc-500">Narrative</p>

apps/web/src/inngest/functions/analyze-repo.ts

Lines changed: 267 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,31 @@ interface GithubCompareResponse {
370370
files?: Array<{ filename: string }>;
371371
}
372372

373+
/**
 * Subset of a pull request as returned by the repository pull-request
 * listing endpoint (`/repos/{owner}/{repo}/pulls`). Only the fields this
 * module reads are declared.
 */
interface GithubPullListItem {
  /** Pull request number; used to address the PR in detail requests. */
  number: number;
  /** Pull request title. */
  title: string;
  /** Description text; `null` when the PR has no body. */
  body: string | null;
  /** Open/closed state as reported by the API. */
  state: "open" | "closed";
  /** Creation timestamp string. */
  created_at: string;
  /** Last-update timestamp string; parsed with `Date` for incremental sync. */
  updated_at: string;
  /** Close timestamp string, or `null`. */
  closed_at: string | null;
  /** Merge timestamp string, or `null`. */
  merged_at: string | null;
  /** Author account; `null` when the API returns no user. */
  user: { login: string } | null;
  /** Target branch of the pull request. */
  base: { ref: string };
  /** Source branch of the pull request and the SHA at its tip. */
  head: { ref: string; sha: string };
}
386+
387+
/**
 * Pull request as returned by the single-PR endpoint
 * (`/repos/{owner}/{repo}/pulls/{number}`): every list field plus merge
 * status and size/discussion counters.
 */
interface GithubPullDetail extends GithubPullListItem {
  /** Whether the pull request has been merged. */
  merged: boolean;
  /** SHA reported by the API as the merge commit, or `null`. */
  merge_commit_sha: string | null;
  /** Number of files changed. */
  changed_files: number;
  /** Lines added. */
  additions: number;
  /** Lines deleted. */
  deletions: number;
  /** Number of commits in the pull request. */
  commits: number;
  /** Number of conversation comments. */
  comments: number;
  /** Number of review comments. */
  review_comments: number;
}
397+
373398
function sleep(ms: number): Promise<void> {
374399
return new Promise((resolve) => setTimeout(resolve, ms));
375400
}
@@ -424,6 +449,79 @@ async function githubFetch<T>(url: string, token: string): Promise<T> {
424449
throw new Error("GitHub API error: exhausted retries");
425450
}
426451

452+
/**
 * Extracts the issue numbers a pull request body links via GitHub closing
 * keywords.
 *
 * All nine keywords are recognised case-insensitively (close, closes,
 * closed, fix, fixes, fixed, resolve, resolves, resolved), optionally
 * followed by a colon, e.g. `Fixes #12` or `Closed: #7`. Only same-repository
 * `#123` references are matched.
 *
 * @param text - Pull request body; `null` or empty yields an empty result.
 * @returns Unique issue numbers in first-seen order, capped at 20 entries.
 */
function extractLinkedIssueNumbers(text: string | null): number[] {
  if (!text) return [];

  // The leading \b keeps words that merely end in a keyword ("prefixes #3")
  // from matching.
  const closingReference =
    /\b(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)(?:\s*:)?\s+#(\d+)\b/gi;

  const unique = new Set<number>();
  for (const match of text.matchAll(closingReference)) {
    const issueNumber = Number.parseInt(match[1] ?? "", 10);
    // Rejects NaN as well as digit runs too long to be a real issue number.
    if (Number.isSafeInteger(issueNumber)) unique.add(issueNumber);
  }

  return Array.from(unique).slice(0, 20);
}
462+
463+
/**
 * Reports whether the text contains at least one Markdown task-list item
 * (checked or unchecked) at the start of a line.
 *
 * Recognised list markers are `-`, `*`, `+` and ordered markers such as `1.`
 * or `1)`. The checkbox must be followed by whitespace, so a bare `- [ ]`
 * at the very end of the text does not count.
 *
 * @param text - Pull request body; `null` or empty yields `false`.
 */
function hasChecklist(text: string | null): boolean {
  if (!text) return false;
  // "-" keeps its lenient spacing ("-[ ] x" still matches). The other markers
  // require a following space so emphasis like "*[x] ...*" is not mistaken
  // for a list item.
  return /^\s*(?:-\s*|(?:[*+]|\d+[.)])\s+)\[[ xX]\]\s+/m.test(text);
}
467+
468+
/**
 * Heuristic check for pull-request-template residue in a PR body: either an
 * HTML comment opening at the start of a line, or a level-2/3 Markdown
 * heading whose first word is one of the usual template section names.
 *
 * @param text - Pull request body; `null` or empty yields `false`.
 */
function hasTemplateMarkers(text: string | null): boolean {
  if (text === null || text === "") return false;

  const lineStartsWithHtmlComment = /(^|\n)\s*<!--/m;
  const templateSectionHeading =
    /(^|\n)\s*#{2,3}\s+(description|changes|testing|checklist|context|motivation)\b/im;

  return lineStartsWithHtmlComment.test(text) || templateSectionHeading.test(text);
}
475+
476+
/**
 * Lists pull requests of a repository, most recently updated first.
 *
 * Pages of 100 are requested until one of the following happens: the
 * requested maximum is reached, a short or empty page is returned, page 10
 * has been read, or a pull request is encountered whose `updated_at` is not
 * newer than `updatedAfter` (everything after it is older, because results
 * are sorted by update time descending).
 *
 * @param params.maxPullRequests - Upper bound on the number of items returned.
 * @param params.updatedAfter - Date string acting as the incremental-sync
 *   cut-off, or `null` to apply no cut-off.
 * @returns At most `maxPullRequests` list items.
 */
async function fetchPullRequests(params: {
  owner: string;
  repo: string;
  token: string;
  maxPullRequests: number;
  updatedAfter: string | null;
}): Promise<GithubPullListItem[]> {
  const limit = params.maxPullRequests;
  const cutoffMs = params.updatedAfter ? new Date(params.updatedAfter).getTime() : null;
  const collected: GithubPullListItem[] = [];

  for (let pageNumber = 1; pageNumber <= 10 && collected.length < limit; pageNumber += 1) {
    const endpoint = new URL(`https://api.github.com/repos/${params.owner}/${params.repo}/pulls`);
    const query: Array<[string, string]> = [
      ["state", "all"],
      ["sort", "updated"],
      ["direction", "desc"],
      ["per_page", "100"],
      ["page", String(pageNumber)],
    ];
    for (const [key, value] of query) {
      endpoint.searchParams.set(key, value);
    }

    const pageItems = await githubFetch<GithubPullListItem[]>(endpoint.toString(), params.token);

    for (const pullRequest of pageItems) {
      const updatedMs = new Date(pullRequest.updated_at).getTime();
      const notNewerThanCutoff =
        cutoffMs !== null && Number.isFinite(updatedMs) && updatedMs <= cutoffMs;
      if (notNewerThanCutoff) {
        // Results are sorted newest-first, so nothing further can qualify.
        return collected.slice(0, limit);
      }
      collected.push(pullRequest);
      if (collected.length >= limit) break;
    }

    // A short (or empty) page means the listing is exhausted.
    if (pageItems.length < 100) break;
  }

  return collected.slice(0, limit);
}
514+
515+
/**
 * Fetches a single pull request by number from the GitHub REST API.
 *
 * @returns The detail payload, which carries merge status and the
 *   size/discussion counters declared on `GithubPullDetail`.
 */
async function fetchPullRequestDetail(params: {
  owner: string;
  repo: string;
  token: string;
  number: number;
}): Promise<GithubPullDetail> {
  const { owner, repo, token, number } = params;
  const detailUrl = `https://api.github.com/repos/${owner}/${repo}/pulls/${number}`;
  return githubFetch<GithubPullDetail>(detailUrl, token);
}
524+
427525
/**
428526
* Fetch commit list (paginated, most recent)
429527
*/
@@ -606,7 +704,7 @@ export const analyzeRepo = inngest.createFunction(
606704

607705
const { data: repo, error: repoError } = await supabase
608706
.from("repos")
609-
.select("id, owner, name, full_name")
707+
.select("id, owner, name, full_name, last_pr_sync_at")
610708
.eq("id", repoId)
611709
.single();
612710

@@ -640,6 +738,171 @@ export const analyzeRepo = inngest.createFunction(
640738
return { job, repo, githubToken };
641739
});
642740

741+
// Step: ingest pull request metadata. Lists PRs updated since the last sync,
// fetches details for up to 50 of them, classifies merge methods from the
// merge commit's parents, upserts rows into `pull_requests`, and returns
// aggregate signals. Any failure degrades to an empty result so the rest of
// the analysis still runs.
const pullRequestSignals = await step.run("sync-pull-requests", async () => {
  type MergeMethod = "merge" | "squash" | "rebase";
  type PullRequestSignals = {
    total: number;
    merged: number;
    merge_methods: { merge: number; squash: number; rebase: number; unknown: number };
    checklist_rate: number | null;
    template_rate: number | null;
    linked_issue_rate: number | null;
    evidence_shas: string[];
  };

  // Result reported when there is nothing to analyse or ingestion failed.
  const emptySignals = (): PullRequestSignals => ({
    total: 0,
    merged: 0,
    merge_methods: { merge: 0, squash: 0, rebase: 0, unknown: 0 },
    checklist_rate: null,
    template_rate: null,
    linked_issue_rate: null,
    evidence_shas: [],
  });

  try {
    // Taken before listing so that PRs updated while this step is running
    // fall after the watermark and are picked up by the next sync.
    const syncStartedAt = new Date().toISOString();

    const prs = await fetchPullRequests({
      owner: repo.owner,
      repo: repo.name,
      token: githubToken,
      maxPullRequests: 200,
      updatedAfter: (repo as { last_pr_sync_at?: string | null }).last_pr_sync_at ?? null,
    });

    if (prs.length === 0) {
      return emptySignals();
    }

    // Detail requests are capped and issued in small parallel batches.
    const detailed: GithubPullDetail[] = [];
    const detailLimit = Math.min(50, prs.length);
    const batchSize = 5;
    for (let i = 0; i < detailLimit; i += batchSize) {
      const batch = prs.slice(i, i + batchSize);
      const details = await Promise.all(
        batch.map((pr) =>
          fetchPullRequestDetail({
            owner: repo.owner,
            repo: repo.name,
            token: githubToken,
            number: pr.number,
          })
        )
      );
      detailed.push(...details);
    }

    // Look up the merge commit of every merged PR; individual lookup
    // failures are tolerated and leave that PR's merge method unknown.
    const mergedWithCommitSha = detailed.filter((pr) => pr.merged && pr.merge_commit_sha);
    const mergeCommitDetails: Map<string, GithubCommitDetail> = new Map();
    const mergeCommitBatchSize = 5;
    for (let i = 0; i < mergedWithCommitSha.length; i += mergeCommitBatchSize) {
      const batch = mergedWithCommitSha.slice(i, i + mergeCommitBatchSize);
      const results = await Promise.all(
        batch.map(async (pr) => {
          const sha = pr.merge_commit_sha;
          if (!sha) return null;
          try {
            const detail = await fetchCommitDetail({
              owner: repo.owner,
              repo: repo.name,
              sha,
              token: githubToken,
            });
            return { sha, detail };
          } catch {
            return null;
          }
        })
      );
      for (const r of results) {
        if (!r) continue;
        mergeCommitDetails.set(r.sha, r.detail);
      }
    }

    const mergeMethodCounts = { merge: 0, squash: 0, rebase: 0, unknown: 0 };
    const evidenceShas: string[] = [];
    let mergedCount = 0;
    let checklistCount = 0;
    let templateCount = 0;
    let linkedIssueCount = 0;

    const rows = [];
    for (const pr of detailed) {
      // Body-derived signals are computed once and reused for both the
      // stored row and the aggregate rates.
      const linkedIssues = extractLinkedIssueNumbers(pr.body);
      const checklist = hasChecklist(pr.body);
      const templateMarkers = hasTemplateMarkers(pr.body);
      if (linkedIssues.length > 0) linkedIssueCount += 1;
      if (checklist) checklistCount += 1;
      if (templateMarkers) templateCount += 1;

      let mergeMethod: MergeMethod | null = null;
      if (pr.merged && pr.merge_commit_sha) {
        const commit = mergeCommitDetails.get(pr.merge_commit_sha);
        // Without the commit there is no parent information, so the method
        // stays null (counted as "unknown") instead of being guessed.
        if (commit) {
          const parentCount = commit.parents?.length ?? 0;
          if (parentCount >= 2) mergeMethod = "merge";
          // NOTE(review): the rebase check relies on the merge commit SHA
          // equalling the head SHA; confirm this holds for rebase merges.
          else if (pr.merge_commit_sha === pr.head.sha) mergeMethod = "rebase";
          else mergeMethod = "squash";
        }
      }

      if (pr.merged) {
        mergedCount += 1;
        if (mergeMethod) mergeMethodCounts[mergeMethod] += 1;
        else mergeMethodCounts.unknown += 1;

        // Only merged PRs contribute evidence: for an unmerged PR the
        // reported merge_commit_sha is a test merge, not part of history.
        if (pr.merge_commit_sha && evidenceShas.length < 10) {
          evidenceShas.push(pr.merge_commit_sha);
        }
      }

      rows.push({
        repo_id: repoId,
        github_pr_number: pr.number,
        title: pr.title,
        body: pr.body,
        state: pr.state,
        merged: pr.merged,
        merged_at: pr.merged_at,
        created_at: pr.created_at,
        updated_at: pr.updated_at,
        closed_at: pr.closed_at,
        author_login: pr.user?.login ?? null,
        base_ref: pr.base.ref,
        head_ref: pr.head.ref,
        head_sha: pr.head.sha,
        merge_commit_sha: pr.merge_commit_sha,
        commit_count: pr.commits,
        additions: pr.additions,
        deletions: pr.deletions,
        changed_files: pr.changed_files,
        comments_count: pr.comments,
        review_comments_count: pr.review_comments,
        linked_issue_numbers: linkedIssues,
        has_checklist: checklist,
        has_template_markers: templateMarkers,
        merge_method: mergeMethod,
      });
    }

    const { error: prUpsertError } = await supabase
      .from("pull_requests")
      .upsert(rows, { onConflict: "repo_id,github_pr_number" });
    if (prUpsertError) {
      console.warn("Failed to upsert pull requests:", prUpsertError.message);
    }

    // Advance the incremental-sync watermark only when every listed PR was
    // stored. Moving it past PRs that were skipped by the detail cap, or
    // lost to a failed upsert, would exclude them from all future syncs.
    const allListedPersisted = !prUpsertError && detailed.length === prs.length;
    if (allListedPersisted) {
      const { error: watermarkError } = await supabase
        .from("repos")
        .update({ last_pr_sync_at: syncStartedAt })
        .eq("id", repoId);
      if (watermarkError) {
        console.warn("Failed to update last_pr_sync_at:", watermarkError.message);
      }
    }

    // NOTE(review): on incremental syncs these rates cover only the PRs
    // updated since the previous sync, not the repository's full PR history.
    const denom = detailed.length;

    return {
      total: denom,
      merged: mergedCount,
      merge_methods: mergeMethodCounts,
      checklist_rate: denom > 0 ? checklistCount / denom : null,
      template_rate: denom > 0 ? templateCount / denom : null,
      linked_issue_rate: denom > 0 ? linkedIssueCount / denom : null,
      evidence_shas: evidenceShas,
    };
  } catch (error) {
    // Best-effort step: PR ingestion problems must not fail the analysis.
    const message = error instanceof Error ? error.message : "Unknown error";
    console.warn("Pull request ingestion skipped:", message);
    return emptySignals();
  }
});
905+
643906
// Step 2: Fetch commit list
644907
const commitList = await step.run("fetch-commit-list", async () => {
645908
return fetchCommitList({
@@ -738,7 +1001,9 @@ export const analyzeRepo = inngest.createFunction(
7381001

7391002
const metrics = computeAnalysisMetrics(legacyEvents);
7401003
const assignment = assignVibeType(metrics);
741-
const insights = computeAnalysisInsights(legacyEvents);
1004+
const insights = computeAnalysisInsights(legacyEvents, {
1005+
pull_requests: pullRequestSignals,
1006+
});
7421007

7431008
// NEW: Vibe v2 insights with episodes and subsystems
7441009
const vibeInsights = computeVibeFromCommits({

0 commit comments

Comments
 (0)