Skip to content

Commit 1ce7e73

Browse files
msukkari and claude committed
feat(db): backfill audit source metadata and add v2 inject script
Add a migration that backfills the 'source' field in audit metadata for historical events created before source tracking was introduced. All old events were web-only, so code searches and chats get 'sourcebot-web-client' and navigations get 'sourcebot-ui-codenav'. Also restore the original inject-audit-data script and add inject-audit-data-v2 with source-aware mock data generation. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 40c832c commit 1ce7e73

File tree

4 files changed

+438
-244
lines changed

4 files changed

+438
-244
lines changed
Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
1+
-- Backfill source metadata for historical audit events.
--
-- Before this change, all audit events were created from the web UI without
-- a 'source' field in metadata. The new analytics dashboard segments events
-- by source (sourcebot-*, mcp, or null/other for API). Without this backfill,
-- historical web UI events would be misclassified as API traffic.
--
-- jsonb_set() raises an error when its target is not a JSON object (for
-- example a JSON null, a scalar, or an array), which would abort the whole
-- migration. Each statement therefore only touches rows whose metadata is
-- SQL NULL or a JSON object that has no non-null 'source' value yet; rows
-- holding any other JSON shape are left untouched.

-- Code searches and chat creation were web-only (no server-side audit existed)
UPDATE "Audit"
SET metadata = jsonb_set(COALESCE(metadata, '{}')::jsonb, '{source}', '"sourcebot-web-client"')
WHERE action IN ('user.performed_code_search', 'user.created_ask_chat')
  AND (
    metadata IS NULL
    OR (jsonb_typeof(metadata::jsonb) = 'object' AND metadata->>'source' IS NULL)
  );

-- Navigation events (find references, goto definition) were web-only
-- (created from the symbolHoverPopup client component)
UPDATE "Audit"
SET metadata = jsonb_set(COALESCE(metadata, '{}')::jsonb, '{source}', '"sourcebot-ui-codenav"')
WHERE action IN ('user.performed_find_references', 'user.performed_goto_definition')
  AND (
    metadata IS NULL
    OR (jsonb_typeof(metadata::jsonb) = 'object' AND metadata->>'source' IS NULL)
  );

packages/db/tools/scriptRunner.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ import { PrismaClient } from "@sourcebot/db";
22
import { ArgumentParser } from "argparse";
33
import { migrateDuplicateConnections } from "./scripts/migrate-duplicate-connections";
44
import { injectAuditData } from "./scripts/inject-audit-data";
5+
import { injectAuditDataV2 } from "./scripts/inject-audit-data-v2";
56
import { injectUserData } from "./scripts/inject-user-data";
67
import { confirmAction } from "./utils";
78
import { injectRepoData } from "./scripts/inject-repo-data";
@@ -14,6 +15,7 @@ export interface Script {
1415
export const scripts: Record<string, Script> = {
1516
"migrate-duplicate-connections": migrateDuplicateConnections,
1617
"inject-audit-data": injectAuditData,
18+
"inject-audit-data-v2": injectAuditDataV2,
1719
"inject-user-data": injectUserData,
1820
"inject-repo-data": injectRepoData,
1921
"test-repo-query-perf": testRepoQueryPerf,
Lines changed: 299 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,299 @@
1+
import { Script } from "../scriptRunner";
2+
import { PrismaClient, Prisma } from "../../dist";
3+
import { confirmAction } from "../utils";
4+
5+
/**
 * User profile: defines how a synthetic user interacts with Sourcebot.
 *
 * The three `*Weight` fields scale how many events of each kind the user
 * produces on a day they are active; a weight of 0 disables that channel.
 */
interface UserProfile {
    /** Identifier written to the `actorId` column of every generated audit row. */
    id: string;
    /** Whether this user uses the web UI, and how active they are (0 = never, 1 = heavy). */
    webWeight: number;
    /** Whether this user uses MCP, and how active they are (0 = never, 1 = heavy). */
    mcpWeight: number;
    /** Whether this user uses the API directly, and how active they are (0 = never, 1 = heavy). */
    apiWeight: number;
    /** API source label (for non-MCP API usage); empty for users that never call the API. */
    apiSource: string;
    /** How likely they are to be active on a weekday (0-1). */
    weekdayActivity: number;
    /** How likely they are to be active on a weekend (0-1). */
    weekendActivity: number;
}
21+
22+
/** Number of days of history to generate, counting back from the moment the script runs. */
const HISTORY_DAYS = 90;

/** A group of synthetic users that share one usage pattern. */
interface UserCohort {
    /** Label printed in the "User breakdown" log line. */
    label: string;
    /** Number of users created for this cohort. */
    size: number;
    /** Builds the profile (everything except the id) of the cohort's `index`-th user. */
    build: (index: number) => Omit<UserProfile, 'id'>;
}

/** Returns a random number drawn uniformly from [min, min + span). */
function randomFrom(min: number, span: number): number {
    return min + Math.random() * span;
}

/**
 * Picks a random time of day on `date`.
 *
 * Weekday events fall between 09:00 and 17:59, weekend events between 09:00
 * and 20:59 (local time). If the pick would land after `notAfter` (only
 * possible for the current day), a time between local midnight of `date` and
 * `notAfter` is used instead so that no event is dated in the future.
 *
 * @param date - Day the event belongs to; it is not mutated.
 * @param isWeekend - Whether `date` is a Saturday or Sunday.
 * @param notAfter - Latest timestamp that may be returned.
 */
function randomTimestamp(date: Date, isWeekend: boolean, notAfter: Date): Date {
    const ts = new Date(date);
    const hourSpan = isWeekend ? 12 : 9;
    ts.setHours(
        9 + Math.floor(Math.random() * hourSpan),
        Math.floor(Math.random() * 60),
        Math.floor(Math.random() * 60),
        Math.floor(Math.random() * 1000),
    );
    if (ts.getTime() <= notAfter.getTime()) {
        return ts;
    }

    const midnight = new Date(date);
    midnight.setHours(0, 0, 0, 0);
    const elapsedMs = Math.max(notAfter.getTime() - midnight.getTime(), 0);
    return new Date(midnight.getTime() + Math.floor(Math.random() * (elapsedMs + 1)));
}

/**
 * Scales a base event-count range by a user's weight and returns a random
 * integer from the scaled range (inclusive on both ends).
 *
 * Weekend days are further reduced to 30% of the weekday volume. Returns 0
 * when the scaled maximum rounds down to zero.
 */
function scaledCount(baseMin: number, baseMax: number, weight: number, isWeekend: boolean): number {
    const weekendFactor = isWeekend ? 0.3 : 1.0;
    const scaledMax = Math.round(baseMax * weight * weekendFactor);
    if (scaledMax <= 0) return 0;
    const scaledMin = Math.min(Math.round(baseMin * weight * weekendFactor), scaledMax);
    return scaledMin + Math.floor(Math.random() * (scaledMax - scaledMin + 1));
}

/**
 * Generate realistic audit data for analytics testing.
 *
 * Simulates 50 users with mixed usage patterns across web UI, MCP, and API,
 * and writes their events for the last 90 days (including today) into the
 * audit table of the organization with id 1. The script only inserts rows; it
 * never deletes existing audit data, and the statistics printed at the end
 * cover every audit row of the organization, not just the rows added here.
 */
export const injectAuditDataV2: Script = {
    run: async (prisma: PrismaClient) => {
        const orgId = 1;

        // Check if org exists
        const org = await prisma.org.findUnique({
            where: { id: orgId },
        });

        if (!org) {
            console.error(`Organization with id ${orgId} not found. Please create it first.`);
            return;
        }

        console.log(`Injecting audit data for organization: ${org.name} (${org.domain})`);

        const apiSources = ['cli', 'sdk', 'custom-app'];

        // One entry per usage pattern. The breakdown log is derived from this
        // table, so every user is counted in exactly one category.
        const cohorts: UserCohort[] = [
            {
                // Browse the UI, never use MCP or API
                label: 'web-only',
                size: 20,
                build: () => ({
                    webWeight: randomFrom(0.6, 0.4), // 0.6-1.0
                    mcpWeight: 0,
                    apiWeight: 0,
                    apiSource: '',
                    weekdayActivity: randomFrom(0.7, 0.2),
                    weekendActivity: randomFrom(0.05, 0.15),
                }),
            },
            {
                // Use the web UI daily and also have MCP set up in their IDE
                label: 'web+MCP',
                size: 12,
                build: () => ({
                    webWeight: randomFrom(0.4, 0.4), // 0.4-0.8
                    mcpWeight: randomFrom(0.5, 0.5), // 0.5-1.0
                    apiWeight: 0,
                    apiSource: '',
                    weekdayActivity: randomFrom(0.8, 0.15),
                    weekendActivity: randomFrom(0.1, 0.2),
                }),
            },
            {
                // Primarily use MCP through their IDE, occasionally check the web UI
                label: 'MCP-heavy',
                size: 8,
                build: () => ({
                    webWeight: randomFrom(0.05, 0.2), // 0.05-0.25 (occasional)
                    mcpWeight: randomFrom(0.7, 0.3), // 0.7-1.0
                    apiWeight: 0,
                    apiSource: '',
                    weekdayActivity: randomFrom(0.85, 0.1),
                    weekendActivity: randomFrom(0.3, 0.3),
                }),
            },
            {
                // Automated scripts/CI, no web UI or MCP
                label: 'API-only',
                size: 5,
                build: (index) => ({
                    webWeight: 0,
                    mcpWeight: 0,
                    apiWeight: randomFrom(0.6, 0.4),
                    apiSource: apiSources[index % apiSources.length],
                    weekdayActivity: randomFrom(0.9, 0.1),
                    weekendActivity: randomFrom(0.6, 0.3),
                }),
            },
            {
                // Developers who use both the UI and have scripts that call the API
                label: 'web+API',
                size: 5,
                build: (index) => ({
                    webWeight: randomFrom(0.3, 0.4),
                    mcpWeight: 0,
                    apiWeight: randomFrom(0.4, 0.4),
                    apiSource: apiSources[index % apiSources.length],
                    weekdayActivity: randomFrom(0.8, 0.15),
                    weekendActivity: randomFrom(0.1, 0.2),
                }),
            },
        ];

        // Ids are numbered consecutively across cohorts: user_001 ... user_050.
        const users: UserProfile[] = [];
        for (const cohort of cohorts) {
            for (let i = 0; i < cohort.size; i++) {
                users.push({
                    id: `user_${String(users.length + 1).padStart(3, '0')}`,
                    ...cohort.build(i),
                });
            }
        }

        // Derive the start from the same instant as the end. Two independent
        // `new Date()` calls can differ by a millisecond, which previously made
        // the inclusion of the current day depend on timing.
        const endDate = new Date();
        const startDate = new Date(endDate);
        startDate.setDate(startDate.getDate() - HISTORY_DAYS);

        console.log(`Generating data from ${startDate.toISOString().split('T')[0]} to ${endDate.toISOString().split('T')[0]}`);
        console.log(`User breakdown: ${cohorts.map(cohort => `${cohort.size} ${cohort.label}`).join(', ')}`);

        confirmAction();

        // Builds `count` audit rows for one user/action on the given day.
        const buildAudits = (
            userId: string,
            action: string,
            count: number,
            currentDate: Date,
            isWeekend: boolean,
            targetType: string,
            metadata?: Prisma.InputJsonValue,
        ): Prisma.AuditCreateManyInput[] =>
            Array.from({ length: count }, () => ({
                timestamp: randomTimestamp(currentDate, isWeekend, endDate),
                action,
                actorId: userId,
                actorType: 'user',
                targetId: `${targetType}_${Math.floor(Math.random() * 1000)}`,
                targetType,
                sourcebotVersion: '1.0.0',
                orgId,
                ...(metadata ? { metadata } : {}),
            }));

        // Generate data for each day, oldest first, ending with today.
        for (let daysAgo = HISTORY_DAYS; daysAgo >= 0; daysAgo--) {
            const currentDate = new Date(endDate);
            currentDate.setDate(currentDate.getDate() - daysAgo);
            const dayOfWeek = currentDate.getDay();
            const isWeekend = dayOfWeek === 0 || dayOfWeek === 6;

            for (const user of users) {
                // Determine if user is active today
                const activityChance = isWeekend ? user.weekendActivity : user.weekdayActivity;
                if (Math.random() >= activityChance) continue;

                // All of this user's events for the day are written with a
                // single createMany call instead of one round trip per event.
                const rows: Prisma.AuditCreateManyInput[] = [];
                const add = (
                    action: string,
                    baseMin: number,
                    baseMax: number,
                    weight: number,
                    targetType: string,
                    metadata: Prisma.InputJsonValue,
                ): void => {
                    const count = scaledCount(baseMin, baseMax, weight, isWeekend);
                    rows.push(...buildAudits(user.id, action, count, currentDate, isWeekend, targetType, metadata));
                };

                // --- Web UI activity (source='sourcebot-web-client' or 'sourcebot-ui-codenav') ---
                if (user.webWeight > 0) {
                    const webMeta: Prisma.InputJsonValue = { source: 'sourcebot-web-client' };
                    const codenavMeta: Prisma.InputJsonValue = { source: 'sourcebot-ui-codenav' };

                    // Code searches (2-5 base)
                    add('user.performed_code_search', 2, 5, user.webWeight, 'search', webMeta);

                    // Navigations: find references + goto definition (5-10 base),
                    // split roughly 60/40 by choosing the action per event.
                    const navCount = scaledCount(5, 10, user.webWeight, isWeekend);
                    for (let i = 0; i < navCount; i++) {
                        const action = Math.random() < 0.6 ? 'user.performed_find_references' : 'user.performed_goto_definition';
                        rows.push(...buildAudits(user.id, action, 1, currentDate, isWeekend, 'symbol', codenavMeta));
                    }

                    // Ask chats (0-2 base) - web only
                    add('user.created_ask_chat', 0, 2, user.webWeight, 'org', webMeta);

                    // File source views (3-8 base)
                    add('user.fetched_file_source', 3, 8, user.webWeight, 'file', webMeta);

                    // File tree browsing (2-5 base)
                    add('user.fetched_file_tree', 2, 5, user.webWeight, 'repo', webMeta);

                    // List repos (1-3 base)
                    add('user.listed_repos', 1, 3, user.webWeight, 'org', webMeta);
                }

                // --- MCP activity (source='mcp') ---
                if (user.mcpWeight > 0) {
                    const meta: Prisma.InputJsonValue = { source: 'mcp' };

                    // MCP code searches (5-15 base) - higher volume than web
                    add('user.performed_code_search', 5, 15, user.mcpWeight, 'search', meta);

                    // MCP file source fetches (5-12 base)
                    add('user.fetched_file_source', 5, 12, user.mcpWeight, 'file', meta);

                    // MCP file tree fetches (3-6 base)
                    add('user.fetched_file_tree', 3, 6, user.mcpWeight, 'repo', meta);

                    // MCP list repos (3-8 base)
                    add('user.listed_repos', 3, 8, user.mcpWeight, 'org', meta);
                }

                // --- API activity (source=cli/sdk/custom-app) ---
                if (user.apiWeight > 0) {
                    const meta: Prisma.InputJsonValue = { source: user.apiSource };

                    // API code searches (10-30 base) - highest volume, automated
                    add('user.performed_code_search', 10, 30, user.apiWeight, 'search', meta);

                    // API file source fetches (8-20 base)
                    add('user.fetched_file_source', 8, 20, user.apiWeight, 'file', meta);

                    // API file tree fetches (4-10 base)
                    add('user.fetched_file_tree', 4, 10, user.apiWeight, 'repo', meta);

                    // API list repos (5-15 base)
                    add('user.listed_repos', 5, 15, user.apiWeight, 'org', meta);
                }

                if (rows.length > 0) {
                    await prisma.audit.createMany({ data: rows });
                }
            }
        }

        console.log(`\nAudit data injection complete!`);
        console.log(`Users: ${users.length}`);
        console.log(`Date range: ${startDate.toISOString().split('T')[0]} to ${endDate.toISOString().split('T')[0]}`);

        // Show statistics
        const stats = await prisma.audit.groupBy({
            by: ['action'],
            where: { orgId },
            _count: { action: true },
        });

        console.log('\nAction breakdown:');
        for (const stat of stats) {
            console.log(`  ${stat.action}: ${stat._count.action}`);
        }

        // Show source breakdown, using the same buckets as the analytics
        // dashboard: 'sourcebot-*' is web UI, 'mcp' is MCP, anything else
        // (including a missing source) is counted as API.
        const allAudits = await prisma.audit.findMany({
            where: { orgId },
            select: { metadata: true },
        });

        let webCount = 0, mcpCount = 0, apiCount = 0;
        for (const audit of allAudits) {
            const meta = audit.metadata as Record<string, unknown> | null;
            const source = meta?.source;
            if (typeof source === 'string' && source.startsWith('sourcebot-')) {
                webCount++;
            } else if (source === 'mcp') {
                mcpCount++;
            } else {
                apiCount++;
            }
        }
        console.log('\nSource breakdown:');
        console.log(`  Web UI (source=sourcebot-*): ${webCount}`);
        console.log(`  MCP (source=mcp): ${mcpCount}`);
        console.log(`  API (source=other/null): ${apiCount}`);
    },
};

0 commit comments

Comments
 (0)