Skip to content

Commit 3948d91

Browse files
msukkariclaude
andcommitted
feat: add audit log retention policy, update analytics UI and docs
- Add SOURCEBOT_EE_AUDIT_RETENTION_DAYS env var (default 180) and AuditLogPruner background job that prunes old audit records daily in batches - Surface retention period and oldest record date in analytics page header - Update audit action types table in docs (remove 4 stale, add 11 missing) - Add audit log storage section to sizing guide with enterprise callout and storage estimates - Update mock data script with mixed-usage user profiles and new audit actions Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 2edeba0 commit 3948d91

File tree

10 files changed

+411
-147
lines changed

10 files changed

+411
-147
lines changed

docs/docs/configuration/audit-logs.mdx

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ This feature gives security and compliance teams the necessary information to en
1515
## Enabling/Disabling Audit Logs
1616
Audit logs are enabled by default and can be controlled with the `SOURCEBOT_EE_AUDIT_LOGGING_ENABLED` [environment variable](/docs/configuration/environment-variables).
1717

18+
## Retention Policy
19+
By default, audit logs older than 180 days are automatically pruned daily. You can configure the retention period using the `SOURCEBOT_EE_AUDIT_RETENTION_DAYS` [environment variable](/docs/configuration/environment-variables). Set it to `0` to disable automatic pruning and retain logs indefinitely.
20+
1821
## Fetching Audit Logs
1922
Audit logs are stored in the [Postgres database](/docs/overview#architecture) connected to Sourcebot. To fetch all of the audit logs, you can use the following API:
2023

@@ -110,30 +113,37 @@ curl --request GET '$SOURCEBOT_URL/api/ee/audit' \
110113

111114
| Action | Actor Type | Target Type |
112115
| :------- | :------ | :------|
113-
| `api_key.creation_failed` | `user` | `org` |
114116
| `api_key.created` | `user` | `api_key` |
115-
| `api_key.deletion_failed` | `user` | `org` |
117+
| `api_key.creation_failed` | `user` | `org` |
116118
| `api_key.deleted` | `user` | `api_key` |
119+
| `api_key.deletion_failed` | `user` | `org` |
120+
| `audit.fetch` | `user` | `org` |
121+
| `chat.deleted` | `user` | `chat` |
122+
| `chat.shared_with_users` | `user` | `chat` |
123+
| `chat.unshared_with_user` | `user` | `chat` |
124+
| `chat.visibility_updated` | `user` | `chat` |
125+
| `org.ownership_transfer_failed` | `user` | `org` |
126+
| `org.ownership_transferred` | `user` | `org` |
127+
| `user.created_ask_chat` | `user` | `org` |
117128
| `user.creation_failed` | `user` | `user` |
118-
| `user.owner_created` | `user` | `org` |
119-
| `user.performed_code_search` | `user` | `org` |
120-
| `user.performed_find_references` | `user` | `org` |
121-
| `user.performed_goto_definition` | `user` | `org` |
122-
| `user.created_ask_chat` | `user` | `org` |
123-
| `user.jit_provisioning_failed` | `user` | `org` |
124-
| `user.jit_provisioned` | `user` | `org` |
125-
| `user.join_request_creation_failed` | `user` | `org` |
126-
| `user.join_requested` | `user` | `org` |
127-
| `user.join_request_approve_failed` | `user` | `account_join_request` |
128-
| `user.join_request_approved` | `user` | `account_join_request` |
129-
| `user.invite_failed` | `user` | `org` |
130-
| `user.invites_created` | `user` | `org` |
129+
| `user.delete` | `user` | `user` |
130+
| `user.fetched_file_source` | `user` | `org` |
131+
| `user.fetched_file_tree` | `user` | `org` |
131132
| `user.invite_accept_failed` | `user` | `invite` |
132133
| `user.invite_accepted` | `user` | `invite` |
134+
| `user.invite_failed` | `user` | `org` |
135+
| `user.invites_created` | `user` | `org` |
136+
| `user.join_request_approve_failed` | `user` | `account_join_request` |
137+
| `user.join_request_approved` | `user` | `account_join_request` |
138+
| `user.list` | `user` | `org` |
139+
| `user.listed_repos` | `user` | `org` |
140+
| `user.owner_created` | `user` | `org` |
141+
| `user.performed_code_search` | `user` | `org` |
142+
| `user.performed_find_references` | `user` | `org` |
143+
| `user.performed_goto_definition` | `user` | `org` |
144+
| `user.read` | `user` | `user` |
133145
| `user.signed_in` | `user` | `user` |
134146
| `user.signed_out` | `user` | `user` |
135-
| `org.ownership_transfer_failed` | `user` | `org` |
136-
| `org.ownership_transferred` | `user` | `org` |
137147

138148

139149
## Response schema
@@ -180,7 +190,7 @@ curl --request GET '$SOURCEBOT_URL/api/ee/audit' \
180190
},
181191
"targetType": {
182192
"type": "string",
183-
"enum": ["user", "org", "file", "api_key", "account_join_request", "invite"]
193+
"enum": ["user", "org", "file", "api_key", "account_join_request", "invite", "chat"]
184194
},
185195
"sourcebotVersion": {
186196
"type": "string"
@@ -192,7 +202,8 @@ curl --request GET '$SOURCEBOT_URL/api/ee/audit' \
192202
"properties": {
193203
"message": { "type": "string" },
194204
"api_key": { "type": "string" },
195-
"emails": { "type": "string" }
205+
"emails": { "type": "string" },
206+
"source": { "type": "string" }
196207
},
197208
"additionalProperties": false
198209
},

docs/docs/configuration/environment-variables.mdx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ The following environment variables allow you to configure your Sourcebot deploy
4242
| `HTTPS_PROXY` | - | <p>HTTPS proxy URL for routing SSL requests through a proxy server (e.g., `http://proxy.company.com:8080`). Requires `NODE_USE_ENV_PROXY=1`.</p> |
4343
| `NO_PROXY` | - | <p>Comma-separated list of hostnames or domains that should bypass the proxy (e.g., `localhost,127.0.0.1,.internal.domain`). Requires `NODE_USE_ENV_PROXY=1`.</p> |
4444
| `SOURCEBOT_EE_AUDIT_LOGGING_ENABLED` | `true` | <p>Enables/disables audit logging</p> |
45+
| `SOURCEBOT_EE_AUDIT_RETENTION_DAYS` | `180` | <p>The number of days to retain audit logs. Audit log records older than this will be automatically pruned daily. Set to `0` to disable pruning and retain logs indefinitely.</p> |
4546
| `AUTH_EE_GCP_IAP_ENABLED` | `false` | <p>When enabled, allows Sourcebot to automatically register/login from a successful GCP IAP redirect</p> |
4647
| `AUTH_EE_GCP_IAP_AUDIENCE` | - | <p>The GCP IAP audience to use when verifying JWT tokens. Must be set to enable GCP IAP JIT provisioning</p> |
4748
| `EXPERIMENT_EE_PERMISSION_SYNC_ENABLED` | `false` | <p>Enables [permission syncing](/docs/features/permission-syncing).</p> |

docs/docs/deployment/sizing-guide.mdx

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,34 @@ If your instance is resource-constrained, you can reduce the concurrency of back
4545

4646
Lowering these values reduces peak resource usage at the cost of slower initial indexing.
4747

48+
## Audit log storage
49+
50+
<Info>
51+
Audit logging is an enterprise feature and is only available with an [enterprise license](/docs/overview#license-key). If you are not on an enterprise plan, audit logs are not stored and this section does not apply.
52+
</Info>
53+
54+
[Audit logs](/docs/configuration/audit-logs) are stored in the Postgres database connected to your Sourcebot deployment. Each audit record captures the action performed, the actor, the target, a timestamp, and optional metadata (e.g., request source). There are three database indexes on the audit table to support analytics and lookup queries.
55+
56+
**Estimated storage per audit event: ~350 bytes** (including row data and indexes).
57+
58+
<Info>
59+
The table below assumes 50 events per user per day. The actual number depends on usage patterns — each user action (code search, file view, navigation, Ask chat, etc.) creates one audit event. Users who interact via [MCP](/docs/features/mcp-server) or the [API](/docs/api-reference/search) tend to generate significantly more events than web-only users, so your real usage may vary.
60+
</Info>
61+
62+
| Team size | Avg events / user / day | Daily events | Monthly storage | 6-month storage |
63+
|---|---|---|---|---|
64+
| 10 users | 50 | 500 | ~5 MB | ~30 MB |
65+
| 50 users | 50 | 2,500 | ~25 MB | ~150 MB |
66+
| 100 users | 50 | 5,000 | ~50 MB | ~300 MB |
67+
| 500 users | 50 | 25,000 | ~250 MB | ~1.5 GB |
68+
| 1,000 users | 50 | 50,000 | ~500 MB | ~3 GB |
69+
70+
### Retention policy
71+
72+
By default, audit logs older than **180 days** are automatically pruned daily by a background job. You can adjust this with the `SOURCEBOT_EE_AUDIT_RETENTION_DAYS` [environment variable](/docs/configuration/environment-variables). Set it to `0` to disable pruning and retain logs indefinitely.
73+
74+
For most deployments, the default 180-day retention keeps database size manageable. If you have a large team with heavy MCP/API usage and need longer retention, plan your Postgres disk allocation accordingly using the estimates above.
75+
4876
## Monitoring
4977

5078
We recommend monitoring the following metrics after deployment to validate your sizing:
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import { PrismaClient } from "@sourcebot/db";
2+
import { createLogger, env } from "@sourcebot/shared";
3+
import { setIntervalAsync } from "../utils.js";
4+
5+
// Maximum number of audit record ids selected, and then deleted, per
// iteration of the pruning loop (passed as `take` to `findMany`).
const BATCH_SIZE = 10_000;
6+
// Milliseconds in 24 hours. Used both as the scheduler interval and to convert
// the retention period (expressed in days) into a cutoff timestamp.
const ONE_DAY_MS = 24 * 60 * 60 * 1000;
7+
8+
// Module-level logger obtained from createLogger('audit-log-pruner').
const logger = createLogger('audit-log-pruner');
9+
10+
/**
 * Background job that deletes audit records whose `timestamp` is older than
 * the retention window configured by `SOURCEBOT_EE_AUDIT_RETENTION_DAYS`.
 *
 * Call {@link AuditLogPruner.startScheduler} once at startup and
 * {@link AuditLogPruner.dispose} during shutdown.
 */
export class AuditLogPruner {
    /** Handle returned by `setIntervalAsync`; undefined while the scheduler is not running. */
    private interval?: NodeJS.Timeout;

    /** Set by `dispose()` so that an in-flight prune stops between batches. */
    private isDisposed = false;

    /** The prune run currently executing, if any. Awaited by `dispose()`. */
    private activeRun?: Promise<void>;

    /**
     * @param db Prisma client used to query and delete rows from the audit table.
     */
    constructor(private db: PrismaClient) {}

    /**
     * Starts the pruner: runs one prune immediately and then one every 24 hours.
     *
     * Does nothing (other than logging) when audit logging is disabled or when
     * the configured retention period is less than or equal to zero.
     */
    startScheduler() {
        if (env.SOURCEBOT_EE_AUDIT_LOGGING_ENABLED !== 'true') {
            logger.info('Audit logging is disabled, skipping audit log pruner.');
            return;
        }

        if (env.SOURCEBOT_EE_AUDIT_RETENTION_DAYS <= 0) {
            logger.info('SOURCEBOT_EE_AUDIT_RETENTION_DAYS is 0, audit log pruning is disabled.');
            return;
        }

        logger.info(`Audit log pruner started. Retaining logs for ${env.SOURCEBOT_EE_AUDIT_RETENTION_DAYS} days.`);

        // Run immediately on startup, then every 24 hours. `runPrune` never
        // rejects, so discarding the start-up promise cannot surface as an
        // unhandled rejection.
        void this.runPrune();
        this.interval = setIntervalAsync(() => this.runPrune(), ONE_DAY_MS);
    }

    /**
     * Stops the scheduler and waits for any in-flight prune to finish its
     * current batch, so no further queries are issued once this resolves.
     */
    async dispose() {
        this.isDisposed = true;

        if (this.interval) {
            clearInterval(this.interval);
            this.interval = undefined;
        }

        await this.activeRun;
    }

    /**
     * Runs a single prune pass, logging (rather than propagating) any failure.
     * If a pass is already in progress, that pass's promise is returned instead
     * of starting a second, overlapping one.
     *
     * NOTE(review): how `setIntervalAsync` treats a rejected callback is not
     * visible from this file, so errors are handled here.
     */
    private runPrune(): Promise<void> {
        if (this.activeRun) {
            return this.activeRun;
        }

        const run = this.pruneOldAuditLogs()
            .catch((error: unknown) => {
                const reason = error instanceof Error ? error.message : String(error);
                logger.error(`Failed to prune audit logs: ${reason}`);
            })
            .finally(() => {
                this.activeRun = undefined;
            });

        this.activeRun = run;
        return run;
    }

    /**
     * Deletes every audit record older than the retention cutoff, at most
     * `BATCH_SIZE` rows per round trip, and logs the total removed.
     *
     * Stops early (between batches) once `dispose()` has been called.
     */
    private async pruneOldAuditLogs() {
        const cutoff = new Date(Date.now() - env.SOURCEBOT_EE_AUDIT_RETENTION_DAYS * ONE_DAY_MS);
        let totalDeleted = 0;

        logger.info(`Pruning audit logs older than ${cutoff.toISOString()}...`);

        // Delete in batches to avoid long-running transactions
        while (!this.isDisposed) {
            const batch = await this.db.audit.findMany({
                where: { timestamp: { lt: cutoff } },
                select: { id: true },
                take: BATCH_SIZE,
            });

            if (batch.length === 0) break;

            const result = await this.db.audit.deleteMany({
                where: { id: { in: batch.map(r => r.id) } },
            });

            totalDeleted += result.count;

            // A short batch means there is nothing left past the cutoff.
            if (batch.length < BATCH_SIZE) break;
        }

        if (totalDeleted > 0) {
            logger.info(`Pruned ${totalDeleted} audit log records.`);
        } else {
            logger.info('No audit log records to prune.');
        }
    }
}

packages/backend/src/index.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import { ConfigManager } from "./configManager.js";
1212
import { ConnectionManager } from './connectionManager.js';
1313
import { INDEX_CACHE_DIR, REPOS_CACHE_DIR, SHUTDOWN_SIGNALS } from './constants.js';
1414
import { AccountPermissionSyncer } from "./ee/accountPermissionSyncer.js";
15+
import { AuditLogPruner } from "./ee/auditLogPruner.js";
1516
import { GithubAppManager } from "./ee/githubAppManager.js";
1617
import { RepoPermissionSyncer } from './ee/repoPermissionSyncer.js';
1718
import { shutdownPosthog } from "./posthog.js";
@@ -64,9 +65,11 @@ const repoPermissionSyncer = new RepoPermissionSyncer(prisma, settings, redis);
6465
const accountPermissionSyncer = new AccountPermissionSyncer(prisma, settings, redis);
6566
const repoIndexManager = new RepoIndexManager(prisma, settings, redis, promClient);
6667
const configManager = new ConfigManager(prisma, connectionManager, env.CONFIG_PATH);
68+
const auditLogPruner = new AuditLogPruner(prisma);
6769

6870
connectionManager.startScheduler();
6971
await repoIndexManager.startScheduler();
72+
auditLogPruner.startScheduler();
7073

7174
if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && !hasEntitlement('permission-syncing')) {
7275
logger.error('Permission syncing is not supported in current plan. Please contact team@sourcebot.dev for assistance.');
@@ -105,6 +108,7 @@ const listenToShutdownSignals = () => {
105108
await connectionManager.dispose()
106109
await repoPermissionSyncer.dispose()
107110
await accountPermissionSyncer.dispose()
111+
await auditLogPruner.dispose()
108112
await configManager.dispose()
109113

110114
await prisma.$disconnect();

0 commit comments

Comments
 (0)