Skip to content

Commit d490c0e

Browse files
metrics
1 parent c2299aa commit d490c0e

File tree

3 files changed

+64
-69
lines changed

3 files changed

+64
-69
lines changed

packages/backend/src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ if (hasEntitlement('github-app')) {
6767
const connectionManager = new ConnectionManager(prisma, settings, redis);
6868
const repoPermissionSyncer = new RepoPermissionSyncer(prisma, settings, redis);
6969
const userPermissionSyncer = new UserPermissionSyncer(prisma, settings, redis);
70-
const repoIndexManager = new RepoIndexManager(prisma, settings, redis);
70+
const repoIndexManager = new RepoIndexManager(prisma, settings, redis, promClient);
7171

7272
connectionManager.startScheduler();
7373
repoIndexManager.startScheduler();

packages/backend/src/promClient.ts

Lines changed: 30 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -10,84 +10,51 @@ export class PromClient {
1010
private app: express.Application;
1111
private server: Server;
1212

13-
public activeRepoIndexingJobs: Gauge<string>;
14-
public pendingRepoIndexingJobs: Gauge<string>;
15-
public repoIndexingReattemptsTotal: Counter<string>;
16-
public repoIndexingFailTotal: Counter<string>;
17-
public repoIndexingSuccessTotal: Counter<string>;
18-
19-
public activeRepoGarbageCollectionJobs: Gauge<string>;
20-
public repoGarbageCollectionErrorTotal: Counter<string>;
21-
public repoGarbageCollectionFailTotal: Counter<string>;
22-
public repoGarbageCollectionSuccessTotal: Counter<string>;
13+
public activeRepoIndexJobs: Gauge<string>;
14+
public pendingRepoIndexJobs: Gauge<string>;
15+
public repoIndexJobReattemptsTotal: Counter<string>;
16+
public repoIndexJobFailTotal: Counter<string>;
17+
public repoIndexJobSuccessTotal: Counter<string>;
2318

2419
public readonly PORT = 3060;
2520

2621
constructor() {
2722
this.registry = new Registry();
2823

29-
this.activeRepoIndexingJobs = new Gauge({
30-
name: 'active_repo_indexing_jobs',
31-
help: 'The number of repo indexing jobs in progress',
32-
labelNames: ['repo'],
33-
});
34-
this.registry.registerMetric(this.activeRepoIndexingJobs);
35-
36-
this.pendingRepoIndexingJobs = new Gauge({
37-
name: 'pending_repo_indexing_jobs',
38-
help: 'The number of repo indexing jobs waiting in queue',
39-
labelNames: ['repo'],
40-
});
41-
this.registry.registerMetric(this.pendingRepoIndexingJobs);
42-
43-
this.repoIndexingReattemptsTotal = new Counter({
44-
name: 'repo_indexing_reattempts',
45-
help: 'The number of repo indexing reattempts',
46-
labelNames: ['repo'],
47-
});
48-
this.registry.registerMetric(this.repoIndexingReattemptsTotal);
49-
50-
this.repoIndexingFailTotal = new Counter({
51-
name: 'repo_indexing_fails',
52-
help: 'The number of repo indexing fails',
53-
labelNames: ['repo'],
54-
});
55-
this.registry.registerMetric(this.repoIndexingFailTotal);
56-
57-
this.repoIndexingSuccessTotal = new Counter({
58-
name: 'repo_indexing_successes',
59-
help: 'The number of repo indexing successes',
60-
labelNames: ['repo'],
24+
this.activeRepoIndexJobs = new Gauge({
25+
name: 'active_repo_index_jobs',
26+
help: 'The number of repo jobs in progress',
27+
labelNames: ['repo', 'type'],
6128
});
62-
this.registry.registerMetric(this.repoIndexingSuccessTotal);
29+
this.registry.registerMetric(this.activeRepoIndexJobs);
6330

64-
this.activeRepoGarbageCollectionJobs = new Gauge({
65-
name: 'active_repo_garbage_collection_jobs',
66-
help: 'The number of repo garbage collection jobs in progress',
67-
labelNames: ['repo'],
31+
this.pendingRepoIndexJobs = new Gauge({
32+
name: 'pending_repo_index_jobs',
33+
help: 'The number of repo jobs waiting in queue',
34+
labelNames: ['repo', 'type'],
6835
});
69-
this.registry.registerMetric(this.activeRepoGarbageCollectionJobs);
36+
this.registry.registerMetric(this.pendingRepoIndexJobs);
7037

71-
this.repoGarbageCollectionErrorTotal = new Counter({
72-
name: 'repo_garbage_collection_errors',
73-
help: 'The number of repo garbage collection errors',
74-
labelNames: ['repo'],
38+
this.repoIndexJobReattemptsTotal = new Counter({
39+
name: 'repo_index_job_reattempts',
40+
help: 'The number of repo job reattempts',
41+
labelNames: ['repo', 'type'],
7542
});
76-
this.registry.registerMetric(this.repoGarbageCollectionErrorTotal);
43+
this.registry.registerMetric(this.repoIndexJobReattemptsTotal);
7744

78-
this.repoGarbageCollectionFailTotal = new Counter({
79-
name: 'repo_garbage_collection_fails',
80-
help: 'The number of repo garbage collection fails',
81-
labelNames: ['repo'],
45+
this.repoIndexJobFailTotal = new Counter({
46+
name: 'repo_index_job_fails',
47+
help: 'The number of repo job fails',
48+
labelNames: ['repo', 'type'],
8249
});
83-
this.registry.registerMetric(this.repoGarbageCollectionFailTotal);
50+
this.registry.registerMetric(this.repoIndexJobFailTotal);
8451

85-
this.repoGarbageCollectionSuccessTotal = new Counter({
86-
name: 'repo_garbage_collection_successes',
87-
help: 'The number of repo garbage collection successes',
88-
labelNames: ['repo'],
52+
this.repoIndexJobSuccessTotal = new Counter({
53+
name: 'repo_index_job_successes',
54+
help: 'The number of repo job successes',
55+
labelNames: ['repo', 'type'],
8956
});
90-
this.registry.registerMetric(this.repoGarbageCollectionSuccessTotal);
57+
this.registry.registerMetric(this.repoIndexJobSuccessTotal);
9158

9259
client.collectDefaultMetrics({
9360
register: this.registry,

packages/backend/src/repoIndexManager.ts

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import { Redis } from 'ioredis';
88
import { INDEX_CACHE_DIR } from './constants.js';
99
import { env } from './env.js';
1010
import { cloneRepository, fetchRepository, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js';
11+
import { PromClient } from './promClient.js';
1112
import { repoMetadataSchema, RepoWithConnections, Settings } from "./types.js";
1213
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure } from './utils.js';
1314
import { indexGitRepository } from './zoekt.js';
@@ -43,6 +44,7 @@ export class RepoIndexManager {
4344
private db: PrismaClient,
4445
private settings: Settings,
4546
redis: Redis,
47+
private promClient: PromClient,
4648
) {
4749
this.queue = new Queue<JobPayload>({
4850
redis,
@@ -73,7 +75,7 @@ export class RepoIndexManager {
7375
this.interval = setInterval(async () => {
7476
await this.scheduleIndexJobs();
7577
await this.scheduleCleanupJobs();
76-
}, 1000 * 5);
78+
}, this.settings.reindexRepoPollingIntervalMs);
7779

7880
this.worker.run();
7981
}
@@ -135,7 +137,7 @@ export class RepoIndexManager {
135137
}
136138
}
137139
],
138-
}
140+
},
139141
});
140142

141143
if (reposToIndex.length > 0) {
@@ -213,6 +215,9 @@ export class RepoIndexManager {
213215
},
214216
jobId: job.id,
215217
});
218+
219+
const jobTypeLabel = getJobTypePrometheusLabel(type);
220+
this.promClient.pendingRepoIndexJobs.inc({ repo: job.repo.name, type: jobTypeLabel });
216221
}
217222
}
218223

@@ -243,6 +248,10 @@ export class RepoIndexManager {
243248
}
244249
});
245250

251+
const jobTypeLabel = getJobTypePrometheusLabel(jobType);
252+
this.promClient.pendingRepoIndexJobs.dec({ repo: job.data.repoName, type: jobTypeLabel });
253+
this.promClient.activeRepoIndexJobs.inc({ repo: job.data.repoName, type: jobTypeLabel });
254+
246255
const abortController = new AbortController();
247256
const signalHandler = () => {
248257
logger.info(`Received shutdown signal, aborting...`);
@@ -378,6 +387,8 @@ export class RepoIndexManager {
378387
}
379388
});
380389

390+
const jobTypeLabel = getJobTypePrometheusLabel(jobData.type);
391+
381392
if (jobData.type === RepoIndexingJobType.INDEX) {
382393
const repo = await this.db.repo.update({
383394
where: { id: jobData.repoId },
@@ -395,6 +406,10 @@ export class RepoIndexManager {
395406

396407
logger.info(`Completed cleanup job ${job.data.jobId} for repo ${repo.name} (id: ${repo.id})`);
397408
}
409+
410+
// Track metrics for successful job
411+
this.promClient.activeRepoIndexJobs.dec({ repo: job.data.repoName, type: jobTypeLabel });
412+
this.promClient.repoIndexJobSuccessTotal.inc({ repo: job.data.repoName, type: jobTypeLabel });
398413
});
399414

400415
private onJobFailed = async (job: Job<JobPayload>) =>
@@ -404,6 +419,8 @@ export class RepoIndexManager {
404419
const attempt = job.attemptsMade + 1;
405420
const wasLastAttempt = attempt >= job.opts.attempts;
406421

422+
const jobTypeLabel = getJobTypePrometheusLabel(job.data.type);
423+
407424
if (wasLastAttempt) {
408425
const { repo } = await this.db.repoIndexingJob.update({
409426
where: { id: job.data.jobId },
@@ -415,29 +432,38 @@ export class RepoIndexManager {
415432
select: { repo: true }
416433
});
417434

435+
this.promClient.activeRepoIndexJobs.dec({ repo: job.data.repoName, type: jobTypeLabel });
436+
this.promClient.repoIndexJobFailTotal.inc({ repo: job.data.repoName, type: jobTypeLabel });
437+
418438
logger.error(`Failed job ${job.data.jobId} for repo ${repo.name} (id: ${repo.id}). Attempt ${attempt} / ${job.opts.attempts}. Failing job.`);
419439
} else {
420440
const repo = await this.db.repo.findUniqueOrThrow({
421441
where: { id: job.data.repoId },
422442
});
423443

444+
this.promClient.repoIndexJobReattemptsTotal.inc({ repo: job.data.repoName, type: jobTypeLabel });
445+
424446
logger.warn(`Failed job ${job.data.jobId} for repo ${repo.name} (id: ${repo.id}). Attempt ${attempt} / ${job.opts.attempts}. Retrying.`);
425447
}
426448
});
427449

428450
private onJobStalled = async (jobId: string) =>
429451
groupmqLifecycleExceptionWrapper('onJobStalled', logger, async () => {
430452
const logger = createJobLogger(jobId);
431-
const { repo } = await this.db.repoIndexingJob.update({
453+
const { repo, type } = await this.db.repoIndexingJob.update({
432454
where: { id: jobId },
433455
data: {
434456
status: RepoIndexingJobStatus.FAILED,
435457
completedAt: new Date(),
436458
errorMessage: 'Job stalled',
437459
},
438-
select: { repo: true }
460+
select: { repo: true, type: true }
439461
});
440462

463+
const jobTypeLabel = getJobTypePrometheusLabel(type);
464+
this.promClient.activeRepoIndexJobs.dec({ repo: repo.name, type: jobTypeLabel });
465+
this.promClient.repoIndexJobFailTotal.inc({ repo: repo.name, type: jobTypeLabel });
466+
441467
logger.error(`Job ${jobId} stalled for repo ${repo.name} (id: ${repo.id})`);
442468
});
443469

@@ -453,4 +479,6 @@ export class RepoIndexManager {
453479
await this.worker.close();
454480
await this.queue.close();
455481
}
456-
}
482+
}
483+
484+
/**
 * Translates a repo indexing job type into the string used as the `type`
 * label value on the Prometheus job metrics.
 *
 * @param type - The job type to translate.
 * @returns `'index'` for `RepoIndexingJobType.INDEX`; `'cleanup'` for any other value.
 */
const getJobTypePrometheusLabel = (type: RepoIndexingJobType) => {
    if (type === RepoIndexingJobType.INDEX) {
        return 'index';
    }

    // Every non-INDEX job type is reported under the 'cleanup' label.
    return 'cleanup';
};

0 commit comments

Comments
 (0)