Skip to content

Commit cbf9b3c

Browse files
author
shashank dwivedi
committed
fix: reconcile missing zoekt shards on startup - Fixes #1210
1 parent 96af7e8 commit cbf9b3c

1 file changed

Lines changed: 63 additions & 0 deletions

File tree

packages/backend/src/repoIndexManager.ts

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ export class RepoIndexManager {
100100
logger.debug('Starting scheduler');
101101
// Cleanup any orphaned disk resources on startup
102102
await this.cleanupOrphanedDiskResources();
103+
await this.reconcileMissingShards();
103104
this.interval = setIntervalAsync(async () => {
104105
await this.scheduleIndexJobs();
105106
await this.scheduleCleanupJobs();
@@ -703,6 +704,8 @@ export class RepoIndexManager {
703704
}
704705
}
705706

707+
708+
706709
// --- Index shards ---
707710
// Shard files are prefixed with <orgId>_<repoId>: DATA_CACHE_DIR/index/<orgId>_<repoId>_*.zoekt
708711
if (existsSync(INDEX_CACHE_DIR)) {
@@ -736,6 +739,66 @@ export class RepoIndexManager {
736739
}
737740
}
738741

742+
// Reconciles the DB's view of which repos are indexed with the shard files
// that actually exist on disk. Any repo whose `indexedAt` is set but which
// has no shard file under INDEX_CACHE_DIR gets `indexedAt` reset to null.
//
// This handles the case where the index directory is wiped (e.g. ephemeral
// storage on a pod restart) but the DB still has stale indexedAt
// timestamps, causing the scheduler to skip re-indexing them.
//
// A missing INDEX_CACHE_DIR is treated the same as an empty one: with no
// directory there can be no shards, so every repo the DB believes is
// indexed is stale.
private async reconcileMissingShards() {
    // Build the set of repoIds that have at least one shard present.
    // Uses getRepoIdFromShardFileName to match the same naming convention
    // as cleanupOrphanedDiskResources (shards are named <orgId>_<repoId>_*.zoekt).
    // Entries for which the helper yields `undefined` are ignored.
    const repoIdsOnDisk = new Set<number>();
    if (existsSync(INDEX_CACHE_DIR)) {
        const entries = await readdir(INDEX_CACHE_DIR);
        for (const entry of entries) {
            const repoId = getRepoIdFromShardFileName(entry);
            if (repoId !== undefined) {
                repoIdsOnDisk.add(repoId);
            }
        }
    } else {
        logger.debug('Index cache directory does not exist, treating all indexed repos as missing their shards.');
    }

    // Find all repos the DB believes are already indexed.
    const indexedRepos = await this.db.repo.findMany({
        where: {
            indexedAt: {
                not: null,
            },
        },
        select: {
            id: true,
            name: true,
        },
    });

    if (indexedRepos.length === 0) {
        return;
    }

    const staleRepos = indexedRepos.filter((repo) => !repoIdsOnDisk.has(repo.id));

    if (staleRepos.length === 0) {
        logger.debug('Shard reconciliation complete. All indexed repos have shard files on disk.');
        return;
    }

    for (const repo of staleRepos) {
        logger.warn(`Repo "${repo.name}" (id: ${repo.id}) is marked as indexed in the DB but has no shard file on disk. Marking as stale.`);
    }

    // Reset in batches rather than one UPDATE per repo: this runs on the
    // startup path, and chunking keeps each `IN (...)` list well below the
    // database's bind-parameter limit.
    const updateBatchSize = 1000;
    for (let offset = 0; offset < staleRepos.length; offset += updateBatchSize) {
        const batchIds = staleRepos
            .slice(offset, offset + updateBatchSize)
            .map((repo) => repo.id);
        await this.db.repo.updateMany({
            where: { id: { in: batchIds } },
            data: { indexedAt: null },
        });
    }

    logger.info(`Shard reconciliation complete. Reset ${staleRepos.length} repo(s) to stale — they will be re-indexed shortly.`);
}
801+
739802
public async dispose() {
740803
if (this.interval) {
741804
clearInterval(this.interval);

0 commit comments

Comments
 (0)