Skip to content

Commit bb12c47

Browse files
cursoragent and msukkari committed
fix: cleanup temporary shard files on failed indexing
When zoekt-git-index fails during repository indexing, it can leave behind .tmp shard files that accumulate over time and fill up disk space. This is especially problematic for large repos that repeatedly fail to index.

Changes:
- Add cleanupTempShards() function to zoekt.ts that removes temporary shard files (files with .tmp in their name) for a specific repository
- Call cleanupTempShards() in repoIndexManager.ts when indexGitRepository fails, before re-throwing the error

This ensures that even if a repository consistently fails to index, the temporary files created during each attempt are cleaned up.

Co-authored-by: michael <michael@sourcebot.dev>
1 parent 9d8dcb3 commit bb12c47

File tree

2 files changed

+44
-4
lines changed

2 files changed

+44
-4
lines changed

packages/backend/src/repoIndexManager.ts

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import { captureEvent } from './posthog.js';
1313
import { PromClient } from './promClient.js';
1414
import { RepoWithConnections, Settings } from "./types.js";
1515
import { getAuthCredentialsForRepo, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js';
16-
import { indexGitRepository } from './zoekt.js';
16+
import { cleanupTempShards, indexGitRepository } from './zoekt.js';
1717

1818
const LOG_TAG = 'repo-index-manager';
1919
const logger = createLogger(LOG_TAG);
@@ -438,9 +438,17 @@ export class RepoIndexManager {
438438
}
439439

440440
logger.info(`Indexing ${repo.name} (id: ${repo.id})...`);
441-
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, revisions, signal));
442-
const indexDuration_s = durationMs / 1000;
443-
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);
441+
try {
442+
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, revisions, signal));
443+
const indexDuration_s = durationMs / 1000;
444+
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);
445+
} catch (error) {
446+
// Clean up any temporary shard files left behind by the failed indexing operation.
447+
// Zoekt creates .tmp files during indexing which can accumulate if indexing fails repeatedly.
448+
logger.warn(`Indexing failed for ${repo.name} (id: ${repo.id}), cleaning up temp shard files...`);
449+
await cleanupTempShards(repo);
450+
throw error;
451+
}
444452

445453
return revisions;
446454
}

packages/backend/src/zoekt.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { Repo } from "@sourcebot/db";
22
import { createLogger, env, getRepoPath } from "@sourcebot/shared";
33
import { exec } from "child_process";
4+
import { readdir, rm } from "fs/promises";
45
import { INDEX_CACHE_DIR } from "./constants.js";
56
import { Settings } from "./types.js";
67
import { getShardPrefix } from "./utils.js";
@@ -54,3 +55,34 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, revisio
5455
})
5556
});
5657
}
58+
59+
/**
 * Best-effort removal of temporary shard files left behind by a failed or
 * interrupted indexing run for a single repository.
 *
 * A file in `INDEX_CACHE_DIR` is treated as a temp shard of `repo` when its
 * name starts with the repo's shard prefix (from `getShardPrefix`) and
 * contains `.tmp` anywhere in the name.
 *
 * This function never throws: a failure to list the directory or to delete an
 * individual file is logged as a warning. A failure on one file does not stop
 * the remaining files from being removed.
 *
 * @param repo - The repository whose temp shards should be cleaned up
 * @returns A promise that resolves once every candidate file has been attempted
 */
export const cleanupTempShards = async (repo: Repo): Promise<void> => {
    const shardPrefix = getShardPrefix(repo.orgId, repo.id);

    let tempFiles: string[];
    try {
        const files = await readdir(INDEX_CACHE_DIR);
        // NOTE(review): a bare prefix match may also select files belonging to
        // a different repo whose prefix merely extends this one (e.g. id 1 vs
        // id 10), depending on what getShardPrefix returns and how shard file
        // names continue after the prefix - confirm against the naming scheme.
        tempFiles = files.filter(file =>
            file.startsWith(shardPrefix) && file.includes('.tmp')
        );
    } catch (error) {
        // Log but don't throw - cleanup is best effort
        logger.warn(`Failed to cleanup temp shards for repo ${repo.id}:`, error);
        return;
    }

    // Delete each file independently so that a single failure (e.g. a
    // permission error) does not leave the rest of the temp files behind.
    let removedCount = 0;
    for (const file of tempFiles) {
        const filePath = `${INDEX_CACHE_DIR}/${file}`;
        logger.info(`Cleaning up temp shard file: ${filePath}`);
        try {
            // `force: true` makes an already-missing file a no-op rather than an error.
            await rm(filePath, { force: true });
            removedCount++;
        } catch (error) {
            logger.warn(`Failed to remove temp shard file ${filePath} for repo ${repo.id}:`, error);
        }
    }

    // Report only the deletions that actually succeeded.
    if (removedCount > 0) {
        logger.info(`Cleaned up ${removedCount} temp shard file(s) for repo ${repo.id}`);
    }
}

0 commit comments

Comments
 (0)