Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions packages/backend/src/repoIndexManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { captureEvent } from './posthog.js';
import { PromClient } from './promClient.js';
import { RepoWithConnections, Settings } from "./types.js";
import { getAuthCredentialsForRepo, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js';
import { indexGitRepository } from './zoekt.js';
import { cleanupTempShards, indexGitRepository } from './zoekt.js';

const LOG_TAG = 'repo-index-manager';
const logger = createLogger(LOG_TAG);
Expand Down Expand Up @@ -438,9 +438,17 @@ export class RepoIndexManager {
}

logger.info(`Indexing ${repo.name} (id: ${repo.id})...`);
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, revisions, signal));
const indexDuration_s = durationMs / 1000;
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);
try {
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, revisions, signal));
const indexDuration_s = durationMs / 1000;
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);
Comment thread
brendan-kellam marked this conversation as resolved.
} catch (error) {
// Clean up any temporary shard files left behind by the failed indexing operation.
// Zoekt creates .tmp files during indexing which can accumulate if indexing fails repeatedly.
logger.warn(`Indexing failed for ${repo.name} (id: ${repo.id}), cleaning up temp shard files...`);
await cleanupTempShards(repo);
throw error;
}

return revisions;
}
Expand Down
32 changes: 32 additions & 0 deletions packages/backend/src/zoekt.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { Repo } from "@sourcebot/db";
import { createLogger, env, getRepoPath } from "@sourcebot/shared";
import { exec } from "child_process";
import { readdir, rm } from "fs/promises";
import { INDEX_CACHE_DIR } from "./constants.js";
import { Settings } from "./types.js";
import { getShardPrefix } from "./utils.js";
Expand Down Expand Up @@ -54,3 +55,34 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, revisio
})
});
}

/**
 * Best-effort removal of temporary shard files left behind by a failed or
 * interrupted indexing operation for a single repository.
 *
 * Lists `INDEX_CACHE_DIR` and deletes every entry whose name starts with the
 * repo's shard prefix and contains `.tmp`. Each file is removed independently,
 * so a failure on one file does not prevent the remaining files from being
 * cleaned up. Filesystem errors are logged as warnings and are not rethrown.
 *
 * @param repo - The repository whose temp shards should be cleaned up
 * @returns A promise that resolves once cleanup has been attempted
 */
export const cleanupTempShards = async (repo: Repo): Promise<void> => {
    const shardPrefix = getShardPrefix(repo.orgId, repo.id);

    let tempFiles: string[];
    try {
        const files = await readdir(INDEX_CACHE_DIR);
        // NOTE(review): a plain prefix match may also select files belonging to
        // another repo whose shard prefix extends this one (e.g. ids 1 and 10),
        // depending on how getShardPrefix formats its result - confirm.
        tempFiles = files.filter(file =>
            file.startsWith(shardPrefix) && file.includes('.tmp')
        );
    } catch (error) {
        // Log but don't throw - cleanup is best effort
        logger.warn(`Failed to cleanup temp shards for repo ${repo.id}:`, error);
        return;
    }

    let removedCount = 0;
    for (const file of tempFiles) {
        const filePath = `${INDEX_CACHE_DIR}/${file}`;
        try {
            logger.info(`Cleaning up temp shard file: ${filePath}`);
            // `force: true` makes a file that has already disappeared a no-op
            // instead of an error.
            await rm(filePath, { force: true });
            removedCount++;
        } catch (error) {
            // Keep going: one undeletable file must not block the rest.
            logger.warn(`Failed to remove temp shard file ${filePath} for repo ${repo.id}:`, error);
        }
    }

    if (removedCount > 0) {
        logger.info(`Cleaned up ${removedCount} temp shard file(s) for repo ${repo.id}`);
    }
}