@@ -100,6 +100,7 @@ export class RepoIndexManager {
100100 logger . debug ( 'Starting scheduler' ) ;
101101 // Cleanup any orphaned disk resources on startup
102102 await this . cleanupOrphanedDiskResources ( ) ;
103+ await this . reconcileMissingShards ( ) ;
103104 this . interval = setIntervalAsync ( async ( ) => {
104105 await this . scheduleIndexJobs ( ) ;
105106 await this . scheduleCleanupJobs ( ) ;
@@ -703,6 +704,8 @@ export class RepoIndexManager {
703704 }
704705 }
705706
707+
708+
706709 // --- Index shards ---
707710 // Shard files are prefixed with <orgId>_<repoId>: DATA_CACHE_DIR/index/<orgId>_<repoId>_*.zoekt
708711 if ( existsSync ( INDEX_CACHE_DIR ) ) {
@@ -736,6 +739,66 @@ export class RepoIndexManager {
736739 }
737740 }
738741
// Reconciles the DB's view of which repos are indexed with the shard files
// that actually exist on disk. A repo whose `indexedAt` is set but which has
// no shard file in INDEX_CACHE_DIR gets its `indexedAt` cleared, so that it is
// no longer treated as already indexed.
//
// This covers the case where the index directory is wiped (e.g. ephemeral
// storage on a pod restart) while the DB still holds stale indexedAt
// timestamps. A missing index directory is handled the same way as an empty
// one: no shards exist, so every repo marked as indexed is stale.
//
// Shard files are matched to repos with getRepoIdFromShardFileName, i.e. the
// same <orgId>_<repoId>_*.zoekt naming convention used when cleaning up
// orphaned disk resources.
private async reconcileMissingShards() {
    // Do not bail out when the directory is absent: that is the most complete
    // form of "shards are missing", not a reason to skip reconciliation.
    const indexDirExists = existsSync(INDEX_CACHE_DIR);
    if (!indexDirExists) {
        logger.debug('Index cache directory does not exist, treating all indexed repos as missing their shards.');
    }
    const entries = indexDirExists ? await readdir(INDEX_CACHE_DIR) : [];

    // Collect the ids of repos that have at least one shard file present.
    // Entries that are not recognised as shard files yield `undefined`.
    const repoIdsOnDisk = new Set<number>();
    for (const entry of entries) {
        const repoId = getRepoIdFromShardFileName(entry);
        if (repoId !== undefined) {
            repoIdsOnDisk.add(repoId);
        }
    }

    // Find all repos the DB believes are already indexed.
    const indexedRepos = await this.db.repo.findMany({
        where: {
            indexedAt: {
                not: null,
            },
        },
        select: {
            id: true,
            name: true,
        },
    });

    if (indexedRepos.length === 0) {
        return;
    }

    const staleRepos = indexedRepos.filter((repo) => !repoIdsOnDisk.has(repo.id));
    if (staleRepos.length === 0) {
        logger.debug('Shard reconciliation complete. All indexed repos have shard files on disk.');
        return;
    }

    // Reset in bounded batches rather than one query per repo. updateMany also
    // tolerates a repo being deleted between the findMany above and the write
    // (it simply matches fewer rows), whereas a per-row update would throw and
    // abort startup.
    const RESET_BATCH_SIZE = 500;
    let resetCount = 0;

    for (let offset = 0; offset < staleRepos.length; offset += RESET_BATCH_SIZE) {
        const batch = staleRepos.slice(offset, offset + RESET_BATCH_SIZE);

        for (const repo of batch) {
            logger.warn(`Repo "${repo.name}" (id: ${repo.id}) is marked as indexed in the DB but has no shard file on disk. Marking as stale.`);
        }

        const { count } = await this.db.repo.updateMany({
            where: { id: { in: batch.map((repo) => repo.id) } },
            data: { indexedAt: null },
        });
        resetCount += count;
    }

    logger.info(`Shard reconciliation complete. Reset ${resetCount} repo(s) to stale — they will be re-indexed shortly.`);
}
801+
739802 public async dispose ( ) {
740803 if ( this . interval ) {
741804 clearInterval ( this . interval ) ;
0 commit comments