@@ -37,6 +37,7 @@ vi.mock('@sourcebot/shared', () => ({
// Replace ./constants.js with fixed values for the duration of this test file.
// Both cache directories point at relative `test-data/` paths.
vi.mock('./constants.js', () => {
    return {
        WORKER_STOP_GRACEFUL_TIMEOUT_MS: 5000,
        INDEX_CACHE_DIR: 'test-data/index',
        REPOS_CACHE_DIR: 'test-data/repos',
    };
});
4142
4243vi . mock ( './git.js' , ( ) => ( {
@@ -65,6 +66,13 @@ vi.mock('./posthog.js', () => ({
6566vi . mock ( './utils.js' , ( ) => ( {
6667 getAuthCredentialsForRepo : vi . fn ( ) . mockResolvedValue ( null ) ,
6768 getShardPrefix : vi . fn ( ( orgId : number , repoId : number ) => `${ orgId } _${ repoId } ` ) ,
// Test double: reads the second numeric segment of a `<digits>_<digits>_...` file name
// (the same `<orgId>_<repoId>` shape the getShardPrefix mock produces) and returns it
// as a number, or undefined when the name does not start with that pattern.
getRepoIdFromShardFileName: vi.fn((fileName: string) => {
    const parts = /^(\d+)_(\d+)_/.exec(fileName);
    return parts ? parseInt(parts[2], 10) : undefined;
}),
6876 measure : vi . fn ( async ( cb : ( ) => Promise < unknown > ) => {
6977 const data = await cb ( ) ;
7078 return { data, durationMs : 100 } ;
@@ -148,6 +156,7 @@ const createMockPrisma = () => {
148156 repo : {
149157 findMany : vi . fn ( ) . mockResolvedValue ( [ ] ) ,
150158 update : vi . fn ( ) ,
159+ updateMany : vi . fn ( ) ,
151160 delete : vi . fn ( ) ,
152161 } ,
153162 repoIndexingJob : {
@@ -783,6 +792,128 @@ describe('RepoIndexManager', () => {
783792 } ) ;
784793 } ) ;
785794
describe('Missing Shard Reconciliation', () => {
    /** Creates a mock repo that carries both an indexedAt timestamp and an indexed commit hash. */
    const makeIndexedRepo = (id: number, name: string) => {
        return createMockRepo({
            id,
            name,
            indexedAt: new Date(),
            indexedCommitHash: 'abc123',
        });
    };

    /** Makes existsSync report true for the index directory path only. */
    const onlyIndexDirExists = () => {
        (existsSync as Mock).mockImplementation((path: string) => path === 'test-data/index');
    };

    /** Sets the file names that readdir resolves with. */
    const stubShardFiles = (fileNames: string[]) => {
        (readdir as Mock).mockResolvedValue(fileNames);
    };

    /** Sets the rows that prisma.repo.findMany resolves with. */
    const stubReposInDb = (repos: unknown[]) => {
        (mockPrisma.repo.findMany as Mock).mockResolvedValue(repos);
    };

    /** Constructs a fresh manager from the shared mocks and starts its scheduler. */
    const bootScheduler = async () => {
        manager = new RepoIndexManager(mockPrisma, mockSettings, mockRedis, mockPromClient as any);
        await manager.startScheduler();
    };

    /** Asserts that indexedAt was nulled, via updateMany, for exactly the given repo ids. */
    const expectIndexedAtCleared = (repoIds: number[]) => {
        expect(mockPrisma.repo.updateMany).toHaveBeenCalledWith({
            where: { id: { in: repoIds } },
            data: { indexedAt: null },
        });
    };

    test('clears indexedAt for indexed repos whose shard files are missing on startup', async () => {
        onlyIndexDirExists();
        // Only repo 1 has a shard file listed; repo 2 has none.
        stubShardFiles(['1_1_v16.00000.zoekt']);
        stubReposInDb([
            makeIndexedRepo(1, 'repo-with-shard'),
            makeIndexedRepo(2, 'repo-missing-shard'),
        ]);

        await bootScheduler();

        expectIndexedAtCleared([2]);
    });

    test('does not touch repos when all shards are present', async () => {
        onlyIndexDirExists();
        stubShardFiles(['1_1_v16.00000.zoekt', '1_2_v16.00000.zoekt']);
        stubReposInDb([
            makeIndexedRepo(1, 'repo-1'),
            makeIndexedRepo(2, 'repo-2'),
        ]);

        await bootScheduler();

        expect(mockPrisma.repo.updateMany).not.toHaveBeenCalled();
    });

    test('marks all indexed repos as stale when the index directory is missing', async () => {
        // No path exists at all, including the index directory itself.
        (existsSync as Mock).mockReturnValue(false);
        stubReposInDb([
            makeIndexedRepo(1, 'repo-1'),
            makeIndexedRepo(2, 'repo-2'),
        ]);

        await bootScheduler();

        expectIndexedAtCleared([1, 2]);
    });

    test('does not count temporary shard files as valid shards', async () => {
        onlyIndexDirExists();
        // The only file carrying repo 2's prefix is a `.tmp` file.
        stubShardFiles(['1_2_v16.00000.zoekt123.tmp']);
        stubReposInDb([
            makeIndexedRepo(2, 'repo-with-only-tmp-shard'),
        ]);

        await bootScheduler();

        expectIndexedAtCleared([2]);
    });

    test('only considers repos that are indexed, non-empty, and connected', async () => {
        onlyIndexDirExists();
        stubShardFiles([]);
        stubReposInDb([]);

        await bootScheduler();

        // The lookup has to filter out three kinds of repo:
        //  - never-indexed repos, which have nothing to invalidate;
        //  - empty repos, whose indexing finishes without writing a shard;
        //  - repos with no connection, where clearing indexedAt would bypass
        //    the GC grace period.
        const expectedWhere = expect.objectContaining({
            indexedAt: { not: null },
            indexedCommitHash: { not: null },
            connections: { some: {} },
        });
        expect(mockPrisma.repo.findMany).toHaveBeenCalledWith(
            expect.objectContaining({ where: expectedWhere })
        );

        expect(mockPrisma.repo.updateMany).not.toHaveBeenCalled();
    });

    test('reconciles on every scheduler poll, not just startup', async () => {
        // Startup pass: a shard file is listed and the DB lookup returns no repos.
        onlyIndexDirExists();
        stubShardFiles(['1_1_v16.00000.zoekt']);
        stubReposInDb([]);

        await bootScheduler();

        // While the worker is running, the index directory is emptied and
        // the DB still reports repo 1 as indexed.
        stubShardFiles([]);
        stubReposInDb([
            makeIndexedRepo(1, 'repo-1'),
        ]);

        // Invoke the callback from the first recorded setIntervalAsync call by hand.
        const utils = await import('./utils.js');
        const [pollCallback] = (utils.setIntervalAsync as Mock).mock.calls[0];
        await pollCallback();

        expectIndexedAtCleared([1]);
    });
});
916+
786917 describe ( 'latestIndexingJobStatus Updates' , ( ) => {
787918 test ( 'sets latestIndexingJobStatus to IN_PROGRESS when job starts' , async ( ) => {
788919 const repo = createMockRepoWithConnections ( ) ;
0 commit comments