|
| 1 | +import { promises as fs } from 'fs'; |
| 2 | +import path from 'path'; |
| 3 | +import { z } from 'zod'; |
| 4 | + |
| 5 | +import { |
| 6 | + CODEBASE_CONTEXT_DIRNAME, |
| 7 | + INDEX_FORMAT_VERSION, |
| 8 | + INDEX_META_FILENAME, |
| 9 | + INDEX_META_VERSION, |
| 10 | + INTELLIGENCE_FILENAME, |
| 11 | + KEYWORD_INDEX_FILENAME, |
| 12 | + VECTOR_DB_DIRNAME |
| 13 | +} from '../constants/codebase-context.js'; |
| 14 | +import { IndexCorruptedError } from '../errors/index.js'; |
| 15 | + |
// Primitive validators shared by every schema below.
const nonEmptyString = z.string().min(1);
const versionNumber = z.number().int().nonnegative();

/** Build identity header: a non-empty build id plus a non-negative integer format version. */
const ArtifactHeaderSchema = z.object({
  buildId: nonEmptyString,
  formatVersion: versionNumber
});

/** Keyword index file: a header plus an array of chunks whose contents are not inspected. */
const KeywordIndexFileSchema = z.object({
  header: ArtifactHeaderSchema,
  chunks: z.unknown().array()
});

/** Vector DB build marker: the build identity fields sit at the top level of the file. */
const VectorDbBuildSchema = z.object({
  buildId: nonEmptyString,
  formatVersion: versionNumber
});

/** Intelligence file: only the header is validated; any other keys pass through untouched. */
const IntelligenceFileSchema = z.object({ header: ArtifactHeaderSchema }).passthrough();

/**
 * Schema of the index meta file.
 *
 * `artifacts` must describe the keyword index and the vector DB; the
 * intelligence entry is optional, and unknown artifact keys are passed through.
 */
export const IndexMetaSchema = z.object({
  metaVersion: z.number().int().positive(),
  formatVersion: versionNumber,
  buildId: nonEmptyString,
  generatedAt: z.string().datetime(),
  toolVersion: nonEmptyString,
  artifacts: z
    .object({
      keywordIndex: z.object({ path: nonEmptyString }),
      vectorDb: z.object({ path: nonEmptyString, provider: nonEmptyString }),
      intelligence: z.object({ path: nonEmptyString }).optional()
    })
    .passthrough()
});

/** Parsed, schema-validated contents of the index meta file. */
export type IndexMeta = z.infer<typeof IndexMetaSchema>;
| 62 | + |
/**
 * Reports whether `targetPath` can be accessed.
 *
 * @param targetPath - Path to probe with `fs.access`.
 * @returns `true` when the access check succeeds, `false` when it fails for any reason.
 */
async function pathExists(targetPath: string): Promise<boolean> {
  return fs.access(targetPath).then(
    () => true,
    () => false
  );
}
| 71 | + |
/**
 * Asserts that `targetPath` exists and is a regular file.
 *
 * A bare existence check would also accept a directory sitting at the expected
 * path, so the entry is stat-ed and its type verified.
 *
 * @param targetPath - Path that must refer to a regular file.
 * @param label - Human-readable artifact name used as the error message prefix.
 * @throws IndexCorruptedError when the path cannot be stat-ed or is not a regular file.
 */
async function requireFile(targetPath: string, label: string): Promise<void> {
  let isRegularFile: boolean;
  try {
    isRegularFile = (await fs.stat(targetPath)).isFile();
  } catch {
    // Any stat failure is reported as a missing artifact.
    throw new IndexCorruptedError(`${label} missing: ${targetPath}`);
  }

  if (!isRegularFile) {
    throw new IndexCorruptedError(`${label} is not a file: ${targetPath}`);
  }
}
| 77 | + |
/**
 * Asserts that `targetPath` exists and is a directory.
 *
 * @param targetPath - Path that must refer to a directory.
 * @param label - Human-readable artifact name used as the error message prefix.
 * @throws IndexCorruptedError when the path cannot be stat-ed or is not a directory.
 */
async function requireDirectory(targetPath: string, label: string): Promise<void> {
  let isDir: boolean;
  try {
    isDir = (await fs.stat(targetPath)).isDirectory();
  } catch {
    // Any stat failure is reported as a missing artifact.
    throw new IndexCorruptedError(`${label} missing: ${targetPath}`);
  }

  if (!isDir) {
    throw new IndexCorruptedError(`${label} is not a directory: ${targetPath}`);
  }
}
| 89 | + |
/**
 * Wraps an arbitrary thrown value in an `IndexCorruptedError`.
 *
 * @param message - Context describing what was being read or validated.
 * @param error - The caught value; its `message` is used when it is an `Error`,
 *   otherwise its string conversion.
 * @returns A new `IndexCorruptedError` whose message is `"<message>: <detail>"`.
 */
function asIndexCorrupted(message: string, error: unknown): IndexCorruptedError {
  let detail: string;
  if (error instanceof Error) {
    detail = error.message;
  } else {
    detail = String(error);
  }
  return new IndexCorruptedError(`${message}: ${detail}`);
}
| 94 | + |
/**
 * Loads and validates the index meta file under `rootDir`.
 *
 * The file is read, parsed as JSON, validated against `IndexMetaSchema`, and
 * then its `metaVersion` and `formatVersion` are compared with the versions
 * this build expects.
 *
 * @param rootDir - Directory that contains the codebase-context directory.
 * @returns The validated index meta.
 * @throws IndexCorruptedError when the file cannot be read or parsed, fails
 *   schema validation, or carries an unexpected meta/format version.
 */
export async function readIndexMeta(rootDir: string): Promise<IndexMeta> {
  const metaFile = path.join(rootDir, CODEBASE_CONTEXT_DIRNAME, INDEX_META_FILENAME);

  let candidate: unknown;
  try {
    candidate = JSON.parse(await fs.readFile(metaFile, 'utf-8'));
  } catch (cause) {
    throw asIndexCorrupted('Index meta missing or unreadable (rebuild required)', cause);
  }

  const validation = IndexMetaSchema.safeParse(candidate);
  if (!validation.success) {
    throw new IndexCorruptedError(
      `Index meta schema mismatch (rebuild required): ${validation.error.message}`
    );
  }

  const { metaVersion, formatVersion } = validation.data;

  if (metaVersion !== INDEX_META_VERSION) {
    throw new IndexCorruptedError(
      `Index meta version mismatch (rebuild required): expected metaVersion=${INDEX_META_VERSION}, found metaVersion=${metaVersion}`
    );
  }

  if (formatVersion !== INDEX_FORMAT_VERSION) {
    throw new IndexCorruptedError(
      `Index format version mismatch (rebuild required): expected formatVersion=${INDEX_FORMAT_VERSION}, found formatVersion=${formatVersion}`
    );
  }

  return validation.data;
}
| 129 | + |
/** Build identity fields that each artifact carries and that must match the index meta. */
interface ArtifactHeader {
  buildId: string;
  formatVersion: number;
}

/** Result of schema-validating an artifact's JSON and extracting its header. */
type HeaderParseOutcome =
  | { ok: true; header: ArtifactHeader }
  | { ok: false; reason: string };

/** Describes one on-disk artifact whose header must agree with the index meta. */
interface ArtifactHeaderCheck {
  /** Location of the JSON file to read. */
  filePath: string;
  /** Prefix of the "schema mismatch" and "corrupted" messages. */
  artifactLabel: string;
  /** Prefix of the formatVersion/buildId mismatch messages. */
  mismatchLabel: string;
  /** File name printed next to the artifact's own value in mismatch messages. */
  fileName: string;
  /** Validates the parsed JSON and extracts the header to compare. */
  parseHeader: (json: unknown) => HeaderParseOutcome;
}

/** Reads one artifact file and verifies its header against `meta`, throwing IndexCorruptedError on any failure. */
async function verifyArtifactHeader(check: ArtifactHeaderCheck, meta: IndexMeta): Promise<void> {
  try {
    const json: unknown = JSON.parse(await fs.readFile(check.filePath, 'utf-8'));

    const outcome = check.parseHeader(json);
    if (!outcome.ok) {
      throw new IndexCorruptedError(
        `${check.artifactLabel} schema mismatch (rebuild required): ${outcome.reason}`
      );
    }

    const { buildId, formatVersion } = outcome.header;
    if (formatVersion !== meta.formatVersion) {
      throw new IndexCorruptedError(
        `${check.mismatchLabel} formatVersion mismatch (rebuild required): meta=${meta.formatVersion}, ${check.fileName}=${formatVersion}`
      );
    }
    if (buildId !== meta.buildId) {
      throw new IndexCorruptedError(
        `${check.mismatchLabel} buildId mismatch (rebuild required): meta=${meta.buildId}, ${check.fileName}=${buildId}`
      );
    }
  } catch (error) {
    // Our own diagnostics pass through unchanged; read and JSON errors get wrapped.
    if (error instanceof IndexCorruptedError) throw error;
    throw asIndexCorrupted(`${check.artifactLabel} corrupted (rebuild required)`, error);
  }
}

/**
 * Verifies that the on-disk index artifacts exist and belong to the build
 * described by `meta`.
 *
 * The keyword index, the vector DB directory and the vector DB build marker
 * are required. The intelligence file is validated only when it is present.
 * Each validated artifact must pass its schema and carry the same
 * `formatVersion` and `buildId` as `meta`.
 *
 * @param rootDir - Directory that contains the codebase-context directory.
 * @param meta - Index meta that the artifacts are compared against.
 * @throws IndexCorruptedError when a required artifact is missing, or when any
 *   validated artifact is unreadable, fails its schema, or does not match `meta`.
 */
export async function validateIndexArtifacts(rootDir: string, meta: IndexMeta): Promise<void> {
  const contextDir = path.join(rootDir, CODEBASE_CONTEXT_DIRNAME);

  const keywordPath = path.join(contextDir, KEYWORD_INDEX_FILENAME);
  const vectorDir = path.join(contextDir, VECTOR_DB_DIRNAME);
  const vectorBuildPath = path.join(vectorDir, 'index-build.json');
  const intelligencePath = path.join(contextDir, INTELLIGENCE_FILENAME);

  // Presence checks run first so a missing artifact is reported before any content error.
  await requireFile(keywordPath, 'Keyword index');
  await requireDirectory(vectorDir, 'Vector DB directory');
  await requireFile(vectorBuildPath, 'Vector DB build marker');

  // Keyword index header (required)
  await verifyArtifactHeader(
    {
      filePath: keywordPath,
      artifactLabel: 'Keyword index',
      mismatchLabel: 'Keyword index',
      fileName: 'index.json',
      parseHeader: (json) => {
        const parsed = KeywordIndexFileSchema.safeParse(json);
        return parsed.success
          ? { ok: true, header: parsed.data.header }
          : { ok: false, reason: parsed.error.message };
      }
    },
    meta
  );

  // Vector DB build marker (required); its identity fields are at the top level.
  await verifyArtifactHeader(
    {
      filePath: vectorBuildPath,
      artifactLabel: 'Vector DB build marker',
      mismatchLabel: 'Vector DB',
      fileName: 'index-build.json',
      parseHeader: (json) => {
        const parsed = VectorDbBuildSchema.safeParse(json);
        return parsed.success
          ? { ok: true, header: parsed.data }
          : { ok: false, reason: parsed.error.message };
      }
    },
    meta
  );

  // Optional intelligence artifact: validate if present, but do not require.
  if (await pathExists(intelligencePath)) {
    await verifyArtifactHeader(
      {
        filePath: intelligencePath,
        artifactLabel: 'Intelligence',
        mismatchLabel: 'Intelligence',
        fileName: 'intelligence.json',
        parseHeader: (json) => {
          const parsed = IntelligenceFileSchema.safeParse(json);
          return parsed.success
            ? { ok: true, header: parsed.data.header }
            : { ok: false, reason: parsed.error.message };
        }
      },
      meta
    );
  }
}
0 commit comments