Skip to content

Commit 40d5680

Browse files
authored
perf(semantic-search): index from manifest projection (#64)
## Summary

Carries the tool manifest projection into semantic-search scan jobs by retaining each manifest source revision alongside the fingerprint. This makes index work traceable to the source catalog version that produced it without adding schema reads to scan.

## Changes

- Add optional `sourceRevision` to semantic-search index jobs.
- Copy `ToolSchemaManifest.sourceRevision` into both skipped and changed scan jobs.
- Add regression coverage that manifest source revisions survive scan materialization.

## Call Stack

```text
ToolSearchIndex.create()
  -> executor.tools.manifest()
  -> write manifest snapshot

ToolSearchIndex.scan()
  -> read manifest projection
  -> compare manifest.indexFingerprint with stored fingerprint
  -> write IndexJob(sourceRevision, fingerprint, path, status)
```

## Tests

- `bun run --cwd packages/plugins/semantic-search test -- src/sdk/tool-search-index.test.ts`
- `bun run --cwd packages/plugins/semantic-search typecheck`
- `bunx oxfmt --check packages/plugins/semantic-search/src/sdk/collections.ts packages/plugins/semantic-search/src/sdk/tool-search-index.ts packages/plugins/semantic-search/src/sdk/tool-search-index.test.ts`
- `bunx oxlint -c .oxlintrc.jsonc --deny-warnings packages/plugins/semantic-search/src/sdk/collections.ts packages/plugins/semantic-search/src/sdk/tool-search-index.ts packages/plugins/semantic-search/src/sdk/tool-search-index.test.ts`

## Stack

Base: #63

<!-- stack:links:start -->
### [Stack](https://github.com/aryasaatvik/stack)

1. #63
2. **#64** 👈 current
3. #65
<!-- stack:links:end -->
1 parent 25ee33d commit 40d5680

3 files changed

Lines changed: 88 additions & 0 deletions

File tree

packages/plugins/semantic-search/src/sdk/collections.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,7 @@ export const IndexJob = Schema.Struct({
7070
name: Schema.String,
7171
integration: Schema.String,
7272
description: Schema.String,
73+
sourceRevision: Schema.optional(Schema.String),
7374
status: IndexJobStatus,
7475
fingerprint: Schema.optional(Schema.String),
7576
oldFingerprint: Schema.optional(Schema.String),

packages/plugins/semantic-search/src/sdk/tool-search-index.test.ts

Lines changed: 81 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@ const makeMemoryCache = (): Executor["cache"] => {
6767
const manifestForTool = (
6868
tool: Tool,
6969
fingerprint = `fingerprint:${String(tool.address)}`,
70+
sourceRevision?: string,
7071
): ToolSchemaManifest => ({
7172
address: tool.address,
7273
path: String(tool.address).replace(/^tools\./, ""),
@@ -82,6 +83,7 @@ const manifestForTool = (
8283
definitionSetHash: `definitions:${String(tool.address)}`,
8384
indexFingerprint: fingerprint,
8485
fingerprintVersion: "tool-schema-manifest/v1",
86+
...(sourceRevision === undefined ? {} : { sourceRevision }),
8587
});
8688

8789
const makeExecutor = (
@@ -327,6 +329,7 @@ describe("ToolSearchIndex", () => {
327329
}
328330

329331
expect(scanned).toMatchObject({ processed: 1, changed: 1, skipped: 0 });
332+
expect([...jobs.data.values()][0]?.sourceRevision).toBeUndefined();
330333
expect(chunked.processed).toBe(1);
331334
expect(chunked.chunks).toBeGreaterThan(0);
332335
expect(embedded).toMatchObject({ processed: chunked.chunks, chunks: chunked.chunks });
@@ -350,6 +353,84 @@ describe("ToolSearchIndex", () => {
350353
}),
351354
);
352355

356+
it.effect("copies manifest source revisions into scan jobs", () =>
357+
Effect.gen(function* () {
358+
const tool: Tool = {
359+
address: "tools.github.repos.get" as never,
360+
name: "repos.get" as never,
361+
integration: "github" as never,
362+
description: "Get a repository",
363+
owner,
364+
connection: "default" as never,
365+
pluginId: "test",
366+
};
367+
const executor: Pick<Executor, "tools" | "cache"> = {
368+
tools: {
369+
list: () => Effect.succeed([tool]),
370+
manifest: () => Effect.succeed([manifestForTool(tool, "fp-source", "spec-hash-v1")]),
371+
schema: () => Effect.succeed(null),
372+
},
373+
cache: makeMemoryCache(),
374+
};
375+
const base = {
376+
namespace,
377+
executor: executor as Executor,
378+
runs: makeCollection<IndexRun>(indexRuns.name),
379+
jobs: makeCollection<IndexJob>(indexJobs.name),
380+
chunks: makeCollection<IndexChunk>(indexChunks.name),
381+
fingerprints: makeCollection<FingerprintRow>(toolFingerprints.name),
382+
blobs: makeBlobs(),
383+
owner,
384+
};
385+
386+
yield* create({ ...base, runId: "run-source-revision", partitionCount: 1 });
387+
const scanned = yield* scan({
388+
...base,
389+
runId: "run-source-revision",
390+
partition: 0,
391+
limit: 10,
392+
});
393+
394+
expect(scanned).toMatchObject({ processed: 1, changed: 1, skipped: 0 });
395+
expect([...base.jobs.data.values()][0]).toMatchObject({
396+
path: "github.repos.get",
397+
fingerprint: "fp-source",
398+
sourceRevision: "spec-hash-v1",
399+
});
400+
401+
const skippedBase = {
402+
...base,
403+
jobs: makeCollection<IndexJob>(indexJobs.name),
404+
fingerprints: makeCollection<FingerprintRow>(toolFingerprints.name),
405+
};
406+
yield* skippedBase.fingerprints.put({
407+
owner,
408+
key: "github.repos.get",
409+
data: {
410+
path: "github.repos.get",
411+
integration: "github",
412+
fingerprint: "fp-source",
413+
chunkIds: [],
414+
},
415+
});
416+
417+
yield* create({ ...skippedBase, runId: "run-source-revision-skipped", partitionCount: 1 });
418+
const skipped = yield* scan({
419+
...skippedBase,
420+
runId: "run-source-revision-skipped",
421+
partition: 0,
422+
limit: 10,
423+
});
424+
425+
expect(skipped).toMatchObject({ processed: 1, changed: 0, skipped: 1 });
426+
expect([...skippedBase.jobs.data.values()][0]).toMatchObject({
427+
path: "github.repos.get",
428+
fingerprint: "fp-source",
429+
sourceRevision: "spec-hash-v1",
430+
});
431+
}),
432+
);
433+
353434
it.effect("marks pending path work failed and reports progress timestamps", () =>
354435
Effect.gen(function* () {
355436
const counters = { raw: 0, codegen: 0 };

packages/plugins/semantic-search/src/sdk/tool-search-index.ts

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1092,6 +1092,9 @@ export const scan = (
10921092
name: manifest.name,
10931093
integration: manifest.integration,
10941094
description: manifest.description,
1095+
...(manifest.sourceRevision === undefined
1096+
? {}
1097+
: { sourceRevision: manifest.sourceRevision }),
10951098
status: "skipped",
10961099
fingerprint: manifest.indexFingerprint,
10971100
oldFingerprint: storedRow.fingerprint,
@@ -1110,6 +1113,9 @@ export const scan = (
11101113
name: manifest.name,
11111114
integration: manifest.integration,
11121115
description: manifest.description,
1116+
...(manifest.sourceRevision === undefined
1117+
? {}
1118+
: { sourceRevision: manifest.sourceRevision }),
11131119
status: "pendingChunk",
11141120
fingerprint: manifest.indexFingerprint,
11151121
oldFingerprint: storedRow?.fingerprint,

0 commit comments

Comments
 (0)