-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlib.js
More file actions
158 lines (145 loc) · 5.29 KB
/
lib.js
File metadata and controls
158 lines (145 loc) · 5.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
// Pure functions extracted for testability
export const DIMS = 384;
export const MIN_SCORE = 0.25;
export const JITTER = 0.05;
export const BASE_PORT = 31337;
/**
 * Derive a deterministic port number for a user.
 *
 * The username is lower-cased, hashed with 32-bit FNV-1a over its UTF-16
 * code units, rotated right by (length mod 32) bits, and the low 12 bits
 * of the result are added to BASE_PORT.
 *
 * @param {string} username - User name; comparison is case-insensitive.
 * @returns {number} A port in the range BASE_PORT .. BASE_PORT + 4095.
 */
export function userPort(username) {
  const name = username.toLowerCase();
  const size = name.length;

  // FNV-1a: xor in each code unit, then multiply by the 32-bit FNV prime.
  let hash = 0x811c9dc5;
  let pos = 0;
  while (pos < size) {
    hash = Math.imul(hash ^ name.charCodeAt(pos), 0x01000193);
    pos += 1;
  }
  hash >>>= 0;

  // Rotate right by the name length. Shift counts are taken mod 32 by the
  // language, so a rotation of 0 leaves the hash unchanged.
  const shift = size % 32;
  const rotated = ((hash >>> shift) | (hash << (32 - shift))) >>> 0;

  return BASE_PORT + (rotated & 0xfff);
}
/**
 * Return the rows of `allContent` that have no counterpart in `existingIndex`.
 *
 * Rows are matched on the composite key `session_id|source_type|source_id`,
 * where a null or undefined `source_id` is treated as the empty string.
 *
 * @param {Array<object>} allContent - Candidate rows.
 * @param {Iterable<object>} existingIndex - Rows that are already indexed.
 * @returns {Array<object>} The candidates whose key is not yet indexed, in input order.
 */
export function filterUnindexed(allContent, existingIndex) {
  const keyOf = ({ session_id, source_type, source_id }) =>
    `${session_id}|${source_type}|${source_id ?? ""}`;

  const alreadyIndexed = new Set(Array.from(existingIndex, (entry) => keyOf(entry)));

  return allContent.filter((candidate) => !alreadyIndexed.has(keyOf(candidate)));
}
/**
 * Remove results whose `content` duplicates an earlier result.
 *
 * The first occurrence of each distinct `content` value is kept and input
 * order is preserved. Runs in a single pass using a Set of seen contents
 * instead of rescanning the array for every element.
 *
 * @param {Array<{content: *}>} results - Results to deduplicate (not mutated).
 * @returns {Array<object>} A new array holding the first result for each content value.
 */
export function dedup(results) {
  const seen = new Set();
  return results.filter(({ content }) => {
    if (seen.has(content)) return false;
    seen.add(content);
    return true;
  });
}
/**
 * Turn raw search hits into the trimmed list returned to callers.
 *
 * Steps, in order: drop hits whose score (`1 - distance`) is below MIN_SCORE,
 * add a random offset in the range +/- JITTER to each distance, sort by the
 * jittered distance ascending, keep the first `limit` hits, and project each
 * onto `{ score, session_id, source_type, snippet }`.
 *
 * The reported score uses the original (un-jittered) distance, formatted as a
 * string with four decimals. Snippets longer than 500 characters are cut to
 * 500 and suffixed with "...".
 *
 * @param {Array<object>} results - Hits with `distance`, `content`, `session_id`, `source_type`.
 * @param {number} limit - Maximum number of hits to return.
 * @returns {Array<{score: string, session_id: *, source_type: *, snippet: string}>}
 */
export function postProcessResults(results, limit) {
  const meetsScoreFloor = ({ distance }) => 1 - distance >= MIN_SCORE;

  const addJitter = (hit) => {
    const noise = (Math.random() * 2 - 1) * JITTER;
    return { ...hit, jitteredDistance: hit.distance + noise };
  };

  const byJitteredDistance = (left, right) => left.jitteredDistance - right.jitteredDistance;

  const toPublicShape = ({ distance, session_id, source_type, content }) => {
    const snippet = content.length > 500 ? `${content.slice(0, 500)}...` : content;
    return {
      score: (1 - distance).toFixed(4),
      session_id,
      source_type,
      snippet,
    };
  };

  return results
    .filter(meetsScoreFloor)
    .map(addJitter)
    .sort(byJitteredDistance)
    .slice(0, limit)
    .map(toPublicShape);
}
/**
 * Decide whether a process description refers to our server.
 *
 * A match requires a non-empty `CommandLine`, a `Name` of exactly "node.exe",
 * and a command line that mentions "vector-memory-server.js".
 *
 * @param {{Name?: string, CommandLine?: string}|null|undefined} info - Process info object.
 * @returns {boolean} True only when all three conditions hold.
 */
export function isOurServer(info) {
  const commandLine = info?.CommandLine;
  if (commandLine && info.Name === "node.exe") {
    return commandLine.includes("vector-memory-server.js");
  }
  return false;
}
/**
 * Check whether an item carries enough text to be worth indexing.
 *
 * An item is indexable when its `content` is a string with at least 10
 * characters after trimming surrounding whitespace. The result is always a
 * real boolean, so a missing item, missing content, or non-string content
 * yields `false` rather than a falsy non-boolean value or a thrown error.
 *
 * @param {{content?: *}|null|undefined} item - Candidate item.
 * @returns {boolean} True when the trimmed content is 10 or more characters long.
 */
export function isIndexable(item) {
  const content = item?.content;
  return typeof content === "string" && content.trim().length >= 10;
}
/** Collect the whole request stream and return it as a string. */
async function readRequestBody(req) {
  const chunks = [];
  for await (const chunk of req) chunks.push(chunk);
  return Buffer.concat(chunks).toString();
}

/** Parse a JSON request body; an empty body or a non-object value becomes `{}`. Throws on malformed JSON. */
function parseJsonBody(raw) {
  if (raw.trim() === "") return {};
  const parsed = JSON.parse(raw);
  return parsed !== null && typeof parsed === "object" ? parsed : {};
}

/** Send `payload` as a JSON response; serializes first so a failure cannot leave headers half-written. */
function sendJson(res, status, payload) {
  const text = JSON.stringify(payload);
  res.writeHead(status, { "Content-Type": "application/json" });
  res.end(text);
}

/** Handle `/search`: index any not-yet-indexed content (unless indexing is already running), then search. */
async function handleSearch(deps, body) {
  const vecDb = deps.openVectorDb();
  try {
    if (!deps.getIsIndexing()) {
      deps.setIsIndexing(true);
      try {
        const sessionDb = deps.openSessionStore();
        if (sessionDb) {
          let unindexed;
          try {
            unindexed = deps.getUnindexedContent(vecDb, sessionDb);
          } finally {
            // Close the session store even when the lookup throws.
            sessionDb.close();
          }
          if (unindexed.length > 0) await deps.indexContent(vecDb, unindexed);
        }
      } finally {
        deps.setIsIndexing(false);
      }
    }
    return await deps.search(vecDb, body.query, body.limit || 10);
  } finally {
    vecDb.close();
  }
}

/** Handle `/reindex`: rebuild the vector index from every row of the session store's `search_index`. */
async function handleReindex(deps) {
  if (deps.getIsIndexing()) {
    return { error: "Indexing already in progress. Try again shortly." };
  }
  deps.setIsIndexing(true);
  try {
    // Read the source rows BEFORE wiping anything, so a missing or unreadable
    // session store leaves the existing index intact.
    const sessionDb = deps.openSessionStore();
    if (!sessionDb) {
      return { error: "Session store not found." };
    }
    let allContent;
    try {
      allContent = sessionDb
        .prepare("SELECT rowid, content, session_id, source_type, source_id FROM search_index")
        .all();
    } finally {
      sessionDb.close();
    }

    const vecDb = deps.openVectorDb();
    try {
      vecDb.exec("DELETE FROM indexed_items");
      vecDb.exec("DROP TABLE IF EXISTS vec_items");
      vecDb.exec(`CREATE VIRTUAL TABLE vec_items USING vec0(rowid INTEGER PRIMARY KEY, embedding float[${DIMS}])`);
      const count = await deps.indexContent(vecDb, allContent);
      deps.runMaintenance(vecDb);
      return { count };
    } finally {
      vecDb.close();
    }
  } finally {
    // Always release the flag, including when opening the vector DB fails.
    deps.setIsIndexing(false);
  }
}

/**
 * Create an HTTP request handler with injected dependencies.
 *
 * Routes (POST only; any other method or path gets an empty 404):
 *  - `/search`  : body `{ query, limit? }` (limit defaults to 10). Indexes pending
 *                 content first unless indexing is already running, then responds
 *                 with the value returned by `deps.search`.
 *  - `/reindex` : rebuilds the index; responds `{ count }`, or `{ error }` when
 *                 indexing is already running or the session store is missing.
 *  - `/ping`    : responds `{ ok: true, ...deps.getIdentity() }`.
 *
 * Responses are JSON. Malformed JSON sent to `/search` yields 400; any other
 * failure yields 500 with `{ error: message }`. The request body is only parsed
 * for `/search`, so `/ping` and `/reindex` accept an empty body.
 *
 * @param {object} deps
 * @param {() => object} deps.openVectorDb
 * @param {() => object|null} deps.openSessionStore
 * @param {(vecDb, sessionDb) => Array} deps.getUnindexedContent
 * @param {(vecDb, items) => Promise<number>} deps.indexContent
 * @param {(vecDb, query, limit) => Promise<Array>} deps.search
 * @param {(db) => void} deps.runMaintenance
 * @param {() => boolean} deps.getIsIndexing
 * @param {(v: boolean) => void} deps.setIsIndexing
 * @param {() => object} [deps.getIdentity] Optional; extra fields merged into the `/ping` response.
 * @returns {(req, res) => Promise<void>} Async handler for Node-style request/response objects.
 */
export function createHandler(deps) {
  return async function handleRequest(req, res) {
    if (req.method !== "POST") {
      res.writeHead(404);
      res.end();
      return;
    }
    try {
      // Always drain the request, even for routes that ignore the body.
      const raw = await readRequestBody(req);
      let result;
      if (req.url === "/search") {
        let body;
        try {
          body = parseJsonBody(raw);
        } catch (err) {
          sendJson(res, 400, { error: `Invalid JSON body: ${err.message}` });
          return;
        }
        result = await handleSearch(deps, body);
      } else if (req.url === "/reindex") {
        result = await handleReindex(deps);
      } else if (req.url === "/ping") {
        const identity = deps.getIdentity ? deps.getIdentity() : {};
        result = { ok: true, ...identity };
      } else {
        res.writeHead(404);
        res.end();
        return;
      }
      sendJson(res, 200, result);
    } catch (err) {
      sendJson(res, 500, { error: err.message });
    }
  };
}