Skip to content

Commit 3cf1b90

Browse files
authored
Merge pull request #3 from BrainSlugs83/fix/embed-worker-restart-resilience
fix: embed pool waits for worker restart, restarts on code-0 exit
2 parents 84413d5 + 590e38b commit 3cf1b90

File tree

6 files changed

+917
-80
lines changed

6 files changed

+917
-80
lines changed

embed-pool.js

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/**
2+
* Encapsulates embed-worker lifecycle management.
3+
*
4+
* @param {() => import("node:events").EventEmitter} workerFactory Creates a worker-like object.
5+
* @param {object} [opts]
6+
* @param {number} [opts.embedTimeout=60000] Per-embed timeout in ms.
7+
* @param {number} [opts.restartDelay=2000] Delay before restarting a crashed worker.
8+
* @param {number} [opts.workerReadyTimeout=30000] Max time to wait for a restarting worker.
9+
* @param {number} [opts.maxRestartDelay=60000] Backoff cap for repeated restart failures.
10+
*/
11+
export function createEmbedPool(workerFactory, opts = {}) {
12+
const EMBED_TIMEOUT_MS = opts.embedTimeout ?? 60_000;
13+
const RESTART_DELAY_MS = opts.restartDelay ?? 2000;
14+
const WORKER_READY_TIMEOUT_MS = opts.workerReadyTimeout ?? 30_000;
15+
const MAX_RESTART_DELAY_MS = opts.maxRestartDelay ?? 60_000;
16+
17+
let worker = null;
18+
let workerAlive = false;
19+
let shuttingDown = false;
20+
let embedIdCounter = 0;
21+
let currentRestartDelay = RESTART_DELAY_MS;
22+
const pendingEmbeds = new Map();
23+
24+
let workerReadyResolve = null;
25+
let workerReadyPromise = null;
26+
27+
function rejectAllPending(reason) {
28+
for (const [, { reject, timer }] of pendingEmbeds) {
29+
clearTimeout(timer);
30+
reject(new Error(reason));
31+
}
32+
pendingEmbeds.clear();
33+
}
34+
35+
let restartTimer = null;
36+
37+
function scheduleRestart(code) {
38+
if (shuttingDown) return;
39+
const delay = currentRestartDelay;
40+
process.stderr.write(`[vector-memory] Worker exited (code ${code}) — restarting in ${delay}ms\n`);
41+
workerReadyPromise = new Promise(resolve => { workerReadyResolve = resolve; });
42+
restartTimer = setTimeout(() => {
43+
restartTimer = null;
44+
if (shuttingDown) return;
45+
try {
46+
initWorker();
47+
currentRestartDelay = RESTART_DELAY_MS;
48+
} catch (err) {
49+
process.stderr.write(`[vector-memory] Worker restart failed: ${err.message}\n`);
50+
currentRestartDelay = Math.min(currentRestartDelay * 2, MAX_RESTART_DELAY_MS);
51+
scheduleRestart(code);
52+
return;
53+
}
54+
if (workerReadyResolve) {
55+
workerReadyResolve();
56+
workerReadyResolve = null;
57+
}
58+
}, delay);
59+
}
60+
61+
function initWorker() {
62+
worker = workerFactory();
63+
workerAlive = true;
64+
workerReadyPromise = null;
65+
66+
worker.on("message", (msg) => {
67+
if (msg.type === "ready") return;
68+
if (msg.type === "error") {
69+
process.stderr.write(`[vector-memory] Embedding model error: ${msg.message}\n`);
70+
return;
71+
}
72+
const pending = pendingEmbeds.get(msg.id);
73+
if (pending) {
74+
clearTimeout(pending.timer);
75+
pendingEmbeds.delete(msg.id);
76+
pending.resolve(msg.embedding);
77+
}
78+
});
79+
80+
worker.on("error", (err) => {
81+
process.stderr.write(`[vector-memory] Worker crashed: ${err.message}\n`);
82+
workerAlive = false;
83+
rejectAllPending("Worker crashed: " + err.message);
84+
});
85+
86+
worker.on("exit", (code) => {
87+
workerAlive = false;
88+
rejectAllPending("Worker exited with code " + code);
89+
scheduleRestart(code);
90+
});
91+
}
92+
93+
async function embed(text) {
94+
if (!workerAlive && workerReadyPromise) {
95+
let timeoutId;
96+
const timeout = new Promise((_, reject) => {
97+
timeoutId = setTimeout(
98+
() => reject(new Error("Embed worker restart timed out")),
99+
WORKER_READY_TIMEOUT_MS
100+
);
101+
});
102+
try {
103+
await Promise.race([workerReadyPromise, timeout]);
104+
} finally {
105+
clearTimeout(timeoutId);
106+
}
107+
}
108+
109+
if (!workerAlive) {
110+
throw new Error("Embed worker is not running");
111+
}
112+
113+
return new Promise((resolve, reject) => {
114+
const id = embedIdCounter++;
115+
const timer = setTimeout(() => {
116+
pendingEmbeds.delete(id);
117+
reject(new Error("Embedding timed out after " + EMBED_TIMEOUT_MS + "ms"));
118+
}, EMBED_TIMEOUT_MS);
119+
pendingEmbeds.set(id, { resolve, reject, timer });
120+
try {
121+
worker.postMessage({ id, text });
122+
} catch (err) {
123+
clearTimeout(timer);
124+
pendingEmbeds.delete(id);
125+
reject(err);
126+
}
127+
});
128+
}
129+
130+
function shutdown() {
131+
shuttingDown = true;
132+
clearTimeout(restartTimer);
133+
rejectAllPending("Pool shutting down");
134+
if (worker) {
135+
worker.terminate();
136+
worker = null;
137+
workerAlive = false;
138+
}
139+
if (workerReadyResolve) {
140+
workerReadyResolve();
141+
workerReadyResolve = null;
142+
}
143+
}
144+
145+
return { embed, initWorker, shutdown, isAlive: () => workerAlive };
146+
}

index.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import { request } from "http";
1111
import { userInfo } from "os";
1212

1313
const __dirname = dirname(fileURLToPath(import.meta.url));
14-
const COPILOT_DIR = join(homedir(), ".copilot");
14+
const COPILOT_DIR = process.env.VECTOR_MEMORY_DATA_DIR || join(homedir(), ".copilot");
1515
const EXPECTED_USER = userInfo().username;
1616
const PKG = JSON.parse(readFileSync(join(__dirname, "package.json"), "utf-8"));
1717

@@ -27,7 +27,7 @@ const PID_FILE = join(COPILOT_DIR, "vector-memory.pid");
2727

2828
const SERVER_DEPS_DIR = join(__dirname, ".server");
2929
const SERVER_DEPS_JSON = join(__dirname, "server-deps.json");
30-
const SERVER_FILES = ["vector-memory-server.js", "embed-worker.js", "lib.js"];
30+
const SERVER_FILES = ["vector-memory-server.js", "embed-worker.js", "embed-pool.js", "lib.js"];
3131

3232
/** Check if server deps are available (either in node_modules or .server/) */
3333
function serverDepsInstalled() {

package.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
"index.js",
1111
"vector-memory-server.js",
1212
"embed-worker.js",
13+
"embed-pool.js",
1314
"lib.js",
1415
"server-deps.json",
1516
"LICENSE",
@@ -50,9 +51,10 @@
5051
"offline"
5152
],
5253
"scripts": {
53-
"lint": "eslint index.js vector-memory-server.js embed-worker.js lib.js",
54+
"lint": "eslint index.js vector-memory-server.js embed-worker.js embed-pool.js lib.js",
5455
"test": "node --test test.js",
55-
"test:coverage": "node --test --experimental-test-coverage --test-coverage-lines=100 --test-coverage-branches=100 --test-coverage-functions=100 --test-coverage-exclude=test.js test.js",
56+
"test:integration": "node --test test-integration.js",
57+
"test:coverage": "node --test --experimental-test-coverage --test-coverage-lines=100 --test-coverage-branches=100 --test-coverage-functions=100 --test-coverage-exclude=test.js --test-coverage-exclude=test-integration.js test.js",
5658
"check": "npm run lint && npm run test:coverage"
5759
},
5860
"dependencies": {

0 commit comments

Comments
 (0)