Skip to content

Commit 9016b9c

Browse files
BrainSlugs83 and Copilot committed
fix: worker crash resilience — auto-restart, embed timeout, proxy retry on timeout
If the embed worker thread crashes (OOM, unhandled rejection, etc.), the server becomes a zombie: /ping responds but /search hangs forever because embed() returns a promise that never resolves.

Three fixes:
1. The worker auto-restarts on crash (exit handler with a 2s backoff).
2. embed() has a 60s timeout — it rejects instead of hanging forever.
3. The proxy's callServerWithRetry now catches timeout errors (not just ECONNREFUSED/ECONNRESET), so a zombie server triggers a relaunch instead of surfacing a timeout to the user.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent a443a79 commit 9016b9c

File tree

4 files changed

+52
-13
lines changed

4 files changed

+52
-13
lines changed

index.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,8 +250,9 @@ async function callServerWithRetry(path, body) {
250250
try {
251251
return await callServer(path, body);
252252
} catch (err) {
253-
if (err.message && (err.message.includes("ECONNREFUSED") || err.message.includes("ECONNRESET"))) {
254-
// Server isn't responding — (re)launch and wait for it
253+
const msg = err.message || "";
254+
if (msg.includes("ECONNREFUSED") || msg.includes("ECONNRESET") || msg.includes("timeout")) {
255+
// Server isn't responding or timed out — (re)launch and wait for it
255256
await ensureServer();
256257
const ready = await waitForServer(300_000); // 5 minutes for first-run model download
257258
if (!ready) {

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "ghcp-cli-vector-memory-mcp",
3-
"version": "1.5.0",
3+
"version": "1.5.1",
44
"description": "MCP server that gives GitHub Copilot CLI persistent long-term memory via local semantic vector search. Install: npx -y ghcp-cli-vector-memory-mcp",
55
"main": "index.js",
66
"bin": {

vector-memory-server.js

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,35 +26,73 @@ let isIndexing = false;
2626

2727
// --- Embedding via Worker Thread ---
2828

29-
// Worker is started lazily after we win the singleton race
29+
// Worker is started lazily after we win the singleton race.
30+
// Auto-restarts on crash to prevent zombie server state.
3031
let worker;
32+
let workerAlive = false;
3133
let embedIdCounter = 0;
32-
const pendingEmbeds = new Map();
34+
const pendingEmbeds = new Map(); // id → { resolve, reject }
35+
const EMBED_TIMEOUT_MS = 60_000;
36+
37+
function rejectAllPending(reason) {
38+
for (const [id, { reject }] of pendingEmbeds) {
39+
reject(new Error(reason));
40+
}
41+
pendingEmbeds.clear();
42+
}
3343

3444
function initWorker() {
3545
worker = new Worker(join(__dirname, "embed-worker.js"));
46+
workerAlive = true;
47+
3648
worker.on("message", (msg) => {
3749
if (msg.type === "ready") return;
3850
if (msg.type === "error") {
3951
process.stderr.write(`[vector-memory] Embedding model error: ${msg.message}\n`);
4052
return;
4153
}
42-
const resolve = pendingEmbeds.get(msg.id);
43-
if (resolve) {
54+
const pending = pendingEmbeds.get(msg.id);
55+
if (pending) {
56+
clearTimeout(pending.timer);
4457
pendingEmbeds.delete(msg.id);
45-
resolve(msg.embedding);
58+
pending.resolve(msg.embedding);
4659
}
4760
});
61+
4862
worker.on("error", (err) => {
4963
process.stderr.write(`[vector-memory] Worker crashed: ${err.message}\n`);
64+
workerAlive = false;
65+
rejectAllPending("Worker crashed: " + err.message);
66+
});
67+
68+
worker.on("exit", (code) => {
69+
workerAlive = false;
70+
if (code !== 0) {
71+
process.stderr.write(`[vector-memory] Worker exited with code ${code} — restarting in 2s\n`);
72+
rejectAllPending("Worker exited with code " + code);
73+
setTimeout(() => initWorker(), 2000);
74+
}
5075
});
5176
}
5277

5378
function embed(text) {
54-
return new Promise((resolve) => {
79+
return new Promise((resolve, reject) => {
80+
if (!workerAlive) {
81+
return reject(new Error("Embed worker is not running"));
82+
}
5583
const id = embedIdCounter++;
56-
pendingEmbeds.set(id, resolve);
57-
worker.postMessage({ id, text });
84+
const timer = setTimeout(() => {
85+
pendingEmbeds.delete(id);
86+
reject(new Error("Embedding timed out after " + EMBED_TIMEOUT_MS + "ms"));
87+
}, EMBED_TIMEOUT_MS);
88+
pendingEmbeds.set(id, { resolve, reject, timer });
89+
try {
90+
worker.postMessage({ id, text });
91+
} catch (err) {
92+
clearTimeout(timer);
93+
pendingEmbeds.delete(id);
94+
reject(err);
95+
}
5896
});
5997
}
6098

0 commit comments

Comments
 (0)