Skip to content

Commit 8c9e044

Browse files
author
Brendan Gray
committed
v1.8.17: Context size fix (8K min for GPU modes), system prompt contamination fix, eraseContextTokenRanges hang fix
1 parent fc2e781 commit 8c9e044

3 files changed

Lines changed: 17 additions & 17 deletions

File tree

main/agenticChat.js

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -630,6 +630,10 @@ function register(ctx) {
630630
}
631631
}
632632

633+
// Memory context (project facts, code patterns) — belongs in system prompt, not user message
634+
const memoryContext = memoryStore.getContextPrompt();
635+
if (memoryContext) appendIfBudget('\n' + memoryContext + '\n');
636+
633637
_staticPromptCache.set(cacheKey, prompt);
634638
return prompt;
635639
};
@@ -644,9 +648,7 @@ function register(ctx) {
644648
return false;
645649
};
646650

647-
// Memory
648-
const memoryContext = memoryStore.getContextPrompt();
649-
if (memoryContext) appendIfBudget(memoryContext + '\n');
651+
// Memory context is injected into system prompt (buildStaticPrompt), not user message
650652

651653
// Error context
652654
if (context?.errorMessage) {

main/llmEngine.js

Lines changed: 11 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ const CTX_CREATE_TIMEOUT_GPU = 15_000;
1818
const CTX_CREATE_TIMEOUT_CPU = 60_000;
1919
const DISPOSE_TIMEOUT = 10_000;
2020
const MIN_AGENTIC_CONTEXT = 4096;
21+
const MIN_USABLE_GPU_CONTEXT = 8192;
2122
const TOOL_DETECT_BUFFER_MAX = 60_000;
2223
const KV_REUSE_COOLDOWN_TURNS = 2;
2324
const MAX_PARALLEL_FUNCTION_CALLS = 4;
@@ -287,9 +288,10 @@ class LLMEngine extends EventEmitter {
287288
let maxCtx = this._computeMaxContext(gpuConfig.modelSizeGB);
288289
// CPU mode: cap context for responsive generation
289290
if (mode === false) maxCtx = Math.min(maxCtx, 8192);
291+
const contextMin = (mode === false) ? 512 : MIN_USABLE_GPU_CONTEXT;
290292
loadedContext = await this._withTimeout(
291293
loadedModel.createContext({
292-
contextSize: { min: 512, max: maxCtx },
294+
contextSize: { min: contextMin, max: maxCtx },
293295
flashAttention: true,
294296
ignoreMemorySafetyChecks: true,
295297
failedCreationRemedy: { retries: 4, autoContextSizeShrink: 0.5 },
@@ -298,9 +300,9 @@ class LLMEngine extends EventEmitter {
298300
'Context creation',
299301
);
300302

301-
// Verify context is usable (at least 512 tokens after system prompt)
303+
// Verify context is usable (need enough for system prompt + meaningful generation)
302304
const actualCtx = loadedContext.contextSize || 0;
303-
if (actualCtx < 1024 && mode !== false) {
305+
if (actualCtx < MIN_USABLE_GPU_CONTEXT && mode !== false) {
304306
const log = require('./logger');
305307
log.warn(`GPU mode ${mode} context too small (${actualCtx}), trying next mode`);
306308
loadedContext.dispose?.();
@@ -1071,16 +1073,12 @@ class LLMEngine extends EventEmitter {
10711073
try { this.chat.dispose?.(); } catch {}
10721074
}
10731075

1074-
// Reuse existing sequence — just clear KV cache
1075-
if (this.sequence && !this.sequence._disposed) {
1076-
try {
1077-
// Await the erase to prevent race with pending async operations
1078-
await this.sequence.eraseContextTokenRanges([{ start: 0, end: this.sequence.nTokens }]);
1079-
} catch {
1080-
// If erase fails (e.g. sequence disposed mid-flight), get a new sequence
1081-
try { this.sequence = this.context.getSequence(); } catch { /* context may also be gone */ }
1082-
}
1083-
} else if (this.context) {
1076+
// Dispose old sequence and get a fresh one (avoids eraseContextTokenRanges hang on degraded KV cache)
1077+
if (this.sequence) {
1078+
try { this.sequence.dispose?.(); } catch {}
1079+
this.sequence = null;
1080+
}
1081+
if (this.context) {
10841082
this.sequence = this.context.getSequence();
10851083
}
10861084

package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "guide-ide",
3-
"version": "1.8.16",
3+
"version": "1.8.17",
44
"description": "guIDE - AI-Powered Offline IDE with local LLM, RAG, MCP tools, browser automation, and integrated terminal",
55
"author": {
66
"name": "Brendan Gray",

0 commit comments

Comments
 (0)