Skip to content

Commit fc2e781

Browse files
author
Brendan Gray
committed
v1.8.16: Continuation overlap dedup + code-dump nudge freeze fix
Bug 1: Code blocks broken during streaming
- _pendingPartialBlock is now set for ALL continuation types (previously tool-fence only)
- Overlap detection moved before fullResponseText/displayResponseText accumulation
- llm-stream-reset followed by a re-send corrects the UI stream buffer when overlap is removed

Bug 2: App freezes mid-generation after a context-aborted continuation
- hasRawCodeDump regex tightened: now requires structural HTML document patterns
- contextTooSmallForNudge guard: skips the nudge when totalCtx <= 4096 and more than 50% of it is used
- Diagnostic logging before the nudge fires
- resetSession wrapped in an 8s timeout (defense-in-depth)
1 parent 13f711d commit fc2e781

2 files changed

Lines changed: 59 additions & 17 deletions

File tree

main/agenticChat.js

Lines changed: 58 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,10 +1105,27 @@ function register(ctx) {
11051105
consecutiveEmptyGrammarRetries = 0;
11061106
}
11071107

1108-
fullResponseText += responseText;
1108+
// ── Overlap de-duplication for ALL continuation passes ──
1109+
// Detect if model repeated the tail we sent as context.
1110+
// Applied BEFORE accumulating to fullResponseText/displayResponseText
1111+
// so duplicate content never enters the display pipeline.
1112+
let _overlapLen = 0;
1113+
if (_pendingPartialBlock && continuationCount > 0) {
1114+
const maxCheck = Math.min(_pendingPartialBlock.length, responseText.length, 2000);
1115+
for (let len = maxCheck; len >= 20; len--) {
1116+
const suffix = _pendingPartialBlock.slice(-len);
1117+
if (responseText.startsWith(suffix)) { _overlapLen = len; break; }
1118+
}
1119+
if (_overlapLen > 0) {
1120+
console.log(`[AI Chat] Continuation overlap: removed ${_overlapLen} duplicate chars`);
1121+
}
1122+
}
1123+
const newContent = _overlapLen > 0 ? responseText.slice(_overlapLen) : responseText;
1124+
1125+
fullResponseText += newContent;
11091126

11101127
// Strip tool fences and raw inline JSON tool calls from display copy
1111-
let displayChunk = responseText
1128+
let displayChunk = newContent
11121129
.replace(/\n?```(?:json|tool_call|tool)\b[\s\S]*?```\n?/g, '')
11131130
.replace(/\n?```(?:json|tool_call|tool)\b[\s\S]*$/g, '')
11141131
.replace(/\[?\s*\{\s*"(?:tool|name)"\s*:\s*"[^"]*"[\s\S]*?\}\s*\]?/g, '')
@@ -1118,17 +1135,18 @@ function register(ctx) {
11181135
}
11191136
displayResponseText += displayChunk;
11201137

1121-
// ── SEAMLESS CONTINUATION ──
1138+
// Correct UI stream buffer: the overlapping tokens were already streamed
1139+
// during generation. Trim them by resetting to iteration start and
1140+
// re-sending just the de-duplicated new content.
1141+
if (_overlapLen > 0 && mainWindow && !mainWindow.isDestroyed()) {
1142+
mainWindow.webContents.send('llm-stream-reset');
1143+
if (displayChunk) mainWindow.webContents.send('llm-token', displayChunk);
1144+
}
1145+
1146+
// ── SEAMLESS CONTINUATION — stitch for MCP tool detection ──
11221147
let _stitchedForMcp;
11231148
if (_pendingPartialBlock) {
1124-
// Overlap de-duplication: detect if model repeated the tail we sent
1125-
let overlap = 0;
1126-
const maxCheck = Math.min(_pendingPartialBlock.length, responseText.length, 2000);
1127-
for (let len = maxCheck; len >= 20; len--) {
1128-
const suffix = _pendingPartialBlock.slice(-len);
1129-
if (responseText.startsWith(suffix)) { overlap = len; break; }
1130-
}
1131-
_stitchedForMcp = _pendingPartialBlock + responseText.slice(overlap);
1149+
_stitchedForMcp = _pendingPartialBlock + responseText.slice(_overlapLen);
11321150

11331151
// Fence-aware cleanup: if stitching produced duplicate ```json fences,
11341152
// keep only the LAST complete one (the continuation's fresh attempt)
@@ -1289,6 +1307,7 @@ function register(ctx) {
12891307
const tailForModel = partialFence.length > maxTailChars ? partialFence.slice(-maxTailChars) : partialFence;
12901308
continuationMsg = `${taskHint}${fileManifest}[Continue the tool call JSON from exactly where it was cut. Output ONLY the JSON continuation. Do NOT restart the tool call. Continue from:\n${tailForModel}]`;
12911309
} else {
1310+
_pendingPartialBlock = responseText; // enable overlap detection for ALL continuation types
12921311
const tailForModel = responseText.length > maxTailChars ? responseText.slice(-maxTailChars) : responseText;
12931312
continuationMsg = `${taskHint}${fileManifest}[Continue your response exactly where you left off. Do not restart or repeat content. Here is the end of what you wrote:\n${tailForModel}]`;
12941313
}
@@ -1412,15 +1431,38 @@ function register(ctx) {
14121431
const _unclosedFenceMatch = !hasCodeBlocks && responseText.match(/```(?:html?|css|javascript|js|typescript|ts|python|py|json)\s*\n([\s\S]{500,})$/i);
14131432
const hasUnclosedLargeBlock = !!_unclosedFenceMatch;
14141433
// Detect raw HTML/code dumped without fences (model obeyed "no code blocks" but didn't use write_file)
1415-
const hasRawCodeDump = !hasCodeBlocks && !hasUnclosedLargeBlock && responseText.length > 500 &&
1416-
(/<html[\s>]/i.test(responseText) || /<style[\s>]/i.test(responseText) || /<script[\s>]/i.test(responseText) ||
1417-
(/<\w+[\s>]/.test(responseText) && (responseText.match(/<\w+/g) || []).length > 10));
1418-
if ((hasCodeBlocks || hasUnclosedLargeBlock || hasRawCodeDump) && nudgesRemaining > 0 && iteration < MAX_AGENTIC_ITERATIONS - 1) {
1434+
// Requires STRUCTURAL HTML document tags (<!DOCTYPE, <html, <head, <body) to avoid false
1435+
// positives on plain-text descriptions that mention individual element names like <div>, <section>.
1436+
const hasRawCodeDump = !hasCodeBlocks && !hasUnclosedLargeBlock && responseText.length > 500 && (
1437+
(/<html[\s>]/i.test(responseText) && (/<head[\s>]/i.test(responseText) || /<body[\s>]/i.test(responseText))) ||
1438+
(/<style[\s>]/i.test(responseText) && (responseText.match(/[{};]\s*\w+\s*:/g) || []).length > 5) ||
1439+
(/<script[\s>]/i.test(responseText) && (responseText.match(/(?:function |const |let |var |=>)/g) || []).length > 3)
1440+
);
1441+
1442+
// Skip nudge when context is critically small — resetSession can hang on degraded KV cache
1443+
// and the model can't do better with even less context.
1444+
let contextTooSmallForNudge = false;
1445+
if (totalCtx <= 4096) {
1446+
let _nudgeUsed = 0;
1447+
try { if (llmEngine.sequence?.nTokens) _nudgeUsed = llmEngine.sequence.nTokens; } catch (_) {}
1448+
if (_nudgeUsed > 0 && _nudgeUsed / totalCtx > 0.50) contextTooSmallForNudge = true;
1449+
}
1450+
1451+
if ((hasCodeBlocks || hasUnclosedLargeBlock || hasRawCodeDump) && nudgesRemaining > 0 && iteration < MAX_AGENTIC_ITERATIONS - 1 && !contextTooSmallForNudge) {
14191452
nudgesRemaining--;
1453+
console.log(`[AI Chat] Code-dump nudge firing (codeBlocks=${hasCodeBlocks}, unclosed=${hasUnclosedLargeBlock}, rawDump=${hasRawCodeDump})`);
14201454
// Strip the raw code dump from accumulated response to free context budget.
14211455
// The model will regenerate the content properly via write_file.
14221456
fullResponseText = '';
1423-
try { await llmEngine.resetSession(true); } catch (_) {}
1457+
// Timeout-guarded resetSession to prevent indefinite hang from degraded C++ KV cache
1458+
try {
1459+
await Promise.race([
1460+
llmEngine.resetSession(true),
1461+
new Promise((_, rej) => setTimeout(() => rej(new Error('resetSession timeout')), 8000)),
1462+
]);
1463+
} catch (resetErr) {
1464+
console.warn(`[AI Chat] Code-dump nudge resetSession failed: ${resetErr.message}`);
1465+
}
14241466
sessionJustRotated = true;
14251467
currentPrompt = {
14261468
systemContext: buildStaticPrompt(),

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "guide-ide",
3-
"version": "1.8.15",
3+
"version": "1.8.16",
44
"description": "guIDE - AI-Powered Offline IDE with local LLM, RAG, MCP tools, browser automation, and integrated terminal",
55
"author": {
66
"name": "Brendan Gray",

0 commit comments

Comments
 (0)