Skip to content

Commit 72609c7

Browse files
author
Brendan Gray
committed
v1.8.6: Fix continuation loop, fence detection, preamble file-creation, repeat detection
- Fix fence closure detection (handle inline closing backticks without newline)
- Fix JSON parsing in unclosed-fence check (strip trailing backticks before parse)
- Add repeat content detection in continuation loop (abort after 2 identical passes)
- Expand preamble: build/create/generate/design now trigger write_file
- Add raw HTML/code nudge detection (unfenced code dumps)
- Add stress-test.js for pipeline validation
- Bump version to 1.8.6
1 parent 12b6dd3 commit 72609c7

8 files changed

Lines changed: 470 additions & 15 deletions

File tree

.github/copilot-instructions.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,26 @@ These rules apply whenever running an iterative optimization loop on model promp
545545
- Tool implementations
546546
- `src/` directory (UI)
547547

548+
---
549+
550+
### NEVER stop investigating with open unknowns — MANDATORY
551+
**Added 2026-03-09 after a violation in which the agent listed 4 unknowns and then stopped investigating.**
552+
553+
When the user reports bugs and you are tasked with investigating:
554+
- You MUST close EVERY unknown before presenting a plan. No exceptions.
555+
- "What I have not read" / "What I don't know" sections in your analysis are WORK ITEMS, not disclaimers. They mean you are not done investigating.
556+
- If you list something you don't know, your next action MUST be to go find out. Not to present the plan anyway.
557+
- You cannot present a fix plan while acknowledging unknowns. The unknowns must become knowns first.
558+
- If after exhaustive investigation something truly cannot be determined from code alone, state EXACTLY what diagnostic step is needed (specific log line, specific runtime check) — not a vague "needs more investigation."
559+
- Stopping an investigation with open unknowns and presenting a partial analysis is the same as lying about completion. It violates the "never say done without proof" rule.
560+
- The ONLY acceptable reason to stop investigating is: every code path has been read, every function in the chain has been traced, and the remaining unknown requires runtime data that cannot be obtained from source code alone. In that case, state the EXACT diagnostic needed.
561+
562+
**Files NOT in scope for optimization:**
563+
- `main/llmEngine.js` — inference engine internals
564+
- `main/agenticChat.js` — agentic loop logic
565+
- Tool implementations
566+
- `src/` directory (UI)
567+
548568
### NEVER tailor changes to make tests pass
549569
- The test suite exists to reveal REAL behavior. It is not a target to satisfy.
550570
- Do NOT write code changes that are designed to pass a specific test case.

main/agenticChat.js

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ function register(ctx) {
557557
appendIfBudget(compactHint + '\n');
558558

559559
// Few-shot examples
560-
const fewShotCount = modelProfile.generation?.fewShotExamples ?? 0;
560+
const fewShotCount = modelProfile.prompt?.fewShotExamples ?? 0;
561561
if (fewShotCount > 0 && tokenBudget > 150) {
562562
const fewShotExample = '### Tool Call Example\nUser: Create an HTML page called hello.html with a greeting\nAssistant:\n```json\n{"tool":"write_file","params":{"filePath":"hello.html","content":"<!DOCTYPE html>\\n<html><head><title>Hello</title></head><body><h1>Hello!</h1></body></html>"}}\n```\n';
563563
appendIfBudget(fewShotExample);
@@ -682,6 +682,8 @@ function register(ctx) {
682682
let consecutiveEmptyGrammarRetries = 0;
683683
let continuationCount = 0;
684684
let _contLowProgressCount = 0;
685+
let _contRepeatCount = 0;
686+
let _lastContText = '';
685687
let _pendingPartialBlock = null;
686688
let lastIterationResponse = '';
687689
let nonContextRetries = 0;
@@ -1067,15 +1069,19 @@ function register(ctx) {
10671069
const _stitchedForMcp = _pendingPartialBlock ? _pendingPartialBlock + responseText : responseText;
10681070
_pendingPartialBlock = null;
10691071
const _fenceIdx = _stitchedForMcp.search(/```(?:json|tool_call|tool)\b/);
1070-
let _hasUnclosedToolFence = _fenceIdx !== -1 && !_stitchedForMcp.slice(_fenceIdx).includes('\n```');
1072+
const _afterFence = _fenceIdx !== -1 ? _stitchedForMcp.slice(_fenceIdx) : '';
1073+
// Check for closing ``` — with or without leading newline
1074+
let _hasUnclosedToolFence = _fenceIdx !== -1 &&
1075+
!_afterFence.match(/```(?:json|tool_call|tool)\b[\s\S]*?\n```/) &&
1076+
!_afterFence.match(/```(?:json|tool_call|tool)\b[\s\S]*?[^`]```\s*$/);
10711077

10721078
// If the unclosed fence contains a complete JSON tool call, don't treat as truncated
10731079
if (_hasUnclosedToolFence) {
1074-
const fenceContent = _stitchedForMcp.slice(_fenceIdx);
1075-
const jsonMatch = fenceContent.match(/```(?:json|tool_call|tool)\s*\n?([\s\S]*)/);
1080+
const jsonMatch = _afterFence.match(/```(?:json|tool_call|tool)\s*\n?([\s\S]*)/);
10761081
if (jsonMatch) {
10771082
try {
1078-
const parsed = JSON.parse(jsonMatch[1].trim());
1083+
const jsonContent = jsonMatch[1].replace(/```\s*$/, '').trim();
1084+
const parsed = JSON.parse(jsonContent);
10791085
if (parsed && typeof parsed.tool === 'string') {
10801086
_hasUnclosedToolFence = false;
10811087
}
@@ -1123,10 +1129,19 @@ function register(ctx) {
11231129
_contLowProgressCount = 0;
11241130
}
11251131

1126-
if (_contLowProgressCount >= 3) {
1127-
console.log('[AI Chat] Continuation aborted: no forward progress');
1132+
// Detect repeated identical content (model stuck in loop)
1133+
if (responseText.trim() === _lastContText.trim() && responseText.length > 0) {
1134+
_contRepeatCount++;
1135+
} else {
1136+
_contRepeatCount = 0;
1137+
}
1138+
_lastContText = responseText;
1139+
1140+
if (_contLowProgressCount >= 3 || _contRepeatCount >= 2) {
1141+
console.log(`[AI Chat] Continuation aborted: ${_contRepeatCount >= 2 ? 'repeated identical content' : 'no forward progress'}`);
11281142
continuationCount = 0;
11291143
_contLowProgressCount = 0;
1144+
_contRepeatCount = 0;
11301145
// Fall through
11311146
} else {
11321147
const truncReason = _hasUnclosedToolFence ? 'unclosed fence' : 'maxTokens';
@@ -1258,10 +1273,17 @@ function register(ctx) {
12581273
break;
12591274
}
12601275

1261-
// Code-dump nudge — only for large blocks likely to be full files
1276+
// Code-dump nudge — detect large code blocks (fenced or raw) that should be files
12621277
const _codeBlockMatch = responseText.match(/```(?:html?|css|javascript|js|typescript|ts|python|py|json)\s*\n([\s\S]*?)```/i);
12631278
const hasCodeBlocks = _codeBlockMatch && _codeBlockMatch[1].length > 500;
1264-
if (hasCodeBlocks && nudgesRemaining > 0 && iteration < MAX_AGENTIC_ITERATIONS - 1) {
1279+
// Also detect unclosed code fences (model hit maxTokens before closing ```)
1280+
const _unclosedFenceMatch = !hasCodeBlocks && responseText.match(/```(?:html?|css|javascript|js|typescript|ts|python|py|json)\s*\n([\s\S]{500,})$/i);
1281+
const hasUnclosedLargeBlock = !!_unclosedFenceMatch;
1282+
// Detect raw HTML/code dumped without fences (model obeyed "no code blocks" but didn't use write_file)
1283+
const hasRawCodeDump = !hasCodeBlocks && !hasUnclosedLargeBlock && responseText.length > 500 &&
1284+
(/<html[\s>]/i.test(responseText) || /<style[\s>]/i.test(responseText) || /<script[\s>]/i.test(responseText) ||
1285+
(/<\w+[\s>]/.test(responseText) && (responseText.match(/<\w+/g) || []).length > 10));
1286+
if ((hasCodeBlocks || hasUnclosedLargeBlock || hasRawCodeDump) && nudgesRemaining > 0 && iteration < MAX_AGENTIC_ITERATIONS - 1) {
12651287
nudgesRemaining--;
12661288
currentPrompt = {
12671289
systemContext: currentPrompt.systemContext,

main/constants.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ You can also answer general questions, help with writing, and have normal conver
4444
- After tools return, explain what you found and what it means — don't just say a tool ran
4545
- Ask a specific follow-up if you need more information
4646
- When asked to visit, open, navigate to, or browse a URL or website, call \`browser_navigate\` as your first action.
47-
- When asked to save, write, or store data, results, or any content to a file, call \`write_file\` to create that file.
48-
- **Never output full file content as code blocks in your message.** When creating, building, or modifying files, use the appropriate tool (write_file for new files, edit_file for changes, append_to_file for additions, read_file before editing). Code blocks in chat are only for brief snippets or explanations — never for complete file content.
47+
- When asked to save, write, store, build, create, generate, or design any file (HTML page, script, config, stylesheet, etc.), call \`write_file\` to create it. Do not output file content in your response — use the tool.
48+
- **Never output full file content as code blocks or raw markup in your message.** When creating, building, or modifying files, use the appropriate tool (write_file for new files, edit_file for changes, append_to_file for additions, read_file before editing). Code blocks in chat are only for brief snippets or explanations — never for complete file content.
4949
5050
## Rules
5151
- **You have no knowledge of what any project file contains until you call read_file.** Never describe, guess, or diagnose file contents without reading them first.
@@ -70,6 +70,8 @@ const DEFAULT_COMPACT_PREAMBLE = `You are a helpful AI assistant running locally
7070
read_file, write_file, edit_file, list_directory, find_files, grep_search, run_command, web_search, fetch_webpage, browser_navigate, browser_snapshot, browser_click, browser_type, search_codebase, analyze_error, append_to_file
7171
7272
## Rules
73+
- **Never output full file content as code blocks in chat** — always use write_file, edit_file, or append_to_file. Code blocks are only for brief snippets or explanations.
74+
- **For new files: call write_file immediately.** Do not describe what the file would contain — create it.
7375
- Tools execute in the live environment. Call them — do not describe what you would do.
7476
- Never say you did something unless you called the tool that did it.
7577
- You do not know file contents until you call read_file. Never guess.
@@ -80,10 +82,8 @@ read_file, write_file, edit_file, list_directory, find_files, grep_search, run_c
8082
- For live/current/time-sensitive info: call web_search. Never guess dates or current state.
8183
- To visit a URL: call browser_navigate. To read a page: browser_snapshot first.
8284
- If a tool fails, retry once with corrected parameters.
83-
- For new files: call write_file immediately.
8485
- For edits: call read_file first, then edit_file with exact oldText and newText.
8586
- For large files: write_file first section, then append_to_file for each remaining section.
86-
- Never output full file content as code blocks in chat — always use the appropriate file tool.
8787
- Once a task is complete (file written, question answered, error explained), respond with a brief summary. Do not call more tools after the task is done.`;
8888

8989
const DEFAULT_CHAT_PREAMBLE = `Answer questions, help with code and concepts, and have normal conversations.

main/llmEngine.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,9 @@ class LLMEngine extends EventEmitter {
222222
if (this._activeGenerationPromise) {
223223
try { await this._activeGenerationPromise; } catch {}
224224
}
225+
// Extra settle time for node-llama-cpp internal async ops (_eraseContextTokenRanges etc.)
226+
// that may still be in-flight after the generation promise resolves
227+
await new Promise(r => setTimeout(r, 150));
225228
await this._dispose();
226229

227230
if (loadSignal.aborted) throw new Error('Load cancelled');

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "guide-ide",
3-
"version": "1.8.5",
3+
"version": "1.8.6",
44
"description": "guIDE - AI-Powered Offline IDE with local LLM, RAG, MCP tools, browser automation, and integrated terminal",
55
"author": {
66
"name": "Brendan Gray",

src/components/Chat/ChatPanel.tsx

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1140,6 +1140,15 @@ export const ChatPanel: React.FC<ChatPanelProps> = ({
11401140
}
11411141
// Clear todos from previous session — prevents ghost todos appearing from prior runs
11421142
setTodos([]);
1143+
// Clear ALL old messages — prevents conversation contamination across model switches.
1144+
// Without this, old conversation history gets sent as conversationHistory to the new model,
1145+
// seeding irrelevant/wrong context (e.g. "SAS Marketplace" from a prior model's response).
1146+
if (messages.length > 0) saveCurrentSession();
1147+
setMessages([]);
1148+
setPendingFileChanges([]);
1149+
setCheckpoints(new Map());
1150+
// Clear conversation memory so new model doesn't inherit old context
1151+
(window as any).electronAPI?.memoryClearConversations?.();
11431152
// Show loading message - model loading can take several minutes
11441153
const loadingMsgId = `msg-loading-${Date.now()}`;
11451154
setMessages(prev => [...prev, {

0 commit comments

Comments
 (0)