v1.8.6: Fix continuation loop, fence detection, preamble file-creation, repeat detection

Brendan Gray · Brendan Gray · commit 72609c77785f · 2026-03-10T07:57:06.000-04:00
- Fix fence closure detection (handle inline closing backticks without newline)
- Fix JSON parsing in unclosed-fence check (strip trailing backticks before parse)
- Add repeat content detection in continuation loop (abort once a pass repeats the previous pass's content twice in a row)
- Expand preamble: build/create/generate/design now trigger write_file
- Add raw HTML/code nudge detection (unfenced code dumps)
- Add stress-test.js for pipeline validation
- Bump version to 1.8.6
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
@@ -545,6 +545,26 @@ These rules apply whenever running an iterative optimization loop on model promp
 - Tool implementations
 - `src/` directory (UI)
 
+---
+
+### NEVER stop investigating with open unknowns — MANDATORY
+**Added 2026-03-09 after a violation in which the agent listed 4 unknowns and stopped investigating.**
+
+When the user reports bugs and you are tasked with investigating:
+- You MUST close EVERY unknown before presenting a plan. No exceptions.
+- "What I have not read" / "What I don't know" sections in your analysis are WORK ITEMS, not disclaimers. They mean you are not done investigating.
+- If you list something you don't know, your next action MUST be to go find out. Not to present the plan anyway.
+- You cannot present a fix plan while acknowledging unknowns. The unknowns must become knowns first.
+- If after exhaustive investigation something truly cannot be determined from code alone, state EXACTLY what diagnostic step is needed (specific log line, specific runtime check) — not a vague "needs more investigation."
+- Stopping an investigation with open unknowns and presenting a partial analysis is the same as lying about completion. It violates the "never say done without proof" rule.
+- The ONLY acceptable reason to stop investigating is: every code path has been read, every function in the chain has been traced, and the remaining unknown requires runtime data that cannot be obtained from source code alone. In that case, state the EXACT diagnostic needed.
+
+**Files NOT in scope for optimization:**
+- `main/llmEngine.js` — inference engine internals
+- `main/agenticChat.js` — agentic loop logic
+- Tool implementations
+- `src/` directory (UI)
+
 ### NEVER tailor changes to make tests pass
 - The test suite exists to reveal REAL behavior. It is not a target to satisfy.
 - Do NOT write code changes that are designed to pass a specific test case.
diff --git a/main/agenticChat.js b/main/agenticChat.js
@@ -557,7 +557,7 @@ function register(ctx) {
           appendIfBudget(compactHint + '\n');
 
           // Few-shot examples
-          const fewShotCount = modelProfile.generation?.fewShotExamples ?? 0;
+          const fewShotCount = modelProfile.prompt?.fewShotExamples ?? 0;
           if (fewShotCount > 0 && tokenBudget > 150) {
             const fewShotExample = '### Tool Call Example\nUser: Create an HTML page called hello.html with a greeting\nAssistant:\n```json\n{"tool":"write_file","params":{"filePath":"hello.html","content":"<!DOCTYPE html>\\n<html><head><title>Hello</title></head><body><h1>Hello!</h1></body></html>"}}\n```\n';
             appendIfBudget(fewShotExample);
@@ -682,6 +682,8 @@ function register(ctx) {
     let consecutiveEmptyGrammarRetries = 0;
     let continuationCount = 0;
     let _contLowProgressCount = 0;
+    let _contRepeatCount = 0;
+    let _lastContText = '';
     let _pendingPartialBlock = null;
     let lastIterationResponse = '';
     let nonContextRetries = 0;
@@ -1067,15 +1069,19 @@ function register(ctx) {
       const _stitchedForMcp = _pendingPartialBlock ? _pendingPartialBlock + responseText : responseText;
       _pendingPartialBlock = null;
       const _fenceIdx = _stitchedForMcp.search(/```(?:json|tool_call|tool)\b/);
-      let _hasUnclosedToolFence = _fenceIdx !== -1 && !_stitchedForMcp.slice(_fenceIdx).includes('\n```');
+      const _afterFence = _fenceIdx !== -1 ? _stitchedForMcp.slice(_fenceIdx) : '';
+      // Check for closing ``` — with or without leading newline
+      let _hasUnclosedToolFence = _fenceIdx !== -1 &&
+        !_afterFence.match(/```(?:json|tool_call|tool)\b[\s\S]*?\n```/) &&
+        !_afterFence.match(/```(?:json|tool_call|tool)\b[\s\S]*?[^`]```\s*$/);
 
       // If the unclosed fence contains a complete JSON tool call, don't treat as truncated
       if (_hasUnclosedToolFence) {
-        const fenceContent = _stitchedForMcp.slice(_fenceIdx);
-        const jsonMatch = fenceContent.match(/```(?:json|tool_call|tool)\s*\n?([\s\S]*)/);
+        const jsonMatch = _afterFence.match(/```(?:json|tool_call|tool)\s*\n?([\s\S]*)/);
         if (jsonMatch) {
           try {
-            const parsed = JSON.parse(jsonMatch[1].trim());
+            const jsonContent = jsonMatch[1].replace(/```\s*$/, '').trim();
+            const parsed = JSON.parse(jsonContent);
             if (parsed && typeof parsed.tool === 'string') {
               _hasUnclosedToolFence = false;
             }
@@ -1123,10 +1129,19 @@ function register(ctx) {
             _contLowProgressCount = 0;
           }
 
-          if (_contLowProgressCount >= 3) {
-            console.log('[AI Chat] Continuation aborted: no forward progress');
+          // Detect repeated identical content (model stuck in loop)
+          if (responseText.trim() === _lastContText.trim() && responseText.length > 0) {
+            _contRepeatCount++;
+          } else {
+            _contRepeatCount = 0;
+          }
+          _lastContText = responseText;
+
+          if (_contLowProgressCount >= 3 || _contRepeatCount >= 2) {
+            console.log(`[AI Chat] Continuation aborted: ${_contRepeatCount >= 2 ? 'repeated identical content' : 'no forward progress'}`);
             continuationCount = 0;
             _contLowProgressCount = 0;
+            _contRepeatCount = 0;
             // Fall through
           } else {
             const truncReason = _hasUnclosedToolFence ? 'unclosed fence' : 'maxTokens';
@@ -1258,10 +1273,17 @@ function register(ctx) {
           break;
         }
 
-        // Code-dump nudge — only for large blocks likely to be full files
+        // Code-dump nudge — detect large code blocks (fenced or raw) that should be files
         const _codeBlockMatch = responseText.match(/```(?:html?|css|javascript|js|typescript|ts|python|py|json)\s*\n([\s\S]*?)```/i);
         const hasCodeBlocks = _codeBlockMatch && _codeBlockMatch[1].length > 500;
-        if (hasCodeBlocks && nudgesRemaining > 0 && iteration < MAX_AGENTIC_ITERATIONS - 1) {
+        // Also detect unclosed code fences (model hit maxTokens before closing ```)
+        const _unclosedFenceMatch = !hasCodeBlocks && responseText.match(/```(?:html?|css|javascript|js|typescript|ts|python|py|json)\s*\n([\s\S]{500,})$/i);
+        const hasUnclosedLargeBlock = !!_unclosedFenceMatch;
+        // Detect raw HTML/code dumped without fences (model obeyed "no code blocks" but didn't use write_file)
+        const hasRawCodeDump = !hasCodeBlocks && !hasUnclosedLargeBlock && responseText.length > 500 &&
+          (/<html[\s>]/i.test(responseText) || /<style[\s>]/i.test(responseText) || /<script[\s>]/i.test(responseText) ||
+           (/<\w+[\s>]/.test(responseText) && (responseText.match(/<\w+/g) || []).length > 10));
+        if ((hasCodeBlocks || hasUnclosedLargeBlock || hasRawCodeDump) && nudgesRemaining > 0 && iteration < MAX_AGENTIC_ITERATIONS - 1) {
           nudgesRemaining--;
           currentPrompt = {
             systemContext: currentPrompt.systemContext,
diff --git a/main/constants.js b/main/constants.js
@@ -44,8 +44,8 @@ You can also answer general questions, help with writing, and have normal conver
 - After tools return, explain what you found and what it means — don't just say a tool ran
 - Ask a specific follow-up if you need more information
 - When asked to visit, open, navigate to, or browse a URL or website, call \`browser_navigate\` as your first action.
-- When asked to save, write, or store data, results, or any content to a file, call \`write_file\` to create that file.
-- **Never output full file content as code blocks in your message.** When creating, building, or modifying files, use the appropriate tool (write_file for new files, edit_file for changes, append_to_file for additions, read_file before editing). Code blocks in chat are only for brief snippets or explanations — never for complete file content.
+- When asked to save, write, store, build, create, generate, or design any file (HTML page, script, config, stylesheet, etc.), call \`write_file\` to create it. Do not output file content in your response — use the tool.
+- **Never output full file content as code blocks or raw markup in your message.** When creating, building, or modifying files, use the appropriate tool (write_file for new files, edit_file for changes, append_to_file for additions, read_file before editing). Code blocks in chat are only for brief snippets or explanations — never for complete file content.
 
 ## Rules
 - **You have no knowledge of what any project file contains until you call read_file.** Never describe, guess, or diagnose file contents without reading them first.
@@ -70,6 +70,8 @@ const DEFAULT_COMPACT_PREAMBLE = `You are a helpful AI assistant running locally
 read_file, write_file, edit_file, list_directory, find_files, grep_search, run_command, web_search, fetch_webpage, browser_navigate, browser_snapshot, browser_click, browser_type, search_codebase, analyze_error, append_to_file
 
 ## Rules
+- **Never output full file content as code blocks in chat** — always use write_file, edit_file, or append_to_file. Code blocks are only for brief snippets or explanations.
+- **For new files: call write_file immediately.** Do not describe what the file would contain — create it.
 - Tools execute in the live environment. Call them — do not describe what you would do.
 - Never say you did something unless you called the tool that did it.
 - You do not know file contents until you call read_file. Never guess.
@@ -80,10 +82,8 @@ read_file, write_file, edit_file, list_directory, find_files, grep_search, run_c
 - For live/current/time-sensitive info: call web_search. Never guess dates or current state.
 - To visit a URL: call browser_navigate. To read a page: browser_snapshot first.
 - If a tool fails, retry once with corrected parameters.
-- For new files: call write_file immediately.
 - For edits: call read_file first, then edit_file with exact oldText and newText.
 - For large files: write_file first section, then append_to_file for each remaining section.
-- Never output full file content as code blocks in chat — always use the appropriate file tool.
 - Once a task is complete (file written, question answered, error explained), respond with a brief summary. Do not call more tools after the task is done.`;
 
 const DEFAULT_CHAT_PREAMBLE = `Answer questions, help with code and concepts, and have normal conversations.
diff --git a/main/llmEngine.js b/main/llmEngine.js
@@ -222,6 +222,9 @@ class LLMEngine extends EventEmitter {
       if (this._activeGenerationPromise) {
         try { await this._activeGenerationPromise; } catch {}
       }
+      // Extra settle time for node-llama-cpp internal async ops (_eraseContextTokenRanges etc.)
+      // that may still be in-flight after the generation promise resolves
+      await new Promise(r => setTimeout(r, 150));
       await this._dispose();
 
       if (loadSignal.aborted) throw new Error('Load cancelled');
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "guide-ide",
-  "version": "1.8.5",
+  "version": "1.8.6",
   "description": "guIDE - AI-Powered Offline IDE with local LLM, RAG, MCP tools, browser automation, and integrated terminal",
   "author": {
     "name": "Brendan Gray",
diff --git a/src/components/Chat/ChatPanel.tsx b/src/components/Chat/ChatPanel.tsx
@@ -1140,6 +1140,15 @@ export const ChatPanel: React.FC<ChatPanelProps> = ({
     }
     // Clear todos from previous session — prevents ghost todos appearing from prior runs
     setTodos([]);
+    // Clear ALL old messages — prevents conversation contamination across model switches.
+    // Without this, old conversation history gets sent as conversationHistory to the new model,
+    // seeding irrelevant/wrong context (e.g. "SAS Marketplace" from a prior model's response).
+    if (messages.length > 0) saveCurrentSession();
+    setMessages([]);
+    setPendingFileChanges([]);
+    setCheckpoints(new Map());
+    // Clear conversation memory so new model doesn't inherit old context
+    (window as any).electronAPI?.memoryClearConversations?.();
     // Show loading message - model loading can take several minutes
     const loadingMsgId = `msg-loading-${Date.now()}`;
     setMessages(prev => [...prev, {
diff --git a/stress-test.js b/stress-test.js
diff --git a/website/src/app/download/page.tsx b/website/src/app/download/page.tsx

Original file line number	Diff line number	Diff line change
`@@ -222,6 +222,9 @@ class LLMEngine extends EventEmitter {`
`222`	`222`	`if (this._activeGenerationPromise) {`
`223`	`223`	`try { await this._activeGenerationPromise; } catch {}`
`224`	`224`	`}`
	`225`	`+ // Extra settle time for node-llama-cpp internal async ops (_eraseContextTokenRanges etc.)`
	`226`	`+ // that may still be in-flight after the generation promise resolves`
	`227`	`+ await new Promise(r => setTimeout(r, 150));`
`225`	`228`	`await this._dispose();`
`226`	`229`
`227`	`230`	`if (loadSignal.aborted) throw new Error('Load cancelled');`
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "guide-ide",`
`3`		`- "version": "1.8.5",`
	`3`	`+ "version": "1.8.6",`
`4`	`4`	`"description": "guIDE - AI-Powered Offline IDE with local LLM, RAG, MCP tools, browser automation, and integrated terminal",`
`5`	`5`	`"author": {`
`6`	`6`	`"name": "Brendan Gray",`