FileShot
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
Lines changed: 31 additions & 0 deletions
diff --git a/main/agenticChat.js b/main/agenticChat.js
Lines changed: 59 additions & 103 deletions
diff --git a/main/constants.js b/main/constants.js
Lines changed: 1 addition & 1 deletion
diff --git a/main/llmEngine.js b/main/llmEngine.js
Lines changed: 7 additions & 1 deletion
diff --git a/package.json b/package.json
Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,22 @@
+# 🚫 STOP — READ THIS FIRST — NON-NEGOTIABLE — NO EXCEPTIONS
+
+## THIS COMPUTER IS A DEV MACHINE. IT IS NOT A SERVER.
+
+**DO NOT — under ANY circumstances — run any of the following on this machine:**
+- `pm2 start` / `pm2 restart` / `pm2 reload` / `pm2 stop` / `pm2 kill` / `pm2 save` / `pm2 resurrect`
+- `node server.js` / `npm start` / `npm run dev` (for any site backend or frontend server)
+- Create, modify, or trigger any Windows Scheduled Task
+- Start any cloudflared tunnel process
+- Run any ecosystem config (`ecosystem.config.cjs`, etc.)
+
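+**The production server is a SEPARATE PHYSICAL COMPUTER.** It runs all 8 sites. You cannot access it from this terminal. Manage it ONLY via https://cp.graysoft.dev (credentials are deliberately not recorded in this file — obtain them from the project owner; never commit passwords or PINs to the repository).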
+
+**Violating this rule causes production downtime for real users. Every time you start a process here it conflicts with the real server. This is not a warning — it is a hard rule.**
+
+**If you are about to run pm2 or start any server process: STOP. Do not do it. There is no scenario where it is correct.**
+
+---
+
 # GitHub Copilot Instructions — guIDE Project
 
 ---
@@ -66,6 +85,7 @@ Read this list first. Every item has a full section below.
 - **No half-assing** — Every feature fully implemented end-to-end. No partial implementations
 - **No lazy shortcuts** — Write the correct solution even if it takes 500 lines
 - **No guessing** — "I don't know" is always acceptable. Speculation presented as fact is not
+- **Never suggest without 100% certainty** — If you are not certain, DO NOT suggest. Read more code, read more logs, ask the user what they see. A wrong suggestion is worse than silence.
 - **No lying** — Never claim code works without verifying it
 - **Think through pros and cons** — Present trade-offs explicitly, let the user decide
 - **Respond to problems with solutions** — Don't just acknowledge. Propose and research
@@ -228,6 +248,17 @@ Before declaring any root cause:
 - Do not claim code works without verifying it compiles/runs.
 - If something failed, say it failed. Do not hide failures.
 
+### NEVER suggest without 100% certainty — ABSOLUTE RULE
+**This is non-negotiable. If you are not certain, do not suggest. Silence is better than a wrong suggestion.**
+
+- If you have not read every relevant line of code in the full call chain, you are NOT certain.
+- If the user has described behavior that contradicts your hypothesis, YOU ARE WRONG — not the user. Read more code.
+- If you cannot trace exactly WHY a bug occurs from source to screen with actual file reads, say "I need to read more code before I can say."
+- Do NOT say "it might be X" or "I believe it's Y" and then act on that belief. Uncertainty stated out loud is not permission to proceed.
+- Do NOT present a partial understanding as a complete diagnosis.
+- A wrong suggestion wastes build time, breaks trust, and violates PATTERN 7 in the recurring failures section.
+- The standard: if you were in court and had to swear the suggestion is correct under oath — would you? If not, stay silent and investigate more.
+
 ### Honesty Over Helpfulness
 - Being genuinely helpful means sometimes saying "there's nothing to do here" or "I don't know how to do this."
 - Producing busywork output (fake audits, unnecessary refactors, placeholder features) wastes the user's time and money.
 
@@ -130,108 +130,6 @@ function register(ctx) {
     const isStale = () => myRequestId !== _activeRequestId || ctx.agenticCancelled;
 
     try {
-      // ── Image / Video Generation Detection ──
-      // If the user is asking for image/video generation, handle it directly instead of routing to an LLM
-      const imgDetect = ImageGenerationService.detectImageRequest(message);
-      const vidDetect = ImageGenerationService.detectVideoRequest(message);
-
-      if (vidDetect) {
-        // Attempt video generation via Pollinations (requires free API key)
-        const imageGen = ctx.imageGen;
-        if (!imageGen || imageGen._pollinationsKeys.length === 0) {
-          // No Pollinations API keys — inform user how to enable video gen
-          if (mainWindow) {
-            mainWindow.webContents.send('llm-token',
-              '⚠️ **Video generation requires a free Pollinations API key.**\n\n' +
-              '1. Go to **https://enter.pollinations.ai** and create a free account\n' +
-              '2. Copy your API key\n' +
-              '3. Paste it in **Settings → Pollinations API Key**\n\n' +
-              'Free video models available: **Seedance** (2-10s, best quality), **Wan** (2-15s, with audio), **Grok Video** (alpha).\n\n' +
-              'I can **generate a still image** instead if you\'d like — just ask me to "generate an image of …"'
-            );
-          }
-          return { success: true, response: 'Video generation requires Pollinations API key.', isVideoRequest: true };
-        }
-
-        if (mainWindow) {
-          mainWindow.webContents.send('llm-token', `🎬 *Generating video: "${vidDetect.extractedPrompt.substring(0, 100)}${vidDetect.extractedPrompt.length > 100 ? '…' : ''}"*\n\n⏳ Videos take 30-120 seconds to generate — please be patient…\n\n`);
-        }
-
-        try {
-          const result = await imageGen.generateVideo(vidDetect.extractedPrompt, {});
-
-          if (result.success) {
-            const videoPayload = JSON.stringify({
-              type: 'generated-video',
-              videoBase64: result.videoBase64,
-              mimeType: result.mimeType,
-              prompt: result.prompt,
-              provider: result.provider,
-              model: result.model,
-              duration: result.duration,
-            });
-
-            if (mainWindow) {
-              mainWindow.webContents.send('llm-token', `\n\n<!--GENERATED_VIDEO:${videoPayload}-->\n\n`);
-              mainWindow.webContents.send('llm-token', `✅ Video generated via **Pollinations AI** (${result.model}). Use the buttons below the video to save it.`);
-            }
-            return { success: true, response: 'Video generated successfully.', isVideoGeneration: true, video: result };
-          } else {
-            if (mainWindow) {
-              mainWindow.webContents.send('llm-token', `❌ Video generation failed: ${result.error}\n\nI can **generate a still image** instead — just ask!`);
-            }
-            return { success: false, error: result.error, isVideoGeneration: true };
-          }
-        } catch (vidErr) {
-          if (mainWindow) {
-            mainWindow.webContents.send('llm-token', `❌ Video generation error: ${vidErr.message}\n\nPlease try again.`);
-          }
-          return { success: false, error: vidErr.message, isVideoGeneration: true };
-        }
-      }
-
-      if (imgDetect.isImageRequest) {
-        const imageGen = ctx.imageGen;
-        if (mainWindow) {
-          mainWindow.webContents.send('llm-token', `🎨 *Generating image: "${imgDetect.extractedPrompt.substring(0, 100)}${imgDetect.extractedPrompt.length > 100 ? '…' : ''}"*\n\n`);
-        }
-
-        try {
-          const result = await imageGen.generate(imgDetect.extractedPrompt, {
-            width: 1024,
-            height: 1024,
-          });
-
-          if (result.success) {
-            // Send a special token that the renderer will parse as an inline image
-            const imagePayload = JSON.stringify({
-              type: 'generated-image',
-              imageBase64: result.imageBase64,
-              mimeType: result.mimeType,
-              prompt: result.prompt,
-              provider: result.provider,
-              model: result.model,
-            });
-
-            if (mainWindow) {
-              mainWindow.webContents.send('llm-token', `\n\n<!--GENERATED_IMAGE:${imagePayload}-->\n\n`);
-              mainWindow.webContents.send('llm-token', `✅ Image generated via **${result.provider === 'pollinations' ? 'Pollinations AI' : 'Google Gemini'}** (${result.model}). Use the buttons below the image to save or discard it.`);
-            }
-            return { success: true, response: 'Image generated successfully.', isImageGeneration: true, image: result };
-          } else {
-            if (mainWindow) {
-              mainWindow.webContents.send('llm-token', `❌ Image generation failed: ${result.error}\n\nI can still help you with text-based tasks — just let me know!`);
-            }
-            return { success: false, error: result.error, isImageGeneration: true };
-          }
-        } catch (imgErr) {
-          if (mainWindow) {
-            mainWindow.webContents.send('llm-token', `❌ Image generation error: ${imgErr.message}\n\nPlease try again.`);
-          }
-          return { success: false, error: imgErr.message, isImageGeneration: true };
-        }
-      }
-
       // ── Auto Mode: automatically pick the best model for this task ──
       if (context?.autoMode && !context?.cloudProvider) {
         const autoSelect = (() => {
@@ -1346,9 +1244,12 @@ function register(ctx) {
         // - xlarge models (14B+): grammar ON for first 2 iterations (original behavior)
         // The model can still output free text even with grammar constraining enabled —
         // the grammar only ensures that WHEN tool calls are made, they're structurally valid.
+        // Grammar toggle: respect user setting (default OFF). When off, skip native function calling
+        // entirely and go straight to text mode — avoids generation hangs on small models.
+        const grammarEnabled = _readConfig()?.userSettings?.enableGrammar ?? false;
         const grammarIterLimit = modelTier.grammarAlwaysOn ? Infinity
           : modelTier.tier === 'large' ? 5 : 2;
-        const useNativeFunctions = (taskType !== 'chat') && iteration <= grammarIterLimit;
+        const useNativeFunctions = grammarEnabled && (taskType !== 'chat') && iteration <= grammarIterLimit;
         let nativeFunctions = null;
         if (consecutiveEmptyGrammarRetries >= 1) {
           // Grammar-to-text fallback: model can't produce grammar output, degrade gracefully.
@@ -1420,6 +1321,10 @@ function register(ctx) {
                   // Stream tool generation progress to the renderer for live bubble display.
                   // The renderer shows a CollapsibleToolBlock with partial params as they stream in.
                   if (mainWindow && !mainWindow.isDestroyed()) {
+                    if (!toolChunk._loggedFirst) {
+                      toolChunk._loggedFirst = true;
+                      console.log(`[AI Chat] llm-tool-generating IPC SENT: callIndex=${toolChunk.callIndex} fn=${toolChunk.functionName} paramsLen=${toolChunk.paramsText?.length} done=${toolChunk.done}`);
+                    }
                     mainWindow.webContents.send('llm-tool-generating', toolChunk);
                   }
                 }
@@ -1431,16 +1336,61 @@ function register(ctx) {
               }
             } else {
               // ── LEGACY TEXT PARSING PATH ──
+              // Synthetic llm-tool-generating events: accumulate tokens and fire the same
+              // IPC event that grammar mode fires so the live streaming bubble appears.
+              // Grammar mode fires this from the toolChunk callback (4th arg of generateWithFunctions).
+              // Text mode has no equivalent callback, so we detect the JSON inline here.
+              let _tb = '';         // raw token accumulator (text path only)
+              let _tIdx = 9000;    // callIndex sentinel — grammar mode uses 0-based ints
+              let _tStart = -1;    // offset of opening '{' of current tool call in _tb
+              let _tName = null;   // tool name once the key has streamed through
+
               result = await llmEngine.generateStream(currentPrompt, {
                 ...(context?.params || {}),
                 maxTokens: effectiveMaxTokens,
               }, (token) => {
                 if (isStale()) { llmEngine.cancelGeneration('user'); return; }
                 localTokenBatcher.push(token);
+
+                // ── Live tool-call bubble (text mode) ──
+                _tb += token;
+                // Step 1: find the opening brace of a tool call if not already tracking one
+                if (_tStart === -1) {
+                  const m = _tb.match(/\{\s*"tool"\s*:\s*"([^"]+)"/);
+                  if (m) {
+                    _tStart = m.index;
+                    _tName = m[1];
+                  }
+                }
+                // Step 2: stream the accumulating content to the renderer
+                if (_tStart !== -1 && _tName && mainWindow && !mainWindow.isDestroyed()) {
+                  const raw = _tb.slice(_tStart);
+                  // Cap at 3000 chars for IPC efficiency; frontend further caps at 1500 for display
+                  const paramsText = raw.length > 3000 ? raw.slice(0, 3000) + '\n…[truncated]' : raw;
+                  mainWindow.webContents.send('llm-tool-generating', {
+                    callIndex: _tIdx,
+                    functionName: _tName,
+                    paramsText,
+                    done: false,
+                  });
+                }
               }, (thinkToken) => {
                 if (isStale()) { llmEngine.cancelGeneration('user'); return; }
                 localThinkingBatcher.push(thinkToken);
               });
+
+              // Mark any in-flight text-mode generating bubble as done.
+              // In the happy path the tool-executing IPC event clears generatingToolCalls
+              // automatically; this done:true handles edge cases where the model wrote a
+              // tool call but execution was skipped or cancelled.
+              if (_tStart !== -1 && _tName && mainWindow && !mainWindow.isDestroyed()) {
+                mainWindow.webContents.send('llm-tool-generating', {
+                  callIndex: _tIdx,
+                  functionName: _tName,
+                  paramsText: '',
+                  done: true,
+                });
+              }
             }
           } finally {
             localTokenBatcher.dispose();
@@ -2384,8 +2334,14 @@ function register(ctx) {
         const executionStateBlock = getExecutionStateSummary() || '';
 
         const hasBrowserAction = toolResults.results.some(tr => tr.tool && tr.tool.startsWith('browser_'));
+        // Context-aware continuation: if all results this iteration were successful file writes,
+        // give the model explicit permission to stop rather than commanding another tool call.
+        const allSuccessfulWrites = toolResults.results.length > 0 &&
+          toolResults.results.every(tr => (tr.tool === 'write_file' || tr.tool === 'create_file') && tr.result?.success === true);
         const continueInstruction = hasBrowserAction
           ? `\n\nThe page snapshot above has element [ref=N] numbers. Do NOT call browser_snapshot — you already have it. Use browser_click, browser_type, etc. with [ref=N]. Output your next tool call as a fenced JSON block NOW.`
+          : allSuccessfulWrites
+          ? `\n\nFiles written successfully. If the task is complete, provide a final summary now. Only call another tool if there is genuinely more work remaining that has not been done yet.`
           : `\n\nOutput the next tool call to make progress. Only provide a final summary when ALL steps are fully complete.`;
 
         // Build the iteration prompt with structured context ordering:
 
@@ -83,6 +83,7 @@ const DEFAULT_COMPACT_PREAMBLE = `You are a local AI coding assistant with tools
 
 ## Behavior
 - **Your tools are real and execute in the live environment.** Call them — do not describe what you would do instead of doing it.
+- **When your response would contain a complete file (code, markup, config, data) — call write_file. File content belongs in the filesystem, not in chat.**
 - **Never say you created, saved, ran, or navigated to something unless you called a tool that did it.**
 - **Never claim you searched for something, looked it up, or checked a source unless you actually called web_search or fetch_webpage in this response.**
 - **You do not know today's date or current real-world state. If asked for the date, time, or any live or time-sensitive information — call web_search immediately. Never state a current date, time, or real-world value from memory.**
@@ -104,7 +105,6 @@ const DEFAULT_COMPACT_PREAMBLE = `You are a local AI coding assistant with tools
 - Never claim a task is done before calling the tool that completes it — writing a file requires write_file, searching requires web_search
 - When read_file fails with ENOENT, call find_files to locate the file by name
 - Tool format: {"tool":"read_file","params":{"filePath":"src/app.js"}}
-- write_file to save code — never paste file content into chat
 - For conversational messages — greetings, casual chat, simple questions — respond directly with text. No tools needed.`;
 
 /**
 
@@ -1828,8 +1828,14 @@ After your brief acknowledgment, output ONLY the tool call blocks — no extra t
             // Accumulate paramsChunk text per callIndex and stream live to UI.
             // This powers the streaming tool generation bubble in the renderer
             // so users can see what the model is writing instead of a blank screen.
-            if (!_paramsChunkBufs[chunk.callIndex]) _paramsChunkBufs[chunk.callIndex] = '';
+            if (!_paramsChunkBufs[chunk.callIndex]) {
+              _paramsChunkBufs[chunk.callIndex] = '';
+              console.log(`[LLM] onFunctionCallParamsChunk FIRST FIRE: callIndex=${chunk.callIndex} functionName=${chunk.functionName} done=${chunk.done}`);
+            }
             if (chunk.paramsChunk) _paramsChunkBufs[chunk.callIndex] += chunk.paramsChunk;
+            if (chunk.done) {
+              console.log(`[LLM] onFunctionCallParamsChunk DONE: callIndex=${chunk.callIndex} totalLen=${_paramsChunkBufs[chunk.callIndex]?.length}`);
+            }
             if (onToolGenerating) {
               onToolGenerating({
                 callIndex: chunk.callIndex,
 
@@ -1,6 +1,6 @@
 {
   "name": "guide-ide",
-  "version": "1.6.11",
+  "version": "1.7.0",
   "description": "guIDE - AI-Powered Offline IDE with local LLM, RAG, MCP tools, browser automation, and integrated terminal",
   "author": {
     "name": "Brendan Gray",
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "guide-ide",`
`3`		`- "version": "1.6.11",`
	`3`	`+ "version": "1.7.0",`
`4`	`4`	`"description": "guIDE - AI-Powered Offline IDE with local LLM, RAG, MCP tools, browser automation, and integrated terminal",`
`5`	`5`	`"author": {`
`6`	`6`	`"name": "Brendan Gray",`