v1.8.12: Object-disposed crash fix, continuation fence dedup, code block truncation fix, sysReserve guard, model auto-load, set-as-default, active state UI

Brendan Gray · Brendan Gray · commit 942cd34a2b36 · 2026-03-10T19:18:53.000-04:00
diff --git a/electron-main.js b/electron-main.js
@@ -445,22 +445,54 @@ async function initializeServices() {
       mainWindow.webContents.send('memory-stats', memoryStore.getStats());
       mainWindow.webContents.send('mcp-tools-available', mcpToolServer.getToolDefinitions());
 
-      // Skip auto-loading — let user pick a model manually.
-      // This avoids blocking the UI for 1-5 minutes on startup.
-      const defaultModel = modelManager.getDefaultModel();
-      if (defaultModel) {
-        console.log(`[IDE] Default model available: ${defaultModel.name} (not auto-loading)`);
+      // Auto-load the last-used model if settings.json records one and the file still exists; otherwise only report what is available
+      const fs = require('fs');
+      const settingsPath = require('path').join(userDataPath, 'settings.json');
+      let lastUsedModel = null;
+      try {
+        const config = JSON.parse(fs.readFileSync(settingsPath, 'utf8'));
+        if (config.lastUsedModel && fs.existsSync(config.lastUsedModel)) {
+          lastUsedModel = config.lastUsedModel;
+        }
+      } catch {}
+
+      if (lastUsedModel) {
+        const modelName = require('path').basename(lastUsedModel).replace(/\.gguf$/i, '');
+        console.log(`[IDE] Auto-loading last used model: ${modelName}`);
         mainWindow.webContents.send('llm-status', {
-          state: 'idle',
-          message: `Model ready: ${defaultModel.name}. Click to load.`,
+          state: 'loading',
+          message: `Loading ${modelName}...`,
         });
-      } else {
-        // No local GGUF model — normal on first install. Cloud AI (Cerebras/Groq)
-        // is active by default so the user can start immediately.
-        mainWindow.webContents.send('llm-status', {
-          state: 'idle',
-          message: 'Cloud AI active (Cerebras/Groq). Download a .gguf model to enable local GPU inference.',
+        // Non-blocking auto-load — UI is usable while model loads
+        llmEngine.initialize(lastUsedModel).then((modelInfo) => {
+          console.log(`[IDE] Auto-loaded model: ${modelName}`);
+          if (mainWindow && !mainWindow.isDestroyed()) {
+            mainWindow.webContents.send('llm-status', { state: 'ready', message: `Model loaded: ${modelName}` });
+            if (modelInfo?.contextSize) {
+              mainWindow.webContents.send('context-usage', { used: 0, total: modelInfo.contextSize });
+            }
+            mainWindow.webContents.send('model-auto-loaded', { path: lastUsedModel, name: modelName });
+          }
+        }).catch((err) => {
+          console.warn(`[IDE] Auto-load failed: ${err.message}`);
+          if (mainWindow && !mainWindow.isDestroyed()) {
+            mainWindow.webContents.send('llm-status', { state: 'idle', message: `Auto-load failed. Click a model to load.` });
+          }
         });
+      } else {
+        const defaultModel = modelManager.getDefaultModel();
+        if (defaultModel) {
+          console.log(`[IDE] Default model available: ${defaultModel.name} (not auto-loading)`);
+          mainWindow.webContents.send('llm-status', {
+            state: 'idle',
+            message: `Model ready: ${defaultModel.name}. Click to load.`,
+          });
+        } else {
+          mainWindow.webContents.send('llm-status', {
+            state: 'idle',
+            message: 'Cloud AI active (Cerebras/Groq). Download a .gguf model to enable local GPU inference.',
+          });
+        }
       }
       // Non-blocking: detect NVIDIA GPU and download CUDA backends in the background.
       // App is fully usable via cloud AI while this runs (or if no GPU is found).
diff --git a/main/agenticChat.js b/main/agenticChat.js
@@ -524,6 +524,30 @@ function register(ctx) {
     const sysPromptReserve = estimateTokens(actualSystemPrompt) + 50 + toolSchemaTokenEstimate;
     console.log(`[AI Chat] Profile: ${modelProfile._meta.profileSource} | ctx=${totalCtx} (hw=${hwContextSize}) | sysReserve=${sysPromptReserve}`);
 
+    // Guard: if system prompt + tool schemas would take 90% or more of the context window, fall back to the compact preamble
+    let usedCompactFallback = false;
+    if (sysPromptReserve >= totalCtx * 0.9) {
+      const compactPrompt = llmEngine._getCompactSystemPrompt();
+      const compactReserve = estimateTokens(compactPrompt) + 50 + toolSchemaTokenEstimate;
+      if (compactReserve < totalCtx * 0.9) {
+        console.log(`[AI Chat] sysReserve (${sysPromptReserve}) exceeds ctx (${totalCtx}), switching to compact preamble (reserve=${compactReserve})`);
+        // Reset session with compact prompt
+        try { await llmEngine.resetSession(true); } catch (_) {}
+        usedCompactFallback = true;
+      } else {
+        // Even compact preamble doesn't fit — inform user
+        console.error(`[AI Chat] FATAL: Even compact preamble (${compactReserve} tokens) exceeds context (${totalCtx}). Cannot generate.`);
+        if (mainWindow && !mainWindow.isDestroyed()) {
+          mainWindow.webContents.send('llm-response-chunk', {
+            text: `\n\n**Error:** This model's context window (${totalCtx} tokens) is too small for tool-assisted generation. The system prompt alone requires ~${compactReserve} tokens. Please load a model with a larger context window, or use Cloud AI.`,
+            done: true,
+          });
+        }
+        return;
+      }
+    }
+    console.log(`[AI Chat] Model: ${modelTier.family} (${modelTier.paramLabel} ${modelTier.family}) \u2014 tools=${modelProfile.generation?.maxToolsPerTurn ?? 0}, grammar=${modelProfile.generation?.grammarConstrained ? 'strict' : 'limited'}`);
+
     const maxResponseTokens = Math.min(
       Math.floor(totalCtx * modelProfile.context.responseReservePct),
       modelProfile.context.maxResponseTokens
@@ -883,7 +907,7 @@ function register(ctx) {
               }
               if (_tStart !== -1 && _tName && mainWindow && !mainWindow.isDestroyed()) {
                 const raw = _tb.slice(_tStart);
-                const paramsText = raw.length > 4000 ? raw.slice(-4000) : raw;
+                const paramsText = raw.length > 4000 ? raw.slice(0, 4000) : raw;
                 mainWindow.webContents.send('llm-tool-generating', {
                   callIndex: _tIdx, functionName: _tName, paramsText, done: false,
                 });
@@ -1090,6 +1114,21 @@ function register(ctx) {
           if (responseText.startsWith(suffix)) { overlap = len; break; }
         }
         _stitchedForMcp = _pendingPartialBlock + responseText.slice(overlap);
+
+        // Fence-aware cleanup: if stitching left two or more ```json / ```tool_call / ```tool fence openers,
+        // keep the text before the first opener plus everything from the LAST opener onward (the continuation's fresh attempt)
+        const fencePattern = /```(?:json|tool_call|tool)\b/g;
+        const fencePositions = [];
+        let fm;
+        while ((fm = fencePattern.exec(_stitchedForMcp)) !== null) fencePositions.push(fm.index);
+        if (fencePositions.length >= 2) {
+          // Multiple fence opens — the earlier ones are presumably from the truncated pass, the last one from the continuation.
+          // Keep any text before the first fence, then resume at the last fence open (expected to hold the complete JSON).
+          const lastFenceStart = fencePositions[fencePositions.length - 1];
+          const textBeforeFences = _stitchedForMcp.slice(0, fencePositions[0]);
+          _stitchedForMcp = textBeforeFences + _stitchedForMcp.slice(lastFenceStart);
+          console.log(`[AI Chat] Fence dedup: removed ${fencePositions.length - 1} duplicate fence(s)`);
+        }
       } else {
         _stitchedForMcp = responseText;
       }
diff --git a/main/ipc/llmHandlers.js b/main/ipc/llmHandlers.js
@@ -60,6 +60,21 @@ function register(ctx) {
         await new Promise(r => setTimeout(r, 100));
       }
       const modelInfo = await ctx.llmEngine.initialize(modelPath);
+      // Persist as last-used model for auto-load on next startup
+      try {
+        const { ipcMain: _ipc } = require('electron');
+        // Write directly to settings file to avoid IPC roundtrip
+        const fs = require('fs');
+        const path = require('path');
+        const { app } = require('electron');
+        const settingsPath = path.join(app.getPath('userData'), 'settings.json');
+        let config = {};
+        try { config = JSON.parse(fs.readFileSync(settingsPath, 'utf8')); } catch {}
+        config.lastUsedModel = modelPath;
+        fs.writeFileSync(settingsPath + '.tmp', JSON.stringify(config, null, 2));
+        fs.renameSync(settingsPath + '.tmp', settingsPath);
+        console.log(`[LLM] Persisted lastUsedModel: ${path.basename(modelPath)}`);
+      } catch (e) { console.warn('[LLM] Failed to persist lastUsedModel:', e.message); }
       const win = ctx.getMainWindow();
       if (win && modelInfo?.contextSize) {
         win.webContents.send('context-usage', { used: 0, total: modelInfo.contextSize });
diff --git a/main/llmEngine.js b/main/llmEngine.js
@@ -1074,12 +1074,13 @@ class LLMEngine extends EventEmitter {
     // Reuse existing sequence — just clear KV cache
     if (this.sequence && !this.sequence._disposed) {
       try {
-        this.sequence.eraseContextTokenRanges([{ start: 0, end: this.sequence.nTokens }]);
+        // Await the erase to prevent race with pending async operations
+        await this.sequence.eraseContextTokenRanges([{ start: 0, end: this.sequence.nTokens }]);
       } catch {
-        // If erase fails, get a new sequence
-        this.sequence = this.context.getSequence();
+        // If erase fails (e.g. sequence disposed mid-flight), get a new sequence
+        try { this.sequence = this.context.getSequence(); } catch { /* context may also be gone */ }
       }
-    } else {
+    } else if (this.context) {
       this.sequence = this.context.getSequence();
     }
 
diff --git a/main/settingsManager.js b/main/settingsManager.js
@@ -68,6 +68,33 @@ function registerSettingsHandlers(ctx) {
   ipcMain.handle('save-settings', (_evt, settings) => _writeConfig(settings));
   ipcMain.handle('load-settings', () => _readConfig());
 
+  // ── Model persistence ──
+  ipcMain.handle('set-last-used-model', (_evt, modelPath) => {
+    try {
+      const config = _readConfig();
+      config.lastUsedModel = modelPath || null;
+      return _writeConfig(config);
+    } catch (e) { return { success: false, error: e.message }; }
+  });
+
+  ipcMain.handle('get-last-used-model', () => {
+    const config = _readConfig();
+    return config.lastUsedModel || null;
+  });
+
+  ipcMain.handle('set-default-model', (_evt, modelPath) => {
+    try {
+      const config = _readConfig();
+      config.defaultModelPath = modelPath || null;
+      return _writeConfig(config);
+    } catch (e) { return { success: false, error: e.message }; }
+  });
+
+  ipcMain.handle('get-default-model', () => {
+    const config = _readConfig();
+    return config.defaultModelPath || null;
+  });
+
   ipcMain.handle('get-system-prompt-preview', (_evt, opts) => {
     // Return the effective system prompt that would be sent to the model
     const { DEFAULT_SYSTEM_PREAMBLE, DEFAULT_COMPACT_PREAMBLE } = require('./constants');
diff --git a/preload.js b/preload.js
@@ -102,6 +102,9 @@ contextBridge.exposeInMainWorld('electronAPI', {
   modelsAdd: () => ipcRenderer.invoke('models-add'),
   modelsRemove: (modelPath) => ipcRenderer.invoke('models-remove', modelPath),
   onModelsAvailable: (callback) => _on('models-available', callback),
+  onModelAutoLoaded: (callback) => _on('model-auto-loaded', callback),
+  setDefaultModel: (modelPath) => ipcRenderer.invoke('set-default-model', modelPath),
+  getDefaultModelPath: () => ipcRenderer.invoke('get-default-model'),
 
   // ── Hardware & Model Recommendations ──
   getHardwareInfo: () => ipcRenderer.invoke('get-hardware-info'),
diff --git a/src/components/Layout/WelcomeScreen.tsx b/src/components/Layout/WelcomeScreen.tsx
@@ -2,7 +2,7 @@ import React, { useState, useEffect, useCallback } from 'react';
 import {
   FolderOpen, Plus, Clock, ChevronRight, ArrowRight,
   Download, CheckCircle, Loader2, Zap, Code2, Brain, Package,
-  Cloud, LogOut, UserCircle,
+  Cloud, LogOut, UserCircle, Star,
 } from 'lucide-react';
 import type { LicenseStatus } from '@/types/electron';
 
@@ -39,6 +39,10 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onOpenFolder, onNe
   const [licenseStatus, setLicenseStatus] = useState<LicenseStatus | null>(null);
   const [licenseLoading, setLicenseLoading] = useState(false);
   const [cloudAILoading, setCloudAILoading] = useState(false);
+  // Track which model is currently active (loaded)
+  const [activeModel, setActiveModel] = useState<string | null>(null);
+  // Track which model is set as default
+  const [defaultModelPath, setDefaultModelPath] = useState<string | null>(null);
 
   useEffect(() => {
     try {
@@ -68,6 +72,13 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onOpenFolder, onNe
   // Load license status for sign-in strip
   useEffect(() => {
     window.electronAPI?.licenseGetStatus?.().then(s => { if (s) setLicenseStatus(s); }).catch(() => {});
+    // Load default model path from settings
+    window.electronAPI?.getDefaultModelPath?.().then(p => { if (p) setDefaultModelPath(p); }).catch(() => {});
+    // Listen for auto-loaded model on startup
+    const cleanup = window.electronAPI?.onModelAutoLoaded?.((data: { path: string; name: string }) => {
+      setActiveModel(data.path);
+    });
+    return () => { if (typeof cleanup === 'function') cleanup(); };
   }, []);
 
   const openRecent = (path: string) => {
@@ -88,11 +99,19 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onOpenFolder, onNe
       await window.electronAPI?.llmLoadModel?.(modelPath);
       // Switch app to local model — clear cloud provider preference so ChatPanel defaults to local
       try { localStorage.removeItem('guide-cloud-provider'); } catch {}
+      setActiveModel(modelPath);
     } finally {
       setLoadingModel(null);
     }
   };
 
+  const setAsDefault = async (modelPath: string) => {
+    try {
+      await window.electronAPI?.setDefaultModel?.(modelPath);
+      setDefaultModelPath(modelPath);
+    } catch {}
+  };
+
   const useCloudAI = () => {
     setCloudAILoading(true);
     try {
@@ -309,25 +328,47 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({ onOpenFolder, onNe
                 <div className="flex flex-col gap-1">
                   {installedModels.slice(0, 4).map((model) => {
                     const label = (model.name || (model.path || '').split(/[/\\]/).pop() || 'Unknown').replace(/\.gguf$/i, '');
+                    const mp = model.path || model.name;
+                    const isActive = activeModel === mp;
+                    const isDefault = defaultModelPath === mp;
+                    const isLoading = loadingModel === mp;
                     return (
                       <div
-                        key={model.path || model.name}
+                        key={mp}
                         className="flex items-center gap-2 px-3 py-1.5 rounded-lg"
-                        style={{ backgroundColor: 'var(--theme-bg-secondary)', border: '1px solid var(--theme-border)' }}
+                        style={{
+                          backgroundColor: isActive ? 'color-mix(in srgb, var(--theme-accent) 10%, var(--theme-bg-secondary))' : 'var(--theme-bg-secondary)',
+                          border: isActive ? '1px solid var(--theme-accent)' : '1px solid var(--theme-border)',
+                        }}
                       >
+                        {/* Set as default star */}
+                        <button
+                          onClick={() => setAsDefault(mp)}
+                          className="flex-shrink-0 transition-colors"
+                          style={{ color: isDefault ? 'var(--theme-accent)' : 'var(--theme-foreground-subtle)', cursor: 'pointer' }}
+                          title={isDefault ? 'Default model' : 'Set as default'}
+                        >
+                          <Star size={12} fill={isDefault ? 'currentColor' : 'none'} />
+                        </button>
                         <span className="flex-1 min-w-0 text-[12px] truncate" style={{ color: 'var(--theme-foreground)' }} title={label}>
                           {label}
                         </span>
                         <button
-                          onClick={() => useModel(model.path || model.name)}
-                          disabled={loadingModel === (model.path || model.name)}
+                          onClick={() => !isActive && useModel(mp)}
+                          disabled={isLoading || isActive}
                           className="flex-shrink-0 text-[11px] px-2 py-0.5 rounded font-medium flex items-center justify-center gap-1 transition-opacity"
-                          style={{ backgroundColor: 'var(--theme-accent)', color: 'var(--theme-bg)', minWidth: 36, opacity: loadingModel === (model.path || model.name) ? 0.7 : 1 }}
-                          onMouseEnter={(e) => { if (loadingModel !== (model.path || model.name)) (e.currentTarget as HTMLElement).style.opacity = '0.8'; }}
-                          onMouseLeave={(e) => { if (loadingModel !== (model.path || model.name)) (e.currentTarget as HTMLElement).style.opacity = '1'; }}
-                          title={loadingModel === (model.path || model.name) ? 'Loading...' : `Load ${label}`}
+                          style={{
+                            backgroundColor: isActive ? '#89d185' : 'var(--theme-accent)',
+                            color: 'var(--theme-bg)',
+                            minWidth: 46,
+                            opacity: isLoading ? 0.7 : 1,
+                            cursor: isActive ? 'default' : 'pointer',
+                          }}
+                          onMouseEnter={(e) => { if (!isLoading && !isActive) (e.currentTarget as HTMLElement).style.opacity = '0.8'; }}
+                          onMouseLeave={(e) => { if (!isLoading && !isActive) (e.currentTarget as HTMLElement).style.opacity = '1'; }}
+                          title={isActive ? 'Model is active' : isLoading ? 'Loading...' : `Load ${label}`}
                         >
-                          {loadingModel === (model.path || model.name) ? <Loader2 size={10} className="animate-spin" /> : 'Use'}
+                          {isLoading ? <Loader2 size={10} className="animate-spin" /> : isActive ? 'Active' : 'Use'}
                         </button>
                       </div>
                     );
diff --git a/src/types/electron.ts b/src/types/electron.ts
@@ -123,6 +123,9 @@ export interface ElectronAPI {
   modelsAdd(): Promise<{ success: boolean; models: AvailableModel[] }>;
   modelsRemove(modelPath: string): Promise<{ success: boolean }>;
   onModelsAvailable(callback: (models: AvailableModel[]) => void): (() => void) | void;
+  onModelAutoLoaded(callback: (data: { path: string; name: string }) => void): (() => void) | void;
+  setDefaultModel(modelPath: string): Promise<{ success: boolean }>;
+  getDefaultModelPath(): Promise<string | null>;
 
   // Hardware & Model Recommendations
   getHardwareInfo(): Promise<{ vramGB: number; gpuName: string; totalRAM: number; freeRAM: number; cpuModel: string; cpuCores: number }>;