Skip to content

Commit 5f5b5e1

Browse files
author
Brendan Gray
committed
fix: v1.6.9 - remove Auto Mode provider label from chat, fix truncated cloud responses, cache nvidia-smi VRAM per session
1 parent 29d084f commit 5f5b5e1

4 files changed

Lines changed: 26 additions & 25 deletions

File tree

main/agenticChat.js

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -295,10 +295,7 @@ function register(ctx) {
295295
context.cloudProvider = autoSelect.provider;
296296
context.cloudModel = autoSelect.model;
297297
console.log(`[Auto Mode] Selected: ${autoSelect.provider} / ${autoSelect.model}`);
298-
// Notify the UI which model was auto-selected
299-
if (mainWindow) {
300-
mainWindow.webContents.send('llm-token', `*Auto Mode selected: ${cloudLLM._getProviderLabel(autoSelect.provider)} / ${autoSelect.model}*\n\n`);
301-
}
298+
// Model selection is intentionally not shown in chat — auto mode is meant to be seamless
302299
} else {
303300
console.log('[Auto Mode] No cloud providers available, falling back to local model');
304301
if (mainWindow) {

main/agenticChatHelpers.js

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -267,15 +267,13 @@ function createIpcTokenBatcher(mainWindow, channel, canSend, opts = {}) {
267267
};
268268

269269
const dispose = () => {
270-
if (charsPerFlush) {
271-
// Paced batcher: do NOT cancel the timer or instant-dump the buffer.
272-
// Let the existing timer continue draining at the charsPerFlush rate.
273-
// Instant-dumping here bypasses pacing and causes a wall-of-text flash
274-
// when a fast cloud API has buffered many tokens by response end.
275-
// canSend() will block stale sends if a new request starts before drain completes.
276-
return;
277-
}
278-
// Non-paced batcher: cancel timer and flush remaining buffer immediately as before.
270+
// Always cancel the timer and flush any remaining buffer immediately.
271+
// For paced batchers (charsPerFlush > 0), pacing is for smooth live display
272+
// DURING streaming. At dispose() time streaming is complete — leaving chars
273+
// stranded in the buffer means the aiChat() IPC resolve races ahead of the
274+
// remaining tokens, causing the frontend to commit a truncated message
275+
// (e.g. "Hell" instead of "Hello. How can I help you?").
276+
// Flushing immediately here ensures all tokens arrive BEFORE the IPC resolve.
279277
if (timer) {
280278
clearTimeout(timer);
281279
timer = null;

main/llmEngine.js

Lines changed: 17 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -257,19 +257,25 @@ class LLMEngine extends EventEmitter {
257257
// Windows), then CPU. 'auto' left in the chain so non-NVIDIA systems still get GPU.
258258
let nvidiaDedicatedVramBytes = 0;
259259
if (this.gpuPreference !== 'cpu') {
260-
try {
261-
const { execSync } = require('child_process');
262-
const nvOut = execSync('nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits', {
263-
timeout: 3000, encoding: 'utf8', windowsHide: true,
264-
});
265-
const mib = parseFloat(nvOut.trim());
266-
if (mib > 0) {
267-
nvidiaDedicatedVramBytes = mib * 1024 * 1024; // MiB → bytes
268-
console.log(`[LLM] nvidia-smi dedicated VRAM: ${(nvidiaDedicatedVramBytes / (1024 ** 3)).toFixed(1)}GB`);
260+
// Cache nvidia-smi result — only probe once per session. Avoids a 100–300ms sync
261+
// block (or 3s timeout on non-NVIDIA systems) on every model load/switch.
262+
if (this._cachedNvidiaDedicatedVramBytes === undefined) {
263+
this._cachedNvidiaDedicatedVramBytes = 0; // default: unknown / non-NVIDIA
264+
try {
265+
const { execSync } = require('child_process');
266+
const nvOut = execSync('nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits', {
267+
timeout: 3000, encoding: 'utf8', windowsHide: true,
268+
});
269+
const mib = parseFloat(nvOut.trim());
270+
if (mib > 0) {
271+
this._cachedNvidiaDedicatedVramBytes = mib * 1024 * 1024; // MiB → bytes
272+
console.log(`[LLM] nvidia-smi dedicated VRAM: ${(this._cachedNvidiaDedicatedVramBytes / (1024 ** 3)).toFixed(1)}GB (cached for session)`);
273+
}
274+
} catch (_) {
275+
console.log('[LLM] nvidia-smi unavailable — Vulkan total VRAM used as-is for padding');
269276
}
270-
} catch (_) {
271-
console.log('[LLM] nvidia-smi unavailable — Vulkan total VRAM used as-is for padding');
272277
}
278+
nvidiaDedicatedVramBytes = this._cachedNvidiaDedicatedVramBytes;
273279
}
274280

275281
// If the model is too large for full GPU offload, insert a partial layer fallback

package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "guide-ide",
3-
"version": "1.6.8",
3+
"version": "1.6.9",
44
"description": "guIDE - AI-Powered Offline IDE with local LLM, RAG, MCP tools, browser automation, and integrated terminal",
55
"author": {
66
"name": "Brendan Gray",

0 commit comments

Comments
 (0)