Skip to content

Commit 6ca0797

Browse files
author
Brendan Gray
committed
v1.8.33: GPU auto mode fix, toolPaceMs 0 for local, context-aware overlap detection, CSS filename inference, reasoning cleanup, history preservation across model switch
1 parent f90479d commit 6ca0797

3 files changed

Lines changed: 67 additions & 21 deletions

File tree

main/agenticChat.js

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1203,9 +1203,11 @@ function register(ctx) {
12031203
await llmEngine.resetSession(true);
12041204
await ensureLlmChat(llmEngine, getNodeLlamaCppPath);
12051205

1206-
if (convSummary && mainWindow) {
1207-
const cleaned = convSummary.replace(/```(?:json|tool_call|tool)[^\n]*\n[\s\S]*?```/g, '');
1208-
mainWindow.webContents.send('llm-thinking-token', cleaned + '\n[Context rotated]\n');
1206+
// Only send minimal notification to thinking panel — NOT the full summary
1207+
// The summary (with ## COMPLETED WORK, ## INSTRUCTION, etc.) goes into the model's prompt,
1208+
// NOT into the reasoning dropdown. The reasoning dropdown should only show model thinking.
1209+
if (mainWindow) {
1210+
mainWindow.webContents.send('llm-thinking-token', '\n[Context rotated — continuing task]\n');
12091211
}
12101212

12111213
const partial = fullResponseText.trim().length > 0 ? fullResponseText.substring(Math.max(0, fullResponseText.length - 1500)) : '';
@@ -1214,17 +1216,22 @@ function register(ctx) {
12141216
const incrementalHint = summarizer.incrementalTask
12151217
? `\n**INCREMENTAL TASK: ${summarizer.incrementalTask.current}/${summarizer.incrementalTask.target} ${summarizer.incrementalTask.type} completed.**`
12161218
: '';
1217-
const fileProgressHint = Object.keys(summarizer.fileProgress).length > 0
1219+
const fileProgressKeys = Object.keys(summarizer.fileProgress);
1220+
const primaryFile = fileProgressKeys.length > 0 ? fileProgressKeys[fileProgressKeys.length - 1] : null;
1221+
const fileProgressHint = fileProgressKeys.length > 0
12181222
? `\n**FILES IN PROGRESS:** ${Object.entries(summarizer.fileProgress).map(([f, p]) => `${f} (${p.writtenLines} lines)`).join(', ')}`
12191223
: '';
1224+
const explicitFileHint = primaryFile
1225+
? `\n**CONTINUE WRITING TO: ${primaryFile}** — Use append_to_file with this exact file path.`
1226+
: '';
12201227

12211228
const hint = partial
12221229
? `\n\n## CONTINUE FROM HERE\n---\n${partial}\n---` +
1223-
incrementalHint + fileProgressHint +
1230+
incrementalHint + fileProgressHint + explicitFileHint +
12241231
`\n\n**CRITICAL: DO NOT REFUSE. DO NOT SAY "I cannot continue."**` +
1225-
`\nUse append_to_file to add more content. Call a tool NOW to make progress.`
1232+
`\nUse append_to_file to add more content to the same file. Call a tool NOW to make progress.`
12261233
: `\nContext was rotated. The user request is: ${message.substring(0, 300)}` +
1227-
incrementalHint + fileProgressHint +
1234+
incrementalHint + fileProgressHint + explicitFileHint +
12281235
`\n\n**Continue the task using tools. Do not refuse.**`;
12291236

12301237
currentPrompt = {
@@ -1544,8 +1551,14 @@ function register(ctx) {
15441551
if (_contRepeatCount < 2 && fullResponseText.length > 500 && responseText.length > 200) {
15451552
const priorText = fullResponseText.slice(0, fullResponseText.length - responseText.length);
15461553
if (priorText.length > 200) {
1547-
const CHUNK_SIZE = 150;
1548-
const SAMPLE_COUNT = 6;
1554+
// Context-aware overlap detection: file content has natural boilerplate
1555+
// (HTML tags, CSS declarations, repeated style= attributes) that triggers
1556+
// false positives with small chunks. Use larger chunks + higher threshold
1557+
// for file content, moderate settings for non-file content.
1558+
const isFileContent = /\\n|\\"|write_file|filePath|"content"\s*:/.test(responseText.substring(0, 500));
1559+
const CHUNK_SIZE = isFileContent ? 300 : 200;
1560+
const THRESHOLD = isFileContent ? 0.75 : 0.60;
1561+
const SAMPLE_COUNT = 8;
15491562
const step = Math.max(1, Math.floor((responseText.length - CHUNK_SIZE) / SAMPLE_COUNT));
15501563
let foundCount = 0;
15511564
let totalSamples = 0;
@@ -1554,13 +1567,12 @@ function register(ctx) {
15541567
const chunk = responseText.slice(si, si + CHUNK_SIZE);
15551568
if (priorText.includes(chunk)) foundCount++;
15561569
}
1557-
if (totalSamples > 0 && foundCount / totalSamples >= 0.5) {
1570+
if (totalSamples > 0 && foundCount / totalSamples >= THRESHOLD) {
15581571
_contRepeatCount += 2; // immediately trigger abort threshold
1559-
console.log(`[AI Chat] Content-overlap detected: ${foundCount}/${totalSamples} chunks found in prior output`);
1572+
console.log(`[AI Chat] Content-overlap detected: ${foundCount}/${totalSamples} chunks (threshold ${THRESHOLD}, chunkSize ${CHUNK_SIZE}) in prior output`);
15601573
}
15611574
}
15621575
}
1563-
15641576
// Hard total accumulated char limit: stop runaway continuation
15651577
// Increased from 50K to allow large file generation — context rotation will handle memory
15661578
const MAX_CONTINUATION_CHARS = 500000; // 500K chars (~125K tokens at ~4 chars/token)
@@ -1615,17 +1627,24 @@ function register(ctx) {
16151627
const incrementalHint = summarizer.incrementalTask
16161628
? `\n**INCREMENTAL TASK: ${summarizer.incrementalTask.current}/${summarizer.incrementalTask.target} ${summarizer.incrementalTask.type} completed.**`
16171629
: '';
1618-
const fileProgressHint = Object.keys(summarizer.fileProgress).length > 0
1630+
// Extract the primary file being worked on for explicit append instruction
1631+
const fileProgressKeys = Object.keys(summarizer.fileProgress);
1632+
const primaryFile = fileProgressKeys.length > 0 ? fileProgressKeys[fileProgressKeys.length - 1] : null;
1633+
const fileProgressHint = fileProgressKeys.length > 0
16191634
? `\n**FILES IN PROGRESS:** ${Object.entries(summarizer.fileProgress).map(([f, p]) => `${f} (${p.writtenLines} lines)`).join(', ')}`
16201635
: '';
1636+
// Explicit filename continuation to prevent model from creating new files
1637+
const explicitFileHint = primaryFile
1638+
? `\n**CONTINUE WRITING TO: ${primaryFile}** — Use append_to_file with this exact file path.`
1639+
: '';
16211640

16221641
currentPrompt = {
16231642
systemContext: buildStaticPrompt(),
16241643
userMessage: buildDynamicContext() + '\n\n' + convSummary +
16251644
`\n\n## CONTINUE FROM HERE\n---\n${partialOutput}\n---` +
1626-
incrementalHint + fileProgressHint +
1645+
incrementalHint + fileProgressHint + explicitFileHint +
16271646
`\n\n**CRITICAL: DO NOT REFUSE. DO NOT SAY "I cannot continue."**` +
1628-
`\nUse append_to_file to add more content. Call a tool NOW to make progress.`,
1647+
`\nUse append_to_file to add more content to the same file. Call a tool NOW to make progress.`,
16291648
};
16301649
sessionJustRotated = true;
16311650
continue;
@@ -1721,7 +1740,7 @@ function register(ctx) {
17211740
});
17221741
} else {
17231742
const textOpts = {
1724-
toolPaceMs: 50, skipWriteDeferral: modelTier.tier === 'tiny',
1743+
toolPaceMs: 0, skipWriteDeferral: modelTier.tier === 'tiny',
17251744
userMessage: message, lastDroppedFilePaths: _pendingDroppedFilePaths, writeFileHistory,
17261745
continuationCount,
17271746
};

main/llmEngine.js

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,12 @@ class LLMEngine extends EventEmitter {
232232
// (_eraseContextTokenRanges, streaming callbacks, etc.)
233233
// Increased from 500ms to 1000ms to prevent "Object is disposed" race
234234
await new Promise(r => setTimeout(r, 1000));
235+
236+
// Preserve conversation history across model switch (exclude system message)
237+
// User experience: switching models mid-conversation should not lose context
238+
const _preservedHistory = Array.isArray(this.chatHistory) && this.chatHistory.length > 1
239+
? this.chatHistory.filter(m => m.type !== 'system')
240+
: [];
235241

236242
// Wrap dispose in additional try-catch for race protection
237243
try {
@@ -291,16 +297,17 @@ class LLMEngine extends EventEmitter {
291297
'Model loading',
292298
);
293299

294-
// Track auto mode GPU layer usage (do not reject — auto mode optimizes layer split)
300+
// Track auto mode GPU layer usage
295301
if (mode === 'auto' && loadedModel.gpuLayers != null) {
296302
bestAutoGpuLayers = loadedModel.gpuLayers;
297303
}
298304

299-
// Reject 'cuda' mode if it loaded 0 layers despite available VRAM
300-
// This forces fallback to explicit layer counts which work better on constrained VRAM
301-
if (mode === 'cuda' && loadedModel.gpuLayers === 0 && gpuConfig.vramGB > 0.5) {
305+
// Reject 'cuda' OR 'auto' mode if it loaded 0 layers despite available VRAM
306+
// AND there are explicit layer modes to try. This forces fallback to explicit
307+
// layer counts which work better on constrained VRAM GPUs.
308+
if ((mode === 'cuda' || mode === 'auto') && loadedModel.gpuLayers === 0 && gpuConfig.vramGB > 0.5 && gpuConfig.roughMaxLayers > 0) {
302309
const log = require('./logger');
303-
log.warn(`CUDA mode loaded 0 layers despite ${gpuConfig.vramGB.toFixed(1)}GB VRAM — trying next mode`);
310+
log.warn(`${mode.toUpperCase()} mode loaded 0 layers despite ${gpuConfig.vramGB.toFixed(1)}GB VRAM & ${gpuConfig.roughMaxLayers} estimated layers — trying explicit layer count`);
304311
loadedModel.dispose?.();
305312
loadedModel = null;
306313
continue;
@@ -373,6 +380,25 @@ class LLMEngine extends EventEmitter {
373380
this.chatHistory = [{ type: 'system', text: sysPreamble }];
374381
this.lastEvaluation = null;
375382

383+
// Restore preserved conversation history (if any) with fresh system message
384+
// This allows users to switch models mid-conversation without losing context
385+
if (_preservedHistory.length > 0) {
386+
// Cap preserved history to a budget of contextSize × 0.15 × 4 chars; if it exceeds that, keep only the most recent 40% of turns
387+
const maxHistory = Math.floor((this.context?.contextSize || 8192) * 0.15);
388+
const historyChars = _preservedHistory.reduce((sum, m) => sum + (typeof m.text === 'string' ? m.text.length : 0), 0);
389+
if (historyChars < maxHistory * 4) {
390+
this.chatHistory.push(..._preservedHistory);
391+
const log = require('./logger');
392+
log.info(`Model switch: preserved ${_preservedHistory.length} conversation turns (${historyChars} chars)`);
393+
} else {
394+
// History too large for new context — keep only recent portion
395+
const keep = Math.ceil(_preservedHistory.length * 0.4);
396+
this.chatHistory.push(..._preservedHistory.slice(-keep));
397+
const log = require('./logger');
398+
log.info(`Model switch: preserved ${keep}/${_preservedHistory.length} recent turns due to context limits`);
399+
}
400+
}
401+
376402
// Model info
377403
const paramSize = this._getModelParamSize();
378404
const family = this._getModelFamily();

main/tools/toolParser.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,7 @@ function _inferFilePath(text, content, lang) {
586586
if (content) {
587587
if (content.includes('<!DOCTYPE') || content.includes('<html')) return 'index.html';
588588
if (content.includes('import React') || content.includes('from "react"')) return 'component.jsx';
589+
if (/^[.#@][a-zA-Z][\w-]*\s*\{/m.test(content) || /@media\s|@keyframes\s|@import\s|:root\s*\{/.test(content)) return 'style.css';
589590
if (content.includes('def ') || content.includes('import ')) return 'script.py';
590591
if (content.trimStart().startsWith('{')) return 'data.json';
591592
}

0 commit comments

Comments
 (0)