Skip to content

Commit e4e6796

Browse files
author
Brendan Gray
committed
fix: remove filter patch, reduce tiny tier tool count 8->6 (root cause fix)
1 parent 1960e2e commit e4e6796

3 files changed

Lines changed: 9 additions & 32 deletions

File tree

main/agenticChat.js

Lines changed: 2 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1090,7 +1090,6 @@ function register(ctx) {
10901090
let sessionJustRotated = false; // Flag to rebuild prompt after rotation
10911091
let overflowResponseBudgetReduced = false; // Flag: already tried reducing response budget on first-turn overflow
10921092
let forcedToolFunctions = null; // Set by PILLAR 3 refusal recovery to force grammar on next iteration
1093-
let forceChatModeForRetry = false; // Set on stuck_greeting ROLLBACK — strips tools and uses chat preamble for retry
10941093
let consecutiveEmptyGrammarRetries = 0; // Track grammar failures for text-mode fallback
10951094

10961095
// ── Execution State Tracking (ported from Pocket Guide) ──
@@ -1385,12 +1384,7 @@ function register(ctx) {
13851384
: modelTier.tier === 'large' ? 5 : 2;
13861385
const useNativeFunctions = (taskType !== 'chat') && iteration <= grammarIterLimit;
13871386
let nativeFunctions = null;
1388-
if (forceChatModeForRetry) {
1389-
// stuck_greeting recovery: strip all tools, use plain text with chat preamble
1390-
nativeFunctions = null;
1391-
forcedToolFunctions = null;
1392-
forceChatModeForRetry = false; // One-shot
1393-
} else if (consecutiveEmptyGrammarRetries >= 1) {
1387+
if (consecutiveEmptyGrammarRetries >= 1) {
13941388
// Grammar-to-text fallback: model can't produce grammar output, degrade gracefully.
13951389
// Threshold lowered to 1 — the second native function call attempt can hang at the
13961390
// C++ level and never return. One failure is enough to switch to text mode safely.
@@ -1709,16 +1703,7 @@ function register(ctx) {
17091703
}
17101704

17111705
// Escalating retry strategy
1712-
if (responseVerdict.reason === 'stuck_greeting') {
1713-
// Model outputted its trained greeting despite an agentic task.
1714-
// Retry with the simple chat preamble and NO tools — removes all tool pressure.
1715-
currentPrompt = {
1716-
systemContext: buildStaticPrompt('chat'),
1717-
userMessage: message.substring(0, 500),
1718-
};
1719-
forceChatModeForRetry = true;
1720-
console.log('[AI Chat] Stuck greeting detected — retrying in chat mode (tool-free)');
1721-
} else if (rollbackRetries === 1) {
1706+
if (rollbackRetries === 1) {
17221707
// First retry: same prompt, slightly lower temperature for focus
17231708
if (context?.params) context.params.temperature = Math.max((context.params.temperature || 0.7) - 0.2, 0.1);
17241709
} else if (rollbackRetries === 2) {

main/agenticChatHelpers.js

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -563,14 +563,6 @@ function evaluateResponse(responseText, functionCalls, taskType, iteration, hasR
563563
return { verdict: 'ROLLBACK', reason: 'described_not_executed' };
564564
}
565565

566-
// ── STUCK GREETING: generic greeting response on an agentic (non-chat) task ──
567-
// Tiny models overwhelmed by tool injection context fall back to their trained greeting.
568-
// Detect the exact pattern and ROLLBACK so the retry can strip tool pressure.
569-
const STUCK_GREETING_RX = /^(Hello!?\s+How\s+can\s+I\s+(assist|help)\s+(you\s+)?(today)?[!?.]*\s*|Hi!?\s+How\s+can\s+I\s+(assist|help)[!?.]*\s*)$/i;
570-
if (taskType !== 'chat' && iteration === 1 && STUCK_GREETING_RX.test(text)) {
571-
return { verdict: 'ROLLBACK', reason: 'stuck_greeting' };
572-
}
573-
574566
// ── DEFAULT: Accept ──
575567
return { verdict: 'COMMIT', reason: 'default' };
576568
}

main/modelProfiles.js

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,7 @@ const FAMILY_PROFILES = {
126126
prompt: { style: 'compact', toolPromptStyle: 'compact', fewShotExamples: 1 },
127127
thinkTokens: { mode: 'none' },
128128
_thinkBudgetWhenActive: 128, // used by isThinkingVariant override in llmEngine.js
129-
generation: { maxToolsPerTurn: 8 },
129+
generation: { maxToolsPerTurn: 6 },
130130
quirks: { truncatesMidTool: true, poorMultiTool: true },
131131
},
132132
small: {
@@ -183,7 +183,7 @@ const FAMILY_PROFILES = {
183183
context: { effectiveContextSize: 32768, maxResponseTokens: 4096 },
184184
prompt: { style: 'compact', toolPromptStyle: 'compact', fewShotExamples: 2 },
185185
thinkTokens: { mode: 'none' }, // Llama 3.2 is not a thinking model
186-
generation: { maxToolsPerTurn: 8 },
186+
generation: { maxToolsPerTurn: 6 },
187187
quirks: { truncatesMidTool: true, poorMultiTool: true, loopsFrequently: true },
188188
},
189189
small: {
@@ -236,7 +236,7 @@ const FAMILY_PROFILES = {
236236
context: { effectiveContextSize: 32768, maxResponseTokens: 4096 },
237237
prompt: { style: 'compact', toolPromptStyle: 'compact' },
238238
thinkTokens: { mode: 'budget', budget: 128 },
239-
generation: { maxToolsPerTurn: 8 },
239+
generation: { maxToolsPerTurn: 6 },
240240
quirks: { loopsFrequently: true, truncatesMidTool: true, poorMultiTool: true },
241241
},
242242
small: {
@@ -290,7 +290,7 @@ const FAMILY_PROFILES = {
290290
context: { effectiveContextSize: 32768, maxResponseTokens: 4096 },
291291
prompt: { style: 'compact', toolPromptStyle: 'compact', fewShotExamples: 1 },
292292
thinkTokens: { mode: 'budget', budget: 128 },
293-
generation: { maxToolsPerTurn: 8 },
293+
generation: { maxToolsPerTurn: 6 },
294294
quirks: { truncatesMidTool: true, poorMultiTool: true },
295295
},
296296
small: {
@@ -337,7 +337,7 @@ const FAMILY_PROFILES = {
337337
context: { effectiveContextSize: 32768, maxResponseTokens: 4096 },
338338
prompt: { style: 'compact', toolPromptStyle: 'compact' },
339339
thinkTokens: { mode: 'budget', budget: 128 },
340-
generation: { maxToolsPerTurn: 8 },
340+
generation: { maxToolsPerTurn: 6 },
341341
quirks: { overlyVerbose: true, truncatesMidTool: true },
342342
},
343343
small: {
@@ -530,7 +530,7 @@ const FAMILY_PROFILES = {
530530
sampling: { temperature: 0.40, topP: 0.85, topK: 20, repeatPenalty: 1.12 },
531531
context: { effectiveContextSize: 32768, maxResponseTokens: 4096 },
532532
prompt: { style: 'compact', toolPromptStyle: 'compact', fewShotExamples: 1 },
533-
generation: { maxToolsPerTurn: 8 },
533+
generation: { maxToolsPerTurn: 6 },
534534
},
535535
small: {
536536
// LFM2-2.6B
@@ -613,7 +613,7 @@ const FAMILY_PROFILES = {
613613
sampling: { temperature: 0.40, topP: 0.85, topK: 20, repeatPenalty: 1.12 },
614614
context: { effectiveContextSize: 32768, maxResponseTokens: 4096 },
615615
prompt: { style: 'compact', toolPromptStyle: 'compact', fewShotExamples: 1 },
616-
generation: { maxToolsPerTurn: 8 },
616+
generation: { maxToolsPerTurn: 6 },
617617
},
618618
},
619619

0 commit comments

Comments
 (0)