fix: remove stuck_greeting filter patch, reduce tiny tier maxToolsPerTurn 8->6 (root cause fix)

Brendan Gray · Brendan Gray · commit e4e6796dbc7b · 2026-02-26T20:14:22.000-05:00
diff --git a/main/agenticChat.js b/main/agenticChat.js
@@ -1090,7 +1090,6 @@ function register(ctx) {
       let sessionJustRotated = false; // Flag to rebuild prompt after rotation
       let overflowResponseBudgetReduced = false; // Flag: already tried reducing response budget on first-turn overflow
       let forcedToolFunctions = null; // Set by PILLAR 3 refusal recovery to force grammar on next iteration
-      let forceChatModeForRetry = false; // Set on stuck_greeting ROLLBACK — strips tools and uses chat preamble for retry
       let consecutiveEmptyGrammarRetries = 0; // Track grammar failures for text-mode fallback
 
       // ── Execution State Tracking (ported from Pocket Guide) ──
@@ -1385,12 +1384,7 @@ function register(ctx) {
           : modelTier.tier === 'large' ? 5 : 2;
         const useNativeFunctions = (taskType !== 'chat') && iteration <= grammarIterLimit;
         let nativeFunctions = null;
-        if (forceChatModeForRetry) {
-          // stuck_greeting recovery: strip all tools, use plain text with chat preamble
-          nativeFunctions = null;
-          forcedToolFunctions = null;
-          forceChatModeForRetry = false; // One-shot
-        } else if (consecutiveEmptyGrammarRetries >= 1) {
+        if (consecutiveEmptyGrammarRetries >= 1) {
           // Grammar-to-text fallback: model can't produce grammar output, degrade gracefully.
           // Threshold lowered to 1 — the second native function call attempt can hang at the
           // C++ level and never return. One failure is enough to switch to text mode safely.
@@ -1709,16 +1703,7 @@ function register(ctx) {
           }
 
           // Escalating retry strategy
-          if (responseVerdict.reason === 'stuck_greeting') {
-            // Model outputted its trained greeting despite an agentic task.
-            // Retry with the simple chat preamble and NO tools — removes all tool pressure.
-            currentPrompt = {
-              systemContext: buildStaticPrompt('chat'),
-              userMessage: message.substring(0, 500),
-            };
-            forceChatModeForRetry = true;
-            console.log('[AI Chat] Stuck greeting detected — retrying in chat mode (tool-free)');
-          } else if (rollbackRetries === 1) {
+          if (rollbackRetries === 1) {
             // First retry: same prompt, slightly lower temperature for focus
             if (context?.params) context.params.temperature = Math.max((context.params.temperature || 0.7) - 0.2, 0.1);
           } else if (rollbackRetries === 2) {
diff --git a/main/agenticChatHelpers.js b/main/agenticChatHelpers.js
@@ -563,14 +563,6 @@ function evaluateResponse(responseText, functionCalls, taskType, iteration, hasR
     return { verdict: 'ROLLBACK', reason: 'described_not_executed' };
   }
 
-  // ── STUCK GREETING: generic greeting response on an agentic (non-chat) task ──
-  // Tiny models overwhelmed by tool injection context fall back to their trained greeting.
-  // Detect the exact pattern and ROLLBACK so the retry can strip tool pressure.
-  const STUCK_GREETING_RX = /^(Hello!?\s+How\s+can\s+I\s+(assist|help)\s+(you\s+)?(today)?[!?.]*\s*|Hi!?\s+How\s+can\s+I\s+(assist|help)[!?.]*\s*)$/i;
-  if (taskType !== 'chat' && iteration === 1 && STUCK_GREETING_RX.test(text)) {
-    return { verdict: 'ROLLBACK', reason: 'stuck_greeting' };
-  }
-
   // ── DEFAULT: Accept ──
   return { verdict: 'COMMIT', reason: 'default' };
 }
diff --git a/main/modelProfiles.js b/main/modelProfiles.js
@@ -126,7 +126,7 @@ const FAMILY_PROFILES = {
       prompt: { style: 'compact', toolPromptStyle: 'compact', fewShotExamples: 1 },
       thinkTokens: { mode: 'none' },
       _thinkBudgetWhenActive: 128, // used by isThinkingVariant override in llmEngine.js
-      generation: { maxToolsPerTurn: 8 },
+      generation: { maxToolsPerTurn: 6 },
       quirks: { truncatesMidTool: true, poorMultiTool: true },
     },
     small: {
@@ -183,7 +183,7 @@ const FAMILY_PROFILES = {
       context: { effectiveContextSize: 32768, maxResponseTokens: 4096 },
       prompt: { style: 'compact', toolPromptStyle: 'compact', fewShotExamples: 2 },
       thinkTokens: { mode: 'none' }, // Llama 3.2 is not a thinking model
-      generation: { maxToolsPerTurn: 8 },
+      generation: { maxToolsPerTurn: 6 },
       quirks: { truncatesMidTool: true, poorMultiTool: true, loopsFrequently: true },
     },
     small: {
@@ -236,7 +236,7 @@ const FAMILY_PROFILES = {
       context: { effectiveContextSize: 32768, maxResponseTokens: 4096 },
       prompt: { style: 'compact', toolPromptStyle: 'compact' },
       thinkTokens: { mode: 'budget', budget: 128 },
-      generation: { maxToolsPerTurn: 8 },
+      generation: { maxToolsPerTurn: 6 },
       quirks: { loopsFrequently: true, truncatesMidTool: true, poorMultiTool: true },
     },
     small: {
@@ -290,7 +290,7 @@ const FAMILY_PROFILES = {
       context: { effectiveContextSize: 32768, maxResponseTokens: 4096 },
       prompt: { style: 'compact', toolPromptStyle: 'compact', fewShotExamples: 1 },
       thinkTokens: { mode: 'budget', budget: 128 },
-      generation: { maxToolsPerTurn: 8 },
+      generation: { maxToolsPerTurn: 6 },
       quirks: { truncatesMidTool: true, poorMultiTool: true },
     },
     small: {
@@ -337,7 +337,7 @@ const FAMILY_PROFILES = {
       context: { effectiveContextSize: 32768, maxResponseTokens: 4096 },
       prompt: { style: 'compact', toolPromptStyle: 'compact' },
       thinkTokens: { mode: 'budget', budget: 128 },
-      generation: { maxToolsPerTurn: 8 },
+      generation: { maxToolsPerTurn: 6 },
       quirks: { overlyVerbose: true, truncatesMidTool: true },
     },
     small: {
@@ -530,7 +530,7 @@ const FAMILY_PROFILES = {
       sampling: { temperature: 0.40, topP: 0.85, topK: 20, repeatPenalty: 1.12 },
       context: { effectiveContextSize: 32768, maxResponseTokens: 4096 },
       prompt: { style: 'compact', toolPromptStyle: 'compact', fewShotExamples: 1 },
-      generation: { maxToolsPerTurn: 8 },
+      generation: { maxToolsPerTurn: 6 },
     },
     small: {
       // LFM2-2.6B
@@ -613,7 +613,7 @@ const FAMILY_PROFILES = {
       sampling: { temperature: 0.40, topP: 0.85, topK: 20, repeatPenalty: 1.12 },
       context: { effectiveContextSize: 32768, maxResponseTokens: 4096 },
       prompt: { style: 'compact', toolPromptStyle: 'compact', fewShotExamples: 1 },
-      generation: { maxToolsPerTurn: 8 },
+      generation: { maxToolsPerTurn: 6 },
     },
   },
 

Original file line number	Diff line number	Diff line change
`@@ -563,14 +563,6 @@ function evaluateResponse(responseText, functionCalls, taskType, iteration, hasR`
`563`	`563`	`return { verdict: 'ROLLBACK', reason: 'described_not_executed' };`
`564`	`564`	`}`
`565`	`565`
`566`		`- // ── STUCK GREETING: generic greeting response on an agentic (non-chat) task ──`
`567`		`- // Tiny models overwhelmed by tool injection context fall back to their trained greeting.`
`568`		`- // Detect the exact pattern and ROLLBACK so the retry can strip tool pressure.`
`569`		`- const STUCK_GREETING_RX = /^(Hello!?\s+How\s+can\s+I\s+(assist\|help)\s+(you\s+)?(today)?[!?.]*\s*\|Hi!?\s+How\s+can\s+I\s+(assist\|help)[!?.]*\s*)$/i;`
`570`		`- if (taskType !== 'chat' && iteration === 1 && STUCK_GREETING_RX.test(text)) {`
`571`		`- return { verdict: 'ROLLBACK', reason: 'stuck_greeting' };`
`572`		`- }`
`573`		`-`
`574`	`566`	`// ── DEFAULT: Accept ──`
`575`	`567`	`return { verdict: 'COMMIT', reason: 'default' };`
`576`	`568`	`}`