|
1 | | -/** |
| 1 | +/** |
2 | 2 | * Agentic AI Chat Handler — the core conversational loop with RAG, MCP tools, memory, and browser automation. |
3 | 3 | * Also contains the find-bug analysis handler. |
4 | 4 | */ |
@@ -715,14 +715,17 @@ function register(ctx) { |
715 | 715 | // ── Context budget viability check ── |
716 | 716 | // On extremely constrained contexts (e.g., 3840 ctx models), verify the generation |
717 | 717 | // budget is viable. If system prompt alone leaves < 512 tokens for response + user |
718 | | - // message, log a diagnostic warning and cap rotation count to prevent the |
719 | | - // "rotate → forget → re-emit → rotate" death spiral. |
| 718 | + // message, log a diagnostic warning. |
720 | 719 | const _viableResponseBudget = totalCtx - sysPromptReserve; |
721 | 720 | if (_viableResponseBudget < 512) { |
722 | | - console.log(`[AI Chat] Context budget WARNING: totalCtx=${totalCtx}, sysReserve=${sysPromptReserve}, viable response budget=${_viableResponseBudget} tokens (<512). Reducing max rotations to prevent death spiral.`); |
| 721 | + console.log(`[AI Chat] Context budget WARNING: totalCtx=${totalCtx}, sysReserve=${sysPromptReserve}, viable response budget=${_viableResponseBudget} tokens (<512).`); |
723 | 722 | } |
724 | | - // Scale max rotations based on context size to prevent oscillation on tiny contexts |
725 | | - const effectiveMaxRotations = totalCtx < 4096 ? 3 : (totalCtx < 8192 ? 5 : MAX_CONTEXT_ROTATIONS); |
| 723 | + // Max context rotations — high default so large tasks (10K+ lines of code) can |
| 724 | + // rotate many times. Smaller contexts are capped lower (5 below 4096, 15 below 8192) to prevent the |
| 725 | + // "rotate → forget → re-emit → rotate" death spiral where each rotation produces |
| 726 | + // essentially the same prompt. |
| 727 | + const MAX_CONTEXT_ROTATIONS = 50; |
| 728 | + const effectiveMaxRotations = totalCtx < 4096 ? 5 : (totalCtx < 8192 ? 15 : MAX_CONTEXT_ROTATIONS); |
726 | 729 |
|
727 | 730 | // Task-type routing is handled by the model via system prompt — always return 'general' |
728 | 731 | // so the model receives full tool context. The regex classifier was removed because |
@@ -959,7 +962,6 @@ function register(ctx) { |
959 | 962 | const toolFailCounts = {}; // Track per-tool failure counts for enrichErrorFeedback |
960 | 963 | let nudgesRemaining = 3; // Allow 3 nudges when model responds with text instead of tool calls |
961 | 964 | let contextRotations = 0; // Track how many times we've rotated context |
962 | | - const MAX_CONTEXT_ROTATIONS = 10; // Allow up to 10 rotations for long tasks |
963 | 965 | let lastConvSummary = ''; // Conversation summary from last rotation |
964 | 966 | let sessionJustRotated = false; // Flag to rebuild prompt after rotation |
965 | 967 | let overflowResponseBudgetReduced = false; // Flag: already tried reducing response budget on first-turn overflow |
@@ -1878,10 +1880,58 @@ function register(ctx) { |
1878 | 1880 | } |
1879 | 1881 | _contContextPct = _contUsed / totalCtx; |
1880 | 1882 | } catch (_) {} |
1881 | | - if (_contContextPct > 0.70) { |
1882 | | - console.log(`[AI Chat] Seamless continuation aborted: context at ${Math.round(_contContextPct * 100)}% (>70% budget). Rotating instead.`); |
| 1883 | + // Use a higher budget threshold when mid-tool-call (unclosed fence). |
| 1884 | + // Continuation prompts for unclosed fences are only ~200 chars of tail, |
| 1885 | + // so the NEXT iteration's context will be much smaller than current usage. |
| 1886 | + // Aborting at 70% while the model is mid-JSON loses the entire tool call. |
| 1887 | + // For maxTokens truncation (no fence), keep the conservative 70% limit. |
| 1888 | + const _contBudgetLimit = _hasUnclosedToolFence ? 0.92 : 0.70; |
| 1889 | + if (_contContextPct > _contBudgetLimit) { |
| 1890 | + console.log(`[AI Chat] Seamless continuation aborted: context at ${Math.round(_contContextPct * 100)}% (>${Math.round(_contBudgetLimit * 100)}% budget${_hasUnclosedToolFence ? ', mid-tool-call' : ''}). Rotating instead.`); |
1883 | 1891 | continuationCount = 0; |
1884 | | - // Fall through to normal post-generation compaction / rotation below |
| 1892 | + // ── Salvage partial tool call content on forced abort ── |
| 1893 | + // When aborting with an unclosed tool fence, the accumulated content in |
| 1894 | + // _stitchedForMcp contains a partial write_file JSON that processResponse |
| 1895 | + // can't parse. Instead of losing all content, attempt to extract the |
| 1896 | + // filePath and content from the partial JSON and inject a salvaged tool call. |
| 1897 | + if (_hasUnclosedToolFence && _stitchedForMcp) { |
| 1898 | + const _salvageFenceContent = _stitchedForMcp.slice(_fenceIdx); |
| 1899 | + // Try to extract filePath and content from incomplete JSON |
| 1900 | + const _fpMatch = _salvageFenceContent.match(/"filePath"\s*:\s*"([^"]+)"/); |
| 1901 | + const _ctMatch = _salvageFenceContent.match(/"content"\s*:\s*"([\s\S]+)/); |
| 1902 | + if (_fpMatch && _ctMatch && _ctMatch[1].length > 100) { |
| 1903 | + // Extract content: unescape what we can, trim the trailing incomplete part |
| 1904 | + let _salvageContent = _ctMatch[1]; |
| 1905 | + // Remove trailing incomplete escape sequences or unterminated strings |
| 1906 | + // Keep only the text up to the last escaped newline (the two-character sequence \n in the raw JSON), dropping any dangling quote/brace after it |
| 1907 | + const _lastNewline = _salvageContent.lastIndexOf('\\n'); |
| 1908 | + if (_lastNewline > 50) { |
| 1909 | + _salvageContent = _salvageContent.substring(0, _lastNewline); |
| 1910 | + } |
| 1911 | + // Unescape JSON string escapes |
| 1912 | + try { |
| 1913 | + _salvageContent = JSON.parse('"' + _salvageContent + '"'); |
| 1914 | + } catch (_) { |
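| 1915 | + // Manual unescape for common cases. NOTE(review): these sequential replaces mis-decode an escaped backslash followed by n or t (raw \\n becomes backslash + newline) |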
| 1916 | + _salvageContent = _salvageContent.replace(/\\n/g, '\n').replace(/\\t/g, '\t').replace(/\\"/g, '"').replace(/\\\\/g, '\\'); |
| 1917 | + } |
| 1918 | + if (_salvageContent.length > 100) { |
| 1919 | + console.log(`[AI Chat] Salvaged ${_salvageContent.length} chars of write_file content for "${_fpMatch[1]}" from aborted continuation`); |
| 1920 | + // Close the fence and reconstruct as a complete tool call for processResponse |
| 1921 | + const _salvageJson = JSON.stringify({ |
| 1922 | + tool: 'write_file', |
| 1923 | + params: { filePath: _fpMatch[1], content: _salvageContent } |
| 1924 | + }); |
| 1925 | + // Replace responseText with the reconstructed complete tool call |
| 1926 | + // so processResponse can parse and execute it |
| 1927 | + _pendingPartialBlock = null; // Clear stale partial |
| 1928 | + // Inject as a fenced block that processResponse can parse |
| 1929 | + responseText = '```json\n' + _salvageJson + '\n```'; |
| 1930 | + fullResponseText = fullResponseText.slice(0, fullResponseText.length - (result.text || '').length) + responseText; |
| 1931 | + } |
| 1932 | + } |
| 1933 | + } |
| 1934 | + // Fall through to normal post-generation compaction / rotation below |
1885 | 1935 | } else { |
1886 | 1936 | continuationCount++; |
1887 | 1937 | // Forward-progress guard: if the last 3 consecutive continuations each |
|
0 commit comments