@@ -1105,10 +1105,27 @@ function register(ctx) {
11051105 consecutiveEmptyGrammarRetries = 0 ;
11061106 }
11071107
1108- fullResponseText += responseText ;
1108+ // ── Overlap de-duplication for ALL continuation passes ──
1109+ // Detect if model repeated the tail we sent as context.
1110+ // Applied BEFORE accumulating to fullResponseText/displayResponseText
1111+ // so duplicate content never enters the display pipeline.
1112+ let _overlapLen = 0 ;
1113+ if ( _pendingPartialBlock && continuationCount > 0 ) {
1114+ const maxCheck = Math . min ( _pendingPartialBlock . length , responseText . length , 2000 ) ;
1115+ for ( let len = maxCheck ; len >= 20 ; len -- ) {
1116+ const suffix = _pendingPartialBlock . slice ( - len ) ;
1117+ if ( responseText . startsWith ( suffix ) ) { _overlapLen = len ; break ; }
1118+ }
1119+ if ( _overlapLen > 0 ) {
1120+ console . log ( `[AI Chat] Continuation overlap: removed ${ _overlapLen } duplicate chars` ) ;
1121+ }
1122+ }
1123+ const newContent = _overlapLen > 0 ? responseText . slice ( _overlapLen ) : responseText ;
1124+
1125+ fullResponseText += newContent ;
11091126
11101127 // Strip tool fences and raw inline JSON tool calls from display copy
1111- let displayChunk = responseText
1128+ let displayChunk = newContent
11121129 . replace ( / \n ? ` ` ` (?: j s o n | t o o l _ c a l l | t o o l ) \b [ \s \S ] * ?` ` ` \n ? / g, '' )
11131130 . replace ( / \n ? ` ` ` (?: j s o n | t o o l _ c a l l | t o o l ) \b [ \s \S ] * $ / g, '' )
11141131 . replace ( / \[ ? \s * \{ \s * " (?: t o o l | n a m e ) " \s * : \s * " [ ^ " ] * " [ \s \S ] * ?\} \s * \] ? / g, '' )
@@ -1118,17 +1135,18 @@ function register(ctx) {
11181135 }
11191136 displayResponseText += displayChunk ;
11201137
1121- // ── SEAMLESS CONTINUATION ──
1138+ // Correct UI stream buffer: the overlapping tokens were already streamed
1139+ // during generation. Trim them by resetting to iteration start and
1140+ // re-sending just the de-duplicated new content.
1141+ if ( _overlapLen > 0 && mainWindow && ! mainWindow . isDestroyed ( ) ) {
1142+ mainWindow . webContents . send ( 'llm-stream-reset' ) ;
1143+ if ( displayChunk ) mainWindow . webContents . send ( 'llm-token' , displayChunk ) ;
1144+ }
1145+
1146+ // ── SEAMLESS CONTINUATION — stitch for MCP tool detection ──
11221147 let _stitchedForMcp ;
11231148 if ( _pendingPartialBlock ) {
1124- // Overlap de-duplication: detect if model repeated the tail we sent
1125- let overlap = 0 ;
1126- const maxCheck = Math . min ( _pendingPartialBlock . length , responseText . length , 2000 ) ;
1127- for ( let len = maxCheck ; len >= 20 ; len -- ) {
1128- const suffix = _pendingPartialBlock . slice ( - len ) ;
1129- if ( responseText . startsWith ( suffix ) ) { overlap = len ; break ; }
1130- }
1131- _stitchedForMcp = _pendingPartialBlock + responseText . slice ( overlap ) ;
1149+ _stitchedForMcp = _pendingPartialBlock + responseText . slice ( _overlapLen ) ;
11321150
11331151 // Fence-aware cleanup: if stitching produced duplicate ```json fences,
11341152 // keep only the LAST complete one (the continuation's fresh attempt)
@@ -1289,6 +1307,7 @@ function register(ctx) {
12891307 const tailForModel = partialFence . length > maxTailChars ? partialFence . slice ( - maxTailChars ) : partialFence ;
12901308 continuationMsg = `${ taskHint } ${ fileManifest } [Continue the tool call JSON from exactly where it was cut. Output ONLY the JSON continuation. Do NOT restart the tool call. Continue from:\n${ tailForModel } ]` ;
12911309 } else {
1310+ _pendingPartialBlock = responseText ; // enable overlap detection for ALL continuation types
12921311 const tailForModel = responseText . length > maxTailChars ? responseText . slice ( - maxTailChars ) : responseText ;
12931312 continuationMsg = `${ taskHint } ${ fileManifest } [Continue your response exactly where you left off. Do not restart or repeat content. Here is the end of what you wrote:\n${ tailForModel } ]` ;
12941313 }
@@ -1412,15 +1431,38 @@ function register(ctx) {
14121431 const _unclosedFenceMatch = ! hasCodeBlocks && responseText . match ( / ` ` ` (?: h t m l ? | c s s | j a v a s c r i p t | j s | t y p e s c r i p t | t s | p y t h o n | p y | j s o n ) \s * \n ( [ \s \S ] { 500 , } ) $ / i) ;
14131432 const hasUnclosedLargeBlock = ! ! _unclosedFenceMatch ;
14141433 // Detect raw HTML/code dumped without fences (model obeyed "no code blocks" but didn't use write_file)
1415- const hasRawCodeDump = ! hasCodeBlocks && ! hasUnclosedLargeBlock && responseText . length > 500 &&
1416- ( / < h t m l [ \s > ] / i. test ( responseText ) || / < s t y l e [ \s > ] / i. test ( responseText ) || / < s c r i p t [ \s > ] / i. test ( responseText ) ||
1417- ( / < \w + [ \s > ] / . test ( responseText ) && ( responseText . match ( / < \w + / g) || [ ] ) . length > 10 ) ) ;
1418- if ( ( hasCodeBlocks || hasUnclosedLargeBlock || hasRawCodeDump ) && nudgesRemaining > 0 && iteration < MAX_AGENTIC_ITERATIONS - 1 ) {
1434+ // Requires STRUCTURAL HTML document tags (<!DOCTYPE, <html, <head, <body) to avoid false
1435+ // positives on plain-text descriptions that mention individual element names like <div>, <section>.
1436+ const hasRawCodeDump = ! hasCodeBlocks && ! hasUnclosedLargeBlock && responseText . length > 500 && (
1437+ ( / < h t m l [ \s > ] / i. test ( responseText ) && ( / < h e a d [ \s > ] / i. test ( responseText ) || / < b o d y [ \s > ] / i. test ( responseText ) ) ) ||
1438+ ( / < s t y l e [ \s > ] / i. test ( responseText ) && ( responseText . match ( / [ { } ; ] \s * \w + \s * : / g) || [ ] ) . length > 5 ) ||
1439+ ( / < s c r i p t [ \s > ] / i. test ( responseText ) && ( responseText . match ( / (?: f u n c t i o n | c o n s t | l e t | v a r | = > ) / g) || [ ] ) . length > 3 )
1440+ ) ;
1441+
1442+ // Skip nudge when context is critically small — resetSession can hang on degraded KV cache
1443+ // and the model can't do better with even less context.
1444+ let contextTooSmallForNudge = false ;
1445+ if ( totalCtx <= 4096 ) {
1446+ let _nudgeUsed = 0 ;
1447+ try { if ( llmEngine . sequence ?. nTokens ) _nudgeUsed = llmEngine . sequence . nTokens ; } catch ( _ ) { }
1448+ if ( _nudgeUsed > 0 && _nudgeUsed / totalCtx > 0.50 ) contextTooSmallForNudge = true ;
1449+ }
1450+
1451+ if ( ( hasCodeBlocks || hasUnclosedLargeBlock || hasRawCodeDump ) && nudgesRemaining > 0 && iteration < MAX_AGENTIC_ITERATIONS - 1 && ! contextTooSmallForNudge ) {
14191452 nudgesRemaining -- ;
1453+ console . log ( `[AI Chat] Code-dump nudge firing (codeBlocks=${ hasCodeBlocks } , unclosed=${ hasUnclosedLargeBlock } , rawDump=${ hasRawCodeDump } )` ) ;
14201454 // Strip the raw code dump from accumulated response to free context budget.
14211455 // The model will regenerate the content properly via write_file.
14221456 fullResponseText = '' ;
1423- try { await llmEngine . resetSession ( true ) ; } catch ( _ ) { }
1457+ // Timeout-guarded resetSession to prevent indefinite hang from degraded C++ KV cache
1458+ try {
1459+ await Promise . race ( [
1460+ llmEngine . resetSession ( true ) ,
1461+ new Promise ( ( _ , rej ) => setTimeout ( ( ) => rej ( new Error ( 'resetSession timeout' ) ) , 8000 ) ) ,
1462+ ] ) ;
1463+ } catch ( resetErr ) {
1464+ console . warn ( `[AI Chat] Code-dump nudge resetSession failed: ${ resetErr . message } ` ) ;
1465+ }
14241466 sessionJustRotated = true ;
14251467 currentPrompt = {
14261468 systemContext : buildStaticPrompt ( ) ,
0 commit comments