|
| 1 | +import { buildPreCompactionSummary, judgeCompactionQuality } from "./compaction-judge.js"; |
| 2 | +import { extractPreCompactionKnowledge } from "./knowledge-extractor.js"; |
| 3 | +import { trySessionMemoryCompact } from "./session-memory-compact.js"; |
| 4 | +import type { SessionState } from "./session-state.js"; |
| 5 | +import type { ContextManager, EventBus, LLMProvider, Message } from "../core/index.js"; |
| 6 | +import { MessageRole, estimateMessageTokens, estimateTokens } from "../core/index.js"; |
| 7 | + |
| 8 | +const PINNED_TOKEN_BUDGET = 80_000; |
| 9 | +const APPROACHING_LIMIT_RATIO = 0.6; |
| 10 | +const REACTIVE_COMPACT_MAX_FAILURES = 3; |
| 11 | + |
| 12 | +interface CompactionHost { |
| 13 | + readonly contextManager: ContextManager | null; |
| 14 | + readonly config: { |
| 15 | + readonly context: { |
| 16 | + readonly triggerRatio: number; |
| 17 | + }; |
| 18 | + }; |
| 19 | + readonly sessionState: SessionState | null; |
| 20 | + readonly bus: EventBus; |
| 21 | + readonly provider: LLMProvider; |
| 22 | + messages: Message[]; |
| 23 | + estimatedTokens: number; |
| 24 | + lastReportedInputTokens: number; |
| 25 | + approachingLimitWarned: boolean; |
| 26 | + iterations: number; |
| 27 | + getEffectiveContextBudget(): number; |
| 28 | + pushMessage(message: Message): void; |
| 29 | + pruneToolOutputs( |
| 30 | + currentTokens: number, |
| 31 | + threshold: number, |
| 32 | + ): { savedTokens: number; prunedCount: number }; |
| 33 | + resetPostCompactionState(full: boolean): void; |
| 34 | + injectSessionState(knownTokenEstimate?: number): void; |
| 35 | + reinjectSkillContent(): void; |
| 36 | +} |
| 37 | + |
| 38 | +interface CompactionStats { |
| 39 | + readonly estimatedTokens: number; |
| 40 | + readonly maxTokens: number; |
| 41 | + readonly threshold: number; |
| 42 | + readonly overhead: number; |
| 43 | +} |
| 44 | + |
| 45 | +interface ReactiveCompactionHost { |
| 46 | + readonly bus: EventBus; |
| 47 | + readonly sessionState: SessionState | null; |
| 48 | + readonly provider: LLMProvider; |
| 49 | + messages: Message[]; |
| 50 | + iterations: number; |
| 51 | + reactiveCompactFailures: number; |
| 52 | + compactionCycles: number; |
| 53 | + microcompact(): void; |
| 54 | + maybeCompactContext(options?: { force?: boolean }): Promise<void>; |
| 55 | +} |
| 56 | + |
| 57 | +export async function maybeCompactTaskLoopContext( |
| 58 | + loop: CompactionHost, |
| 59 | + options?: { force?: boolean }, |
| 60 | +): Promise<void> { |
| 61 | + const stats = getCompactionStats(loop); |
| 62 | + if (!stats) return; |
| 63 | + if (skipCompaction(loop, stats, options)) return; |
| 64 | + |
| 65 | + const pruneResult = tryPruneToolOutputs(loop, stats, options); |
| 66 | + if (pruneResult.completed) return; |
| 67 | + |
| 68 | + if (trySessionMemoryCompaction(loop, stats)) return; |
| 69 | + await runFullCompaction(loop, stats, pruneResult); |
| 70 | +} |
| 71 | + |
| 72 | +export async function reactiveCompactTaskLoop(loop: ReactiveCompactionHost): Promise<void> { |
| 73 | + if (loop.reactiveCompactFailures >= REACTIVE_COMPACT_MAX_FAILURES) { |
| 74 | + emitReactiveCompactionCircuitBreaker(loop.bus); |
| 75 | + throw new Error("Reactive compaction circuit breaker: too many consecutive compaction failures"); |
| 76 | + } |
| 77 | + |
| 78 | + try { |
| 79 | + loop.microcompact(); |
| 80 | + await loop.maybeCompactContext({ force: true }); |
| 81 | + loop.reactiveCompactFailures = 0; |
| 82 | + loop.compactionCycles++; |
| 83 | + await maybeExtractReactiveCompactionKnowledge(loop); |
| 84 | + } catch (err) { |
| 85 | + loop.reactiveCompactFailures++; |
| 86 | + throw err; |
| 87 | + } |
| 88 | +} |
| 89 | + |
| 90 | +function emitReactiveCompactionCircuitBreaker(bus: EventBus): void { |
| 91 | + bus.emit("error", { |
| 92 | + message: `Reactive compaction circuit breaker tripped after ${REACTIVE_COMPACT_MAX_FAILURES} consecutive failures.`, |
| 93 | + code: "REACTIVE_COMPACT_CIRCUIT_BREAKER", |
| 94 | + fatal: true, |
| 95 | + }); |
| 96 | +} |
| 97 | + |
| 98 | +async function maybeExtractReactiveCompactionKnowledge(loop: ReactiveCompactionHost): Promise<void> { |
| 99 | + if (loop.compactionCycles % 2 !== 0 || !loop.sessionState) return; |
| 100 | + try { |
| 101 | + const firstUser = loop.messages.find((message) => message.role === MessageRole.USER && message.content); |
| 102 | + const summary = buildPreCompactionSummary(loop.sessionState, loop.messages, loop.iterations); |
| 103 | + const result = await extractPreCompactionKnowledge( |
| 104 | + loop.provider, |
| 105 | + summary, |
| 106 | + loop.sessionState, |
| 107 | + loop.messages, |
| 108 | + firstUser?.content ?? null, |
| 109 | + ); |
| 110 | + if (result) { |
| 111 | + for (const entry of result.entries) { |
| 112 | + loop.sessionState.addKnowledge(entry.key, entry.content, loop.iterations); |
| 113 | + } |
| 114 | + } |
| 115 | + } catch { |
| 116 | + // Session memory extraction is best-effort during reactive recovery. |
| 117 | + } |
| 118 | +} |
| 119 | + |
| 120 | +function getCompactionStats(loop: CompactionHost): CompactionStats | null { |
| 121 | + if (!loop.contextManager) return null; |
| 122 | + const maxTokens = loop.getEffectiveContextBudget(); |
| 123 | + if (maxTokens <= 0) return null; |
| 124 | + const estimatedTokens = Math.max(loop.estimatedTokens, loop.lastReportedInputTokens); |
| 125 | + const threshold = maxTokens * loop.config.context.triggerRatio; |
| 126 | + const overhead = Math.max(0, loop.lastReportedInputTokens - loop.estimatedTokens); |
| 127 | + return { estimatedTokens, maxTokens, threshold, overhead }; |
| 128 | +} |
| 129 | + |
| 130 | +function skipCompaction( |
| 131 | + loop: CompactionHost, |
| 132 | + stats: CompactionStats, |
| 133 | + options?: { force?: boolean }, |
| 134 | +) { |
| 135 | + if (options?.force || stats.estimatedTokens > stats.threshold) return false; |
| 136 | + maybeWarnApproachingLimit(loop, stats.estimatedTokens, stats.threshold); |
| 137 | + return true; |
| 138 | +} |
| 139 | + |
| 140 | +function maybeWarnApproachingLimit( |
| 141 | + loop: CompactionHost, |
| 142 | + estimatedTokens: number, |
| 143 | + threshold: number, |
| 144 | +): void { |
| 145 | + const warningThreshold = threshold * APPROACHING_LIMIT_RATIO; |
| 146 | + if (loop.approachingLimitWarned || estimatedTokens <= warningThreshold || !loop.sessionState) return; |
| 147 | + loop.approachingLimitWarned = true; |
| 148 | + loop.pushMessage({ |
| 149 | + role: MessageRole.SYSTEM, |
| 150 | + content: "Context is filling up. You MUST persist any analysis conclusions or review findings NOW using save_finding. After context pruning, old tool outputs will be replaced with summaries. Do NOT re-read files already listed in session state — rely on the summaries and findings you've saved.", |
| 151 | + }); |
| 152 | +} |
| 153 | + |
| 154 | +function enforcePinnedTokenBudget(loop: CompactionHost): void { |
| 155 | + let pinnedTokens = 0; |
| 156 | + const pinnedIndices: number[] = []; |
| 157 | + for (let i = 0; i < loop.messages.length; i++) { |
| 158 | + const message = loop.messages[i]!; |
| 159 | + if (!message.pinned) continue; |
| 160 | + pinnedTokens += estimateTokens(message.content ?? ""); |
| 161 | + pinnedIndices.push(i); |
| 162 | + } |
| 163 | + unpinOldestOverBudget(loop, pinnedIndices, pinnedTokens); |
| 164 | +} |
| 165 | + |
| 166 | +function unpinOldestOverBudget( |
| 167 | + loop: CompactionHost, |
| 168 | + pinnedIndices: ReadonlyArray<number>, |
| 169 | + pinnedTokens: number, |
| 170 | +): void { |
| 171 | + let remainingTokens = pinnedTokens; |
| 172 | + for (const index of pinnedIndices) { |
| 173 | + if (remainingTokens <= PINNED_TOKEN_BUDGET) break; |
| 174 | + const message = loop.messages[index]!; |
| 175 | + remainingTokens -= estimateTokens(message.content ?? ""); |
| 176 | + loop.messages[index] = { ...message, pinned: undefined }; |
| 177 | + } |
| 178 | +} |
| 179 | + |
| 180 | +function tryPruneToolOutputs( |
| 181 | + loop: CompactionHost, |
| 182 | + stats: CompactionStats, |
| 183 | + options?: { force?: boolean }, |
| 184 | +) { |
| 185 | + enforcePinnedTokenBudget(loop); |
| 186 | + const messageThreshold = Math.max(0, stats.threshold - stats.overhead); |
| 187 | + const result = loop.pruneToolOutputs(loop.estimatedTokens, messageThreshold); |
| 188 | + if (result.savedTokens <= 0) return { ...result, completed: false }; |
| 189 | + |
| 190 | + loop.resetPostCompactionState(false); |
| 191 | + loop.injectSessionState(); |
| 192 | + loop.estimatedTokens = estimateMessageTokens(loop.messages); |
| 193 | + const postPruneTokens = loop.estimatedTokens; |
| 194 | + if (!options?.force && postPruneTokens + stats.overhead <= stats.threshold) { |
| 195 | + emitCompacted(loop, stats.estimatedTokens, postPruneTokens, result); |
| 196 | + loop.resetPostCompactionState(true); |
| 197 | + return { ...result, completed: true }; |
| 198 | + } |
| 199 | + return { ...result, completed: false }; |
| 200 | +} |
| 201 | + |
| 202 | +function trySessionMemoryCompaction(loop: CompactionHost, stats: CompactionStats): boolean { |
| 203 | + if (!loop.sessionState?.hasContent()) return false; |
| 204 | + const result = trySessionMemoryCompact(loop.messages, loop.sessionState, stats.maxTokens); |
| 205 | + if (!result.success) return false; |
| 206 | + |
| 207 | + loop.messages = result.messages; |
| 208 | + loop.injectSessionState(); |
| 209 | + loop.reinjectSkillContent(); |
| 210 | + loop.resetPostCompactionState(true); |
| 211 | + loop.estimatedTokens = estimateMessageTokens(loop.messages); |
| 212 | + loop.bus.emit("context:compacted", { |
| 213 | + removedCount: 0, |
| 214 | + estimatedTokens: loop.estimatedTokens, |
| 215 | + tokensBefore: stats.estimatedTokens, |
| 216 | + }); |
| 217 | + maybeEmitAggressiveCompactionWarning(loop, stats.estimatedTokens, loop.estimatedTokens, " via session memory"); |
| 218 | + return true; |
| 219 | +} |
| 220 | + |
| 221 | +async function runFullCompaction( |
| 222 | + loop: CompactionHost, |
| 223 | + stats: CompactionStats, |
| 224 | + pruneResult: { readonly savedTokens: number; readonly prunedCount: number }, |
| 225 | +): Promise<void> { |
| 226 | + const summary = buildPreCompactionSummary(loop.sessionState, loop.messages, loop.iterations); |
| 227 | + await extractKnowledgeBeforeCompaction(loop, summary); |
| 228 | + loop.bus.emit("context:compacting", { |
| 229 | + estimatedTokens: stats.estimatedTokens, |
| 230 | + maxTokens: stats.maxTokens, |
| 231 | + }); |
| 232 | + try { |
| 233 | + await truncateWithContextManager(loop, stats, summary, pruneResult); |
| 234 | + } catch (err) { |
| 235 | + loop.bus.emit("error", { |
| 236 | + message: `Context compaction failed: ${(err as Error).message}`, |
| 237 | + code: "COMPACTION_FAILED", |
| 238 | + fatal: true, |
| 239 | + }); |
| 240 | + throw err; |
| 241 | + } |
| 242 | +} |
| 243 | + |
| 244 | +async function truncateWithContextManager( |
| 245 | + loop: CompactionHost, |
| 246 | + stats: CompactionStats, |
| 247 | + summary: string, |
| 248 | + pruneResult: { readonly savedTokens: number; readonly prunedCount: number }, |
| 249 | +): Promise<void> { |
| 250 | + const result = await loop.contextManager!.truncateAsync(loop.messages, stats.maxTokens, { force: true }); |
| 251 | + if (result.truncated) { |
| 252 | + await applyFullCompactionResult(loop, stats, summary, pruneResult, result); |
| 253 | + return; |
| 254 | + } |
| 255 | + if (result.estimatedTokens > stats.maxTokens) { |
| 256 | + throw new Error(`Compaction did not fit budget: ${result.estimatedTokens} > ${stats.maxTokens}`); |
| 257 | + } |
| 258 | +} |
| 259 | + |
| 260 | +async function applyFullCompactionResult( |
| 261 | + loop: CompactionHost, |
| 262 | + stats: CompactionStats, |
| 263 | + summary: string, |
| 264 | + pruneResult: { readonly savedTokens: number; readonly prunedCount: number }, |
| 265 | + result: { readonly messages: ReadonlyArray<Message>; readonly removedCount: number }, |
| 266 | +): Promise<void> { |
| 267 | + loop.messages = [...result.messages]; |
| 268 | + loop.injectSessionState(); |
| 269 | + loop.reinjectSkillContent(); |
| 270 | + loop.resetPostCompactionState(true); |
| 271 | + loop.estimatedTokens = estimateMessageTokens(loop.messages); |
| 272 | + emitCompacted(loop, stats.estimatedTokens, loop.estimatedTokens, pruneResult, result.removedCount); |
| 273 | + maybeEmitAggressiveCompactionWarning(loop, stats.estimatedTokens, loop.estimatedTokens, ""); |
| 274 | + await maybeWarnCompactionQuality(loop, summary); |
| 275 | + maybeAppendContinuationGuidance(loop); |
| 276 | + if (loop.estimatedTokens > stats.maxTokens) { |
| 277 | + throw new Error(`Compaction did not fit budget: ${loop.estimatedTokens} > ${stats.maxTokens}`); |
| 278 | + } |
| 279 | +} |
| 280 | + |
| 281 | +async function extractKnowledgeBeforeCompaction(loop: CompactionHost, summary: string): Promise<void> { |
| 282 | + if (!loop.sessionState) return; |
| 283 | + const firstUser = loop.messages.find((message) => message.role === MessageRole.USER && message.content); |
| 284 | + const result = await extractPreCompactionKnowledge( |
| 285 | + loop.provider, |
| 286 | + summary, |
| 287 | + loop.sessionState, |
| 288 | + loop.messages, |
| 289 | + firstUser?.content ?? null, |
| 290 | + ); |
| 291 | + if (!result) return; |
| 292 | + for (const entry of result.entries) { |
| 293 | + loop.sessionState.addKnowledge(entry.key, entry.content, loop.iterations); |
| 294 | + } |
| 295 | +} |
| 296 | + |
| 297 | +async function maybeWarnCompactionQuality(loop: CompactionHost, summary: string): Promise<void> { |
| 298 | + const result = await judgeCompactionQuality(loop.provider, summary, loop.messages, loop.sessionState); |
| 299 | + if (!result || result.quality_loss < 0.6) return; |
| 300 | + loop.pushMessage({ |
| 301 | + role: MessageRole.SYSTEM, |
| 302 | + content: `COMPACTION GAP WARNING: ${result.recommendation}\nMissing context: ${result.missing_context.join("; ")}`, |
| 303 | + }); |
| 304 | + loop.bus.emit("error", { |
| 305 | + message: `Compaction quality loss: ${result.quality_loss.toFixed(2)}`, |
| 306 | + code: "COMPACTION_QUALITY_LOSS", |
| 307 | + fatal: false, |
| 308 | + }); |
| 309 | +} |
| 310 | + |
| 311 | +function maybeAppendContinuationGuidance(loop: CompactionHost): void { |
| 312 | + if (!loop.sessionState) return; |
| 313 | + loop.pushMessage({ |
| 314 | + role: MessageRole.SYSTEM, |
| 315 | + content: "Context was compacted. Your accumulated knowledge, plan, and file edit history are preserved above. Continue from your current plan step. Your next action should be based on the knowledge and progress sections — do NOT re-scan or re-read files listed in recent activity.", |
| 316 | + }); |
| 317 | +} |
| 318 | + |
| 319 | +function emitCompacted( |
| 320 | + loop: CompactionHost, |
| 321 | + tokensBefore: number, |
| 322 | + estimatedTokens: number, |
| 323 | + pruneResult: { readonly savedTokens: number; readonly prunedCount: number }, |
| 324 | + removedCount: number = 0, |
| 325 | +): void { |
| 326 | + loop.bus.emit("context:compacted", { |
| 327 | + removedCount, |
| 328 | + prunedCount: pruneResult.prunedCount > 0 ? pruneResult.prunedCount : undefined, |
| 329 | + tokensSaved: pruneResult.savedTokens > 0 ? pruneResult.savedTokens : undefined, |
| 330 | + estimatedTokens, |
| 331 | + tokensBefore, |
| 332 | + }); |
| 333 | +} |
| 334 | + |
| 335 | +function maybeEmitAggressiveCompactionWarning( |
| 336 | + loop: CompactionHost, |
| 337 | + before: number, |
| 338 | + after: number, |
| 339 | + suffix: string, |
| 340 | +): void { |
| 341 | + const reduction = before > 0 ? (before - after) / before : 0; |
| 342 | + if (reduction <= 0.5) return; |
| 343 | + loop.bus.emit("error", { |
| 344 | + message: `Aggressive compaction: ${Math.round(reduction * 100)}% reduction (${before} → ${after} tokens)${suffix}. Critical context may be lost.`, |
| 345 | + code: "COMPACTION_AGGRESSIVE", |
| 346 | + fatal: false, |
| 347 | + }); |
| 348 | +} |
0 commit comments