Skip to content

Commit e538d6d

Browse files
kimjune01 and claude committed
feat(core): v2 union-find with overlap window and deferred summarization
Architectural changes:
- Forest.union() is now synchronous — structural merge only, no LLM calls
- ContextWindow.append() is now synchronous — no blocking
- ContextWindow.render() returns cached summaries (synchronous)
- New resolveDirty() method — async fire-and-forget batch summarization
- Overlap window (graduateAt/evictAt) eliminates staleness

Key improvements over v1:
- O(n) cost instead of O(n²) — each message summarized at most once
- Zero blocking on append or render
- Background resolveDirty runs during main LLM call wait

89/89 tests pass, 0 type errors.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4520529 commit e538d6d

3 files changed

Lines changed: 484 additions & 130 deletions

File tree

packages/core/src/context/chatCompressionService.ts

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -162,13 +162,11 @@ async function truncateHistoryToBudget(
162162
} else if (responseObj && typeof responseObj === 'object') {
163163
if (
164164
'output' in responseObj &&
165-
166165
typeof responseObj['output'] === 'string'
167166
) {
168167
contentStr = responseObj['output'];
169168
} else if (
170169
'content' in responseObj &&
171-
172170
typeof responseObj['content'] === 'string'
173171
) {
174172
contentStr = responseObj['content'];
@@ -237,8 +235,11 @@ async function truncateHistoryToBudget(
237235
return truncatedHistory;
238236
}
239237

240-
/** Number of messages kept verbatim in the hot zone. */
241-
const UNION_FIND_HOT_SIZE = 30;
238+
/** Start graduating messages when hot zone exceeds this count. */
239+
const UNION_FIND_GRADUATE_AT = 26;
240+
241+
/** Evict oldest from hot zone when it exceeds this count. */
242+
const UNION_FIND_EVICT_AT = 30;
242243

243244
/** Maximum number of clusters in the cold zone. */
244245
const UNION_FIND_MAX_COLD_CLUSTERS = 10;
@@ -610,7 +611,8 @@ export class ChatCompressionService {
610611
modelStringToModelConfigAlias(model),
611612
);
612613
const contextWindow = new ContextWindow(embedder, summarizer, {
613-
hotSize: UNION_FIND_HOT_SIZE,
614+
graduateAt: UNION_FIND_GRADUATE_AT,
615+
evictAt: UNION_FIND_EVICT_AT,
614616
maxColdClusters: UNION_FIND_MAX_COLD_CLUSTERS,
615617
mergeThreshold: UNION_FIND_MERGE_THRESHOLD,
616618
});
@@ -628,17 +630,22 @@ export class ChatCompressionService {
628630
.join(' ')
629631
.trim();
630632
if (text) {
631-
await contextWindow.append(text);
633+
contextWindow.append(text);
632634
}
633635
}
634636

635-
// Render the compacted context
637+
// Render the compacted context (synchronous — uses cached summaries)
636638
const rendered = contextWindow.render(
637639
null,
638640
UNION_FIND_RETRIEVE_K,
639641
UNION_FIND_RETRIEVE_MIN_SIM,
640642
);
641643

644+
// Fire-and-forget: resolve dirty clusters in background
645+
// In production, this runs during the main LLM call wait.
646+
// Here we await it since there's no concurrent main call.
647+
await contextWindow.resolveDirty();
648+
642649
// Build new history: cold summaries as a single user message, then hot messages
643650
const coldSummaries = rendered.slice(
644651
0,

0 commit comments

Comments (0)