Skip to content

Commit f9df318

Browse files
committed
feat(compression): add pluggable tokenCounter option
Allow callers to supply a custom per-message token counter via CompressOptions.tokenCounter instead of the hardcoded ceil(chars/3.5) heuristic. The counter is threaded through all budget, stats, and forceConverge paths. The built-in heuristic is exported as defaultTokenCounter for callers that want to wrap it or fall back to it.
1 parent 28b637e commit f9df318

4 files changed

Lines changed: 138 additions & 22 deletions

File tree

src/compress.ts

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,8 @@ function contentLength(msg: Message): number {
319319
return typeof msg.content === 'string' ? msg.content.length : 0;
320320
}
321321

322-
/** Estimate token count for a single message (~3.5 chars/token). */
323-
function estimateTokens(msg: Message): number {
322+
/** Default token counter: ~3.5 chars/token heuristic. */
323+
export function defaultTokenCounter(msg: Message): number {
324324
return Math.ceil(contentLength(msg) / 3.5);
325325
}
326326

@@ -460,6 +460,7 @@ function computeStats(
460460
messagesCompressed: number,
461461
messagesPreserved: number,
462462
sourceVersion: number,
463+
counter: (msg: Message) => number,
463464
messagesDeduped?: number,
464465
messagesFuzzyDeduped?: number,
465466
): CompressResult['compression'] {
@@ -468,8 +469,8 @@ function computeStats(
468469
const totalCompressed = messagesCompressed + (messagesDeduped ?? 0);
469470
const ratio = compressedTotalChars > 0 ? originalTotalChars / compressedTotalChars : 1;
470471

471-
const originalTotalTokens = originalMessages.reduce((sum, m) => sum + estimateTokens(m), 0);
472-
const compressedTotalTokens = resultMessages.reduce((sum, m) => sum + estimateTokens(m), 0);
472+
const originalTotalTokens = sumTokens(originalMessages, counter);
473+
const compressedTotalTokens = sumTokens(resultMessages, counter);
473474
const tokenRatio = compressedTotalTokens > 0 ? originalTotalTokens / compressedTotalTokens : 1;
474475

475476
return {
@@ -492,6 +493,7 @@ function compressSync(
492493
options: CompressOptions = {},
493494
): CompressResult {
494495
const sourceVersion = options.sourceVersion ?? 0;
496+
const counter = options.tokenCounter ?? defaultTokenCounter;
495497

496498
if (messages.length === 0) {
497499
return {
@@ -640,7 +642,7 @@ function compressSync(
640642

641643
return {
642644
messages: result,
643-
compression: computeStats(messages, result, messagesCompressed, messagesPreserved, sourceVersion, messagesDeduped, messagesFuzzyDeduped),
645+
compression: computeStats(messages, result, messagesCompressed, messagesPreserved, sourceVersion, counter, messagesDeduped, messagesFuzzyDeduped),
644646
verbatim,
645647
};
646648
}
@@ -664,6 +666,7 @@ async function compressAsync(
664666
options: CompressOptions = {},
665667
): Promise<CompressResult> {
666668
const sourceVersion = options.sourceVersion ?? 0;
669+
const counter = options.tokenCounter ?? defaultTokenCounter;
667670
const userSummarizer = options.summarizer;
668671

669672
if (messages.length === 0) {
@@ -813,7 +816,7 @@ async function compressAsync(
813816

814817
return {
815818
messages: result,
816-
compression: computeStats(messages, result, messagesCompressed, messagesPreserved, sourceVersion, messagesDeduped, messagesFuzzyDeduped),
819+
compression: computeStats(messages, result, messagesCompressed, messagesPreserved, sourceVersion, counter, messagesDeduped, messagesFuzzyDeduped),
817820
verbatim,
818821
};
819822
}
@@ -822,16 +825,17 @@ async function compressAsync(
822825
// Token budget helpers (absorbed from compressToFit)
823826
// ---------------------------------------------------------------------------
824827

825-
function estimateTokensTotal(messages: Message[]): number {
826-
return messages.reduce((sum, m) => sum + estimateTokens(m), 0);
828+
function sumTokens(messages: Message[], counter: (msg: Message) => number): number {
829+
return messages.reduce((sum, m) => sum + counter(m), 0);
827830
}
828831

829832
function budgetFastPath(
830833
messages: Message[],
831834
tokenBudget: number,
832835
sourceVersion: number,
836+
counter: (msg: Message) => number,
833837
): CompressResult | undefined {
834-
const totalTokens = estimateTokensTotal(messages);
838+
const totalTokens = sumTokens(messages, counter);
835839
if (totalTokens <= tokenBudget) {
836840
return {
837841
messages,
@@ -851,8 +855,8 @@ function budgetFastPath(
851855
return undefined;
852856
}
853857

854-
function addBudgetFields(cr: CompressResult, tokenBudget: number, recencyWindow: number): CompressResult {
855-
const tokens = estimateTokensTotal(cr.messages);
858+
function addBudgetFields(cr: CompressResult, tokenBudget: number, recencyWindow: number, counter: (msg: Message) => number): CompressResult {
859+
const tokens = sumTokens(cr.messages, counter);
856860
return { ...cr, fits: tokens <= tokenBudget, tokenCount: tokens, recencyWindow };
857861
}
858862

@@ -865,6 +869,7 @@ function forceConvergePass(
865869
tokenBudget: number,
866870
preserveRoles: Set<string>,
867871
sourceVersion: number,
872+
counter: (msg: Message) => number,
868873
): CompressResult {
869874
if (cr.fits) return cr;
870875

@@ -899,7 +904,7 @@ function forceConvergePass(
899904
const truncated = content.slice(0, 512);
900905
const tag = `[truncated — ${content.length} chars: ${truncated}]`;
901906

902-
const oldTokens = estimateTokens(m);
907+
const oldTokens = counter(m);
903908

904909
// If already compressed (has _uc_original), just replace content in-place
905910
const hasOriginal = !!(m.metadata?._uc_original);
@@ -922,7 +927,7 @@ function forceConvergePass(
922927
};
923928
}
924929

925-
const newTokens = estimateTokens(messages[cand.idx]);
930+
const newTokens = counter(messages[cand.idx]);
926931
tokenCount -= (oldTokens - newTokens);
927932
}
928933

@@ -937,8 +942,9 @@ function compressSyncWithBudget(
937942
): CompressResult {
938943
const minRw = options.minRecencyWindow ?? 0;
939944
const sourceVersion = options.sourceVersion ?? 0;
945+
const counter = options.tokenCounter ?? defaultTokenCounter;
940946

941-
const fast = budgetFastPath(messages, tokenBudget, sourceVersion);
947+
const fast = budgetFastPath(messages, tokenBudget, sourceVersion, counter);
942948
if (fast) return fast;
943949

944950
let lo = minRw;
@@ -949,7 +955,7 @@ function compressSyncWithBudget(
949955
while (lo < hi) {
950956
const mid = Math.ceil((lo + hi) / 2);
951957
const cr = compressSync(messages, { ...options, recencyWindow: mid, summarizer: undefined, tokenBudget: undefined });
952-
lastResult = addBudgetFields(cr, tokenBudget, mid);
958+
lastResult = addBudgetFields(cr, tokenBudget, mid, counter);
953959
lastRw = mid;
954960

955961
if (lastResult.fits) {
@@ -964,12 +970,12 @@ function compressSyncWithBudget(
964970
result = lastResult;
965971
} else {
966972
const cr = compressSync(messages, { ...options, recencyWindow: lo, summarizer: undefined, tokenBudget: undefined });
967-
result = addBudgetFields(cr, tokenBudget, lo);
973+
result = addBudgetFields(cr, tokenBudget, lo, counter);
968974
}
969975

970976
if (!result.fits && options.forceConverge) {
971977
const preserveRoles = new Set(options.preserve ?? ['system']);
972-
result = forceConvergePass(result, tokenBudget, preserveRoles, sourceVersion);
978+
result = forceConvergePass(result, tokenBudget, preserveRoles, sourceVersion, counter);
973979
}
974980

975981
return result;
@@ -982,8 +988,9 @@ async function compressAsyncWithBudget(
982988
): Promise<CompressResult> {
983989
const minRw = options.minRecencyWindow ?? 0;
984990
const sourceVersion = options.sourceVersion ?? 0;
991+
const counter = options.tokenCounter ?? defaultTokenCounter;
985992

986-
const fast = budgetFastPath(messages, tokenBudget, sourceVersion);
993+
const fast = budgetFastPath(messages, tokenBudget, sourceVersion, counter);
987994
if (fast) return fast;
988995

989996
let lo = minRw;
@@ -994,7 +1001,7 @@ async function compressAsyncWithBudget(
9941001
while (lo < hi) {
9951002
const mid = Math.ceil((lo + hi) / 2);
9961003
const cr = await compressAsync(messages, { ...options, recencyWindow: mid, tokenBudget: undefined });
997-
lastResult = addBudgetFields(cr, tokenBudget, mid);
1004+
lastResult = addBudgetFields(cr, tokenBudget, mid, counter);
9981005
lastRw = mid;
9991006

10001007
if (lastResult.fits) {
@@ -1009,12 +1016,12 @@ async function compressAsyncWithBudget(
10091016
result = lastResult;
10101017
} else {
10111018
const cr = await compressAsync(messages, { ...options, recencyWindow: lo, tokenBudget: undefined });
1012-
result = addBudgetFields(cr, tokenBudget, lo);
1019+
result = addBudgetFields(cr, tokenBudget, lo, counter);
10131020
}
10141021

10151022
if (!result.fits && options.forceConverge) {
10161023
const preserveRoles = new Set(options.preserve ?? ['system']);
1017-
result = forceConvergePass(result, tokenBudget, preserveRoles, sourceVersion);
1024+
result = forceConvergePass(result, tokenBudget, preserveRoles, sourceVersion, counter);
10181025
}
10191026

10201027
return result;

src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// Primary
2-
export { compress } from './compress.js';
2+
export { compress, defaultTokenCounter } from './compress.js';
33
export { uncompress } from './expand.js';
44
export type { StoreLookup } from './expand.js';
55

src/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ export type CompressOptions = {
3232
embedSummaryId?: boolean;
3333
/** Hard-truncate non-recency messages when binary search bottoms out and budget still exceeded. Default: false. */
3434
forceConverge?: boolean;
35+
/** Custom token counter per message. Default: ceil(content.length / 3.5). */
36+
tokenCounter?: (msg: Message) => number;
3537
};
3638

3739
export type VerbatimMap = Record<string, Message>;

tests/compress.test.ts

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2046,3 +2046,110 @@ describe('dedup tag includes keep-target ID', () => {
20462046
expect(result.messages[2].content).not.toMatch(/^\[uc:dup/);
20472047
});
20482048
});
2049+
2050+
// ---------------------------------------------------------------------------
2051+
// Custom tokenCounter
2052+
// ---------------------------------------------------------------------------
2053+
2054+
describe('compress with custom tokenCounter', () => {
2055+
const LONG = 'This is a long message that exceeds the compression threshold and contains enough prose to compress. '.repeat(5);
2056+
2057+
it('tokenCount reflects custom counter with fixed value', () => {
2058+
const messages: Message[] = [
2059+
msg({ id: '1', index: 0, content: LONG }),
2060+
msg({ id: '2', index: 1, content: LONG }),
2061+
msg({ id: '3', index: 2, content: LONG }),
2062+
];
2063+
const fixedValue = 42;
2064+
const result = compress(messages, {
2065+
tokenBudget: 100000,
2066+
tokenCounter: () => fixedValue,
2067+
dedup: false,
2068+
});
2069+
expect(result.tokenCount).toBe(messages.length * fixedValue);
2070+
});
2071+
2072+
it('binary search uses custom counter to fit budget', () => {
2073+
const messages: Message[] = Array.from({ length: 10 }, (_, i) =>
2074+
msg({ id: String(i), index: i, content: LONG }),
2075+
);
2076+
// Each message = 100 tokens with this counter
2077+
const perMsg = 100;
2078+
const budget = perMsg * 5;
2079+
const result = compress(messages, {
2080+
tokenBudget: budget,
2081+
tokenCounter: () => perMsg,
2082+
dedup: false,
2083+
});
2084+
expect(result.tokenCount).toBeLessThanOrEqual(budget);
2085+
expect(result.fits).toBe(true);
2086+
});
2087+
2088+
it('forceConverge uses custom counter for delta math', () => {
2089+
// JSON content > 512 chars is preserved by classifier but eligible for forceConverge truncation
2090+
const bigJson = JSON.stringify({ data: Array.from({ length: 50 }, (_, i) => ({ id: i, value: `item_${i}` })) });
2091+
const messages: Message[] = [
2092+
msg({ id: '0', index: 0, role: 'user', content: bigJson }),
2093+
msg({ id: '1', index: 1, role: 'assistant', content: bigJson }),
2094+
msg({ id: '2', index: 2, role: 'user', content: bigJson }),
2095+
msg({ id: '3', index: 3, role: 'assistant', content: 'recent message' }),
2096+
];
2097+
// Heavy counter: 1 token per char
2098+
const heavyCounter = (m: Message) => typeof m.content === 'string' ? m.content.length : 0;
2099+
const without = compress(messages, {
2100+
tokenBudget: 1,
2101+
tokenCounter: heavyCounter,
2102+
dedup: false,
2103+
minRecencyWindow: 1,
2104+
});
2105+
const withForce = compress(messages, {
2106+
tokenBudget: 1,
2107+
tokenCounter: heavyCounter,
2108+
dedup: false,
2109+
forceConverge: true,
2110+
minRecencyWindow: 1,
2111+
});
2112+
expect(without.fits).toBe(false);
2113+
// forceConverge truncates using the custom counter → lower tokenCount
2114+
expect(withForce.tokenCount!).toBeLessThan(without.tokenCount!);
2115+
});
2116+
2117+
it('token_ratio uses custom counter', () => {
2118+
const messages: Message[] = [
2119+
msg({ id: '1', index: 0, content: LONG }),
2120+
msg({ id: '2', index: 1, content: LONG }),
2121+
];
2122+
// Content-length-based counter with a different ratio than default
2123+
const customCounter = (m: Message) => {
2124+
const len = typeof m.content === 'string' ? m.content.length : 0;
2125+
return Math.ceil(len / 2);
2126+
};
2127+
const defaultResult = compress(messages, { recencyWindow: 0, dedup: false });
2128+
const customResult = compress(messages, {
2129+
recencyWindow: 0,
2130+
tokenCounter: customCounter,
2131+
dedup: false,
2132+
});
2133+
// Both compress, but token_ratio differs because the counter differs
2134+
expect(customResult.compression.token_ratio).toBeGreaterThan(1);
2135+
expect(customResult.compression.token_ratio).not.toBe(defaultResult.compression.token_ratio);
2136+
});
2137+
2138+
it('default behavior unchanged when tokenCounter is omitted', () => {
2139+
const messages: Message[] = [
2140+
msg({ id: '1', index: 0, content: LONG }),
2141+
msg({ id: '2', index: 1, content: LONG }),
2142+
];
2143+
const withDefault = compress(messages, { tokenBudget: 10000, dedup: false });
2144+
const withExplicit = compress(messages, {
2145+
tokenBudget: 10000,
2146+
dedup: false,
2147+
tokenCounter: (m) => {
2148+
const len = typeof m.content === 'string' ? m.content.length : 0;
2149+
return Math.ceil(len / 3.5);
2150+
},
2151+
});
2152+
expect(withDefault.tokenCount).toBe(withExplicit.tokenCount);
2153+
expect(withDefault.fits).toBe(withExplicit.fits);
2154+
});
2155+
});

0 commit comments

Comments
 (0)