Skip to content

Commit f53bbb7

Browse files
OWConnoir and ryoppippi
authored
fix(ccusage): dedupe copied Codex token events (#989)
Deduplicate Codex token usage events with a session-independent fingerprint so branched or repeated session files do not count copied history more than once. Add regression coverage for copied branch history and validate against local Codex logs, where the current parser produced thousands of duplicate token events. Co-authored-by: ryoppippi <1560508+ryoppippi@users.noreply.github.com>
1 parent b438b4d commit f53bbb7

1 file changed

Lines changed: 95 additions & 1 deletion

File tree

apps/ccusage/src/adapter/codex/parser.ts

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import { getCodexSessionsPaths } from './paths.ts';
2525
const LEGACY_FALLBACK_MODEL = 'gpt-5';
2626
const CODEX_JSONL_MARKERS = ['turn_context', '"type":"token_count"', '"type": "token_count"'];
2727
const ENCODED_CODEX_EVENT_NUMBER_STRIDE = 5;
28+
const TOKEN_USAGE_EVENT_KEY_SEPARATOR = '\0';
2829

2930
export function parseTokenCountLineFast(_line: string): ParsedTokenCountLine | null {
3031
if (!hasTokenCountPayload(_line)) {
@@ -122,6 +123,25 @@ function convertToEventUsage(
122123
};
123124
}
124125

126+
function createTokenUsageEventKey(event: TokenUsageEvent): string {
127+
const separator = TOKEN_USAGE_EVENT_KEY_SEPARATOR;
128+
return `${event.timestamp}${separator}${event.model ?? ''}${separator}${event.inputTokens}${separator}${event.cachedInputTokens}${separator}${event.outputTokens}${separator}${event.reasoningOutputTokens}${separator}${event.totalTokens}`;
129+
}
130+
131+
function deduplicateTokenUsageEvents(events: TokenUsageEvent[]): TokenUsageEvent[] {
132+
const seen = new Set<string>();
133+
const deduplicated: TokenUsageEvent[] = [];
134+
for (const event of events) {
135+
const key = createTokenUsageEventKey(event);
136+
if (seen.has(key)) {
137+
continue;
138+
}
139+
seen.add(key);
140+
deduplicated.push(event);
141+
}
142+
return deduplicated;
143+
}
144+
125145
function asRecord(value: unknown): Record<string, unknown> | null {
126146
return value != null && typeof value === 'object' && !Array.isArray(value)
127147
? (value as Record<string, unknown>)
@@ -411,7 +431,9 @@ export async function loadTokenUsageEvents(): Promise<TokenUsageEvent[]> {
411431
const directoryEvents = await Promise.all(
412432
getCodexSessionsPaths().map(loadTokenUsageEventsFromDirectory),
413433
);
414-
return directoryEvents.flat().sort((a, b) => compareStrings(a.timestamp, b.timestamp));
434+
return deduplicateTokenUsageEvents(directoryEvents.flat()).sort((a, b) =>
435+
compareStrings(a.timestamp, b.timestamp),
436+
);
415437
}
416438

417439
async function runCodexWorker(data: CodexWorkerData): Promise<void> {
@@ -734,6 +756,78 @@ if (import.meta.vitest != null) {
734756
{ sessionId: 'b', model: 'gpt-5.2', inputTokens: 20 },
735757
]);
736758
});
759+
760+
// Regression test for copied branch history: when a branched session file
// begins with a verbatim copy of its parent's JSONL history, the copied
// token_count events must be counted only once across both files.
it('deduplicates copied branch history across session files', async () => {
	// Shared prefix: one turn_context line (sets the model) followed by one
	// cumulative token_count event. This exact text appears in both files.
	const copiedHistory = [
		JSON.stringify({
			timestamp: '2026-05-12T08:00:00.000Z',
			type: 'turn_context',
			payload: {
				model: 'gpt-5.2',
			},
		}),
		JSON.stringify({
			timestamp: '2026-05-12T08:01:00.000Z',
			type: 'event_msg',
			payload: {
				type: 'token_count',
				info: {
					total_token_usage: {
						input_tokens: 1_000,
						cached_input_tokens: 100,
						output_tokens: 200,
						reasoning_output_tokens: 20,
						total_tokens: 1_200,
					},
				},
			},
		}),
	].join('\n');

	// Parent file holds only the copied history; the branch file repeats it
	// and then appends one new cumulative token_count event.
	await using fixture = await createFixture({
		sessions: {
			'project-parent.jsonl': copiedHistory,
			'project-branch.jsonl': [
				copiedHistory,
				JSON.stringify({
					timestamp: '2026-05-12T08:02:00.000Z',
					type: 'event_msg',
					payload: {
						type: 'token_count',
						info: {
							total_token_usage: {
								input_tokens: 1_600,
								cached_input_tokens: 300,
								output_tokens: 450,
								reasoning_output_tokens: 40,
								total_tokens: 2_050,
							},
						},
					},
				}),
			].join('\n'),
		},
	});
	// Point the loader at the fixture directory instead of the real Codex home.
	vi.stubEnv('CODEX_HOME', fixture.path);

	const events = await loadTokenUsageEvents();
	// Expect exactly two events: the copied 08:01 event once, and the branch's
	// 08:02 event reported as the delta between cumulative totals
	// (e.g. inputTokens 600 = 1_600 − 1_000, totalTokens 850 = 2_050 − 1_200).
	expect(events).toMatchObject([
		{
			inputTokens: 1_000,
			cachedInputTokens: 100,
			outputTokens: 200,
			totalTokens: 1_200,
		},
		{
			sessionId: 'project-branch',
			inputTokens: 600,
			cachedInputTokens: 200,
			outputTokens: 250,
			totalTokens: 850,
		},
	]);
	// Guard against the duplicated copy sneaking in as a third event.
	expect(events).toHaveLength(2);
});
737831
});
738832

739833
describe('getCodexWorkerThreadCount', () => {

0 commit comments

Comments
 (0)