Skip to content

Commit 10d911d

Browse files
fix(cursor-agent): ingest workspace-less CLI transcript layout (#542)
* fix(cursor-agent): ingest workspace-less CLI transcript layout
* fix(cursor-agent): bump parse version so cached sessions pick up the new ingestion

---------

Co-authored-by: AgentSeal <hello@agentseal.org>
1 parent 7c2d36f commit 10d911d

4 files changed

Lines changed: 155 additions & 52 deletions

File tree

src/providers/cursor-agent.ts

Lines changed: 87 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,83 @@ function costModel(model: string): string {
139139
return model === 'cursor-agent-auto' ? CURSOR_AGENT_COST_MODEL : model
140140
}
141141

142+
/**
 * Returns the base file name of a transcript path with a trailing
 * `.jsonl` or `.txt` extension removed.
 *
 * Names that end in neither extension are returned unchanged.
 */
function transcriptStem(transcriptPath: string): string {
  const fileName = basename(transcriptPath)
  // `.jsonl` is checked first, matching the order of the extension tests.
  for (const extension of ['.jsonl', '.txt']) {
    if (fileName.endsWith(extension)) {
      return fileName.slice(0, fileName.length - extension.length)
    }
  }
  return fileName
}
148+
142149
/**
 * Derives a conversation id from a transcript path.
 *
 * When the file stem (name without `.jsonl`/`.txt`) is 36 characters long and
 * matches `UUID_LIKE`, the stem itself is the id. Otherwise the id is the first
 * 16 hex characters of the SHA-1 digest of the full path string.
 */
function toConversationId(transcriptPath: string): string {
  const stem = transcriptStem(transcriptPath)
  const looksLikeUuid = stem.length === 36 && UUID_LIKE.test(stem)
  if (looksLikeUuid) return stem

  // Fallback: hash the whole path, not just the stem.
  const pathDigest = createHash('sha1').update(transcriptPath).digest('hex')
  return pathDigest.slice(0, 16)
}
147154

155+
/**
 * Picks one transcript file name per stem from a directory listing.
 *
 * Only regular files ending in `.jsonl` or `.txt` are considered. When both
 * formats exist for the same stem, the `.jsonl` file is chosen.
 */
function preferredTranscriptNames(
  entries: ReadonlyArray<{ name: string; isFile(): boolean }>,
): string[] {
  const filesByStem = new Map<string, { jsonl?: string; txt?: string }>()

  for (const entry of entries) {
    if (!entry.isFile()) continue
    const isJsonl = entry.name.endsWith('.jsonl')
    if (!isJsonl && !entry.name.endsWith('.txt')) continue

    const stem = transcriptStem(entry.name)
    const known = filesByStem.get(stem) ?? {}
    filesByStem.set(stem, isJsonl ? { ...known, jsonl: entry.name } : { ...known, txt: entry.name })
  }

  const selected: string[] = []
  for (const files of filesByStem.values()) {
    const name = files.jsonl ?? files.txt
    if (name) selected.push(name)
  }
  return selected
}

/**
 * Scans `scanDir` for cursor-agent transcripts and appends one source per
 * transcript to `sources`, all tagged with `projectId`.
 *
 * Recognized layouts:
 * - `.txt` files directly inside `scanDir` (legacy format);
 * - subdirectories whose name matches `UUID_LIKE`, containing `.jsonl`/`.txt`
 *   files and optionally a `subagents/` directory with more transcripts.
 *
 * Inside a UUID directory and inside its `subagents/` directory, a transcript
 * present in both formats is added once, preferring `.jsonl`.
 *
 * @param scanDir   Directory to scan.
 * @param projectId Value stored in each source's `project` field.
 * @param sources   Output array; mutated in place.
 * @throws Propagates the error when `scanDir` itself cannot be read. Failures
 *         reading nested directories are treated as empty directories.
 */
async function appendTranscriptSources(
  scanDir: string,
  projectId: string,
  sources: SessionSource[],
): Promise<void> {
  const addSource = (path: string): void => {
    sources.push({ path, project: projectId, provider: 'cursor-agent' })
  }

  const transcriptEntries = await readdir(scanDir, { withFileTypes: true })
  for (const transcript of transcriptEntries) {
    // Legacy format: .txt files directly in the scan dir
    if (transcript.isFile() && transcript.name.endsWith('.txt')) {
      addSource(join(scanDir, transcript.name))
      continue
    }

    // Composer 2 format: UUID subdirectories with .jsonl files
    if (!transcript.isDirectory() || !UUID_LIKE.test(transcript.name)) continue

    const subdir = join(scanDir, transcript.name)
    const subEntries = await readdir(subdir, { withFileTypes: true }).catch(() => [])

    // Subagent transcripts inside a subagents/ directory. They use the same
    // one-file-per-stem rule as the session files below, so a subagent
    // transcript stored as both .jsonl and .txt is not counted twice.
    for (const sub of subEntries) {
      if (!sub.isDirectory() || sub.name !== 'subagents') continue
      const subagentDir = join(subdir, sub.name)
      const subagentEntries = await readdir(subagentDir, { withFileTypes: true }).catch(() => [])
      for (const name of preferredTranscriptNames(subagentEntries)) {
        addSource(join(subagentDir, name))
      }
    }

    // Session transcripts are added after any subagent transcripts.
    for (const name of preferredTranscriptNames(subEntries)) {
      addSource(join(subdir, name))
    }
  }
}
218+
148219
function extractUserQuery(userBlock: string): string {
149220
const chunks: string[] = []
150221
let cursor = 0
@@ -241,7 +312,7 @@ function parseTranscript(raw: string): { turns: ParsedTurn[]; recognized: boolea
241312

242313
let output = ''
243314
let reasoning = ''
244-
const toolsByTurn: Record<string, boolean> = Object.create(null)
315+
const toolsByTurn = new Map<string, true>()
245316

246317
for (const line of assistantLines) {
247318
if (TOOL_RESULT_MARKER.test(line)) continue
@@ -257,7 +328,7 @@ function parseTranscript(raw: string): { turns: ParsedTurn[]; recognized: boolea
257328
if (toolMatch) {
258329
const parsedTool = parseToolName(toolMatch[1] ?? '')
259330
const toolKey = `cursor:${parsedTool}`
260-
toolsByTurn[toolKey] = true
331+
toolsByTurn.set(toolKey, true)
261332
continue
262333
}
263334

@@ -266,7 +337,7 @@ function parseTranscript(raw: string): { turns: ParsedTurn[]; recognized: boolea
266337

267338
if (pendingUsers.length > 0) {
268339
const userMessage = pendingUsers.shift()!
269-
const tools = Object.keys(toolsByTurn)
340+
const tools = Array.from(toolsByTurn.keys())
270341
turns.push({
271342
userMessage,
272343
assistant: {
@@ -319,13 +390,13 @@ function createParser(
319390
source: SessionSource,
320391
seenKeys: Set<string>,
321392
dbPath: string,
322-
summariesByConversationId: Record<string, ConversationSummary | undefined>,
393+
summariesByConversationId: Map<string, ConversationSummary>,
323394
): SessionParser {
324395
return {
325396
async *parse(): AsyncGenerator<ParsedProviderCall> {
326397
const conversationId = toConversationId(source.path)
327398

328-
let summary = summariesByConversationId[conversationId]
399+
let summary = summariesByConversationId.get(conversationId)
329400
let db: SqliteDatabase | null = null
330401

331402
try {
@@ -348,7 +419,7 @@ function createParser(
348419
title: row.title,
349420
updatedAt: normalizeTimestamp(row.updatedAt),
350421
}
351-
summariesByConversationId[conversationId] = summary
422+
summariesByConversationId.set(conversationId, summary)
352423
}
353424
} catch {
354425
summary = undefined
@@ -426,7 +497,7 @@ export function createCursorAgentProvider(baseDirOverride?: string): Provider {
426497
const baseDir = getCursorAgentBaseDir(baseDirOverride)
427498
const projectsDir = getProjectsDir(baseDir)
428499
const dbPath = getAttributionDbPath(baseDir)
429-
const summariesByConversationId: Record<string, ConversationSummary | undefined> = Object.create(null)
500+
const summariesByConversationId = new Map<string, ConversationSummary>()
430501

431502
return {
432503
name: 'cursor-agent',
@@ -452,50 +523,15 @@ export function createCursorAgentProvider(baseDirOverride?: string): Provider {
452523
if (!entry.isDirectory()) continue
453524

454525
const projectId = prettifyProjectId(entry.name)
455-
const transcriptDir = join(projectsDir, entry.name, 'agent-transcripts')
456-
if (!existsSync(transcriptDir)) continue
457-
458-
const transcriptEntries = await readdir(transcriptDir, { withFileTypes: true })
459-
for (const transcript of transcriptEntries) {
460-
// Legacy format: .txt files directly in agent-transcripts/
461-
if (transcript.isFile() && transcript.name.endsWith('.txt')) {
462-
const transcriptPath = join(transcriptDir, transcript.name)
463-
sources.push({
464-
path: transcriptPath,
465-
project: projectId,
466-
provider: 'cursor-agent',
467-
})
468-
continue
469-
}
470-
471-
// Composer 2 format: UUID subdirectories with .jsonl files
472-
if (transcript.isDirectory() && UUID_LIKE.test(transcript.name)) {
473-
const subdir = join(transcriptDir, transcript.name)
474-
const subEntries = await readdir(subdir, { withFileTypes: true }).catch(() => [])
475-
for (const sub of subEntries) {
476-
if (sub.isFile() && (sub.name.endsWith('.jsonl') || sub.name.endsWith('.txt'))) {
477-
sources.push({
478-
path: join(subdir, sub.name),
479-
project: projectId,
480-
provider: 'cursor-agent',
481-
})
482-
}
483-
// Subagent transcripts inside a subagents/ directory
484-
if (sub.isDirectory() && sub.name === 'subagents') {
485-
const subagentEntries = await readdir(join(subdir, sub.name), { withFileTypes: true }).catch(() => [])
486-
for (const sa of subagentEntries) {
487-
if (!sa.isFile()) continue
488-
if (!sa.name.endsWith('.jsonl') && !sa.name.endsWith('.txt')) continue
489-
sources.push({
490-
path: join(subdir, sub.name, sa.name),
491-
project: projectId,
492-
provider: 'cursor-agent',
493-
})
494-
}
495-
}
496-
}
497-
}
526+
const projectDir = join(projectsDir, entry.name)
527+
if (entry.name === 'agent-transcripts') {
528+
await appendTranscriptSources(projectDir, projectId, sources)
529+
continue
498530
}
531+
532+
const transcriptDir = join(projectDir, 'agent-transcripts')
533+
if (!existsSync(transcriptDir)) continue
534+
await appendTranscriptSources(transcriptDir, projectId, sources)
499535
}
500536

501537
return sources

src/session-cache.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ export const DURABLE_PROVIDER_NAMES: ReadonlySet<string> = new Set(['copilot'])
110110
const PROVIDER_PARSE_VERSIONS: Record<string, string> = {
111111
claude: 'cowork-space-grouping-v1',
112112
cline: 'worktree-project-grouping-v1',
113+
'cursor-agent': 'workspaceless-transcript-v1',
113114
copilot: 'otel-durable-v1',
114115
hermes: 'reasoning-output-accounting-v1',
115116
'ibm-bob': 'worktree-project-grouping-v1',
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"role":"user","message":{"content":[{"type":"text","text":"<user_query>\nRun a quick smoke test\n</user_query>"}]}}
2+
{"role":"assistant","message":{"content":[{"type":"text","text":"Smoke test passed."}]}}

tests/providers/cursor-agent.test.ts

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
2-
import { mkdtemp, mkdir, rm, writeFile } from 'fs/promises'
2+
import { mkdtemp, mkdir, readFile, rm, writeFile } from 'fs/promises'
33
import { existsSync } from 'fs'
44
import { join } from 'path'
55
import { tmpdir } from 'os'
@@ -125,6 +125,40 @@ describe('cursor-agent provider', () => {
125125
expect(sources.every((s) => s.provider === 'cursor-agent')).toBe(true)
126126
})
127127

128+
// A project directory that has no agent-transcripts/ folder must contribute no
// sources, even if it holds a stray .txt file that resembles a transcript.
it('does not scan a workspace root when agent-transcripts is missing', async () => {
  const baseDir = await makeBaseDir()
  const projectRoot = join(baseDir, 'projects', 'workspace-without-transcripts')
  const strayFile = join(projectRoot, 'extension-state.txt')
  const strayContent = 'user:\n<user_query>not a transcript</user_query>\nA:\nnot a cursor-agent answer\n'

  await mkdir(projectRoot, { recursive: true })
  await writeFile(strayFile, strayContent)

  const discovered = await createCursorAgentProvider(baseDir).discoverSessions()

  expect(discovered).toEqual([])
})
142+
143+
// When a UUID session directory holds both <uuid>.jsonl and <uuid>.txt,
// discovery must report exactly one source: the .jsonl file.
it('prefers jsonl over same-session txt inside UUID transcript dirs', async () => {
  const jsonlBody =
    '{"role":"user","message":{"content":[{"type":"text","text":"<user_query>jsonl wins</user_query>"}]}}\n{"role":"assistant","message":{"content":[{"type":"text","text":"jsonl answer"}]}}\n'
  const txtBody = 'user:\n<user_query>txt duplicate</user_query>\nA:\ntxt answer\n'

  const baseDir = await makeBaseDir()
  const sessionDir = join(baseDir, 'projects', 'proj-with-duplicates', 'agent-transcripts', FIXED_UUID)
  const jsonlPath = join(sessionDir, `${FIXED_UUID}.jsonl`)
  const txtPath = join(sessionDir, `${FIXED_UUID}.txt`)

  await mkdir(sessionDir, { recursive: true })
  await writeFile(jsonlPath, jsonlBody)
  await writeFile(txtPath, txtBody)

  const sources = await createCursorAgentProvider(baseDir).discoverSessions()
  const [onlySource] = sources

  expect(sources).toHaveLength(1)
  expect(onlySource!.path).toBe(jsonlPath)
})
161+
128162
it('parses one user/assistant pair with estimated token counts', async () => {
129163
const baseDir = await makeBaseDir()
130164
const transcriptDir = join(baseDir, 'projects', 'my-proj', 'agent-transcripts')
@@ -212,6 +246,36 @@ describe('cursor-agent provider', () => {
212246
stderrSpy.mockRestore()
213247
})
214248

249+
// Workspace-less layout: projects/agent-transcripts/<uuid>/<uuid>.jsonl, with no
// per-workspace directory in between. The fixture transcript is copied into a
// temp base dir, then discovered and parsed.
it('discovers jsonl transcripts stored directly under project dir (workspace-less layout)', async () => {
  const sessionId = '1031d227-0c67-4e17-8954-0b6e2b3322f0'
  const transcriptName = `${sessionId}.jsonl`

  const baseDir = await makeBaseDir()
  const fixtureRoot = join(import.meta.dirname, '../fixtures/cursor-agent/workspace-less')
  const fixturePath = join(fixtureRoot, `projects/agent-transcripts/${sessionId}/${transcriptName}`)
  const sessionDir = join(baseDir, 'projects', 'agent-transcripts', sessionId)

  await mkdir(sessionDir, { recursive: true })
  const fixtureContent = await readFile(fixturePath, 'utf-8')
  await writeFile(join(sessionDir, transcriptName), fixtureContent)

  const provider = createCursorAgentProvider(baseDir)
  const sources = await provider.discoverSessions()

  expect(sources).toHaveLength(1)
  const discovered = sources[0]!
  expect(discovered.project).toBe('transcripts')
  expect(discovered.path.endsWith('.jsonl')).toBe(true)

  const calls = await collectCalls(provider, discovered)
  expect(calls).toHaveLength(1)
  const call = calls[0]!
  expect(call.sessionId).toBe(sessionId)
  expect(call.userMessage).toBe('Run a quick smoke test')
  expect(call.costUSD).toBeGreaterThan(0)
})
278+
215279
it('falls back to stable sha1 conversation id for non-uuid filenames', async () => {
216280
const baseDir = await makeBaseDir()
217281
const transcriptDir = join(baseDir, 'projects', 'sha-proj', 'agent-transcripts')

0 commit comments

Comments
 (0)