diff --git a/internal/config/config.go b/internal/config/config.go index db4f76cf..f4e4fc17 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -569,6 +569,22 @@ func (c *Config) loadEnv() { if v := os.Getenv("AGENTSVIEW_DISABLE_UPDATE_CHECK"); v != "" { c.DisableUpdateCheck = v == "1" || v == "true" } + // CURSOR_STATE_DB overrides the platform-default vscdb + // path that lives inside the cursor agent's configured + // dirs. Append-and-replace: drop any existing + // state.vscdb-named entries (the per-platform defaults) + // and add the user-supplied path so the cursor sync code + // finds it through FindCursorVscdb. + if v := os.Getenv("CURSOR_STATE_DB"); v != "" { + dirs := c.AgentDirs[parser.AgentCursor] + filtered := dirs[:0] + for _, d := range dirs { + if !parser.IsCursorVscdbPath(d) { + filtered = append(filtered, d) + } + } + c.AgentDirs[parser.AgentCursor] = append(filtered, v) + } } type stringListFlag []string diff --git a/internal/parser/cursor_vscdb.go b/internal/parser/cursor_vscdb.go new file mode 100644 index 00000000..46328a2b --- /dev/null +++ b/internal/parser/cursor_vscdb.go @@ -0,0 +1,705 @@ +package parser + +import ( + "database/sql" + "encoding/json" + "fmt" + "log" + "net/url" + "os" + "path/filepath" + "runtime" + "strings" + "time" +) + +// cursorVscdbBasename is the file name of Cursor's global SQLite +// state database. Used to detect virtual paths that point at +// vscdb-synced sessions and to identify vscdb entries within +// the cursor agent's configured paths. +const cursorVscdbBasename = "state.vscdb" + +// IsCursorVscdbPath reports whether path points at a Cursor +// state.vscdb file (basename match). Used by sync code to +// pick the vscdb out of the cursor agent's configured paths +// and by discovery to skip non-transcripts entries. +func IsCursorVscdbPath(path string) bool { + return filepath.Base(path) == cursorVscdbBasename +} + +// FindCursorVscdb returns the first existing state.vscdb path +// in the cursor agent's configured paths, or "" when none +// exist. The agent registry lists the vscdb default for each +// platform and only the matching one resolves at runtime. +func FindCursorVscdb(cursorPaths []string) string { + for _, p := range cursorPaths { + if !IsCursorVscdbPath(p) { + continue + } + if info, err := os.Stat(p); err == nil && + !info.IsDir() { + return p + } + } + return "" +} + +// IsCursorVscdbVirtualPath reports whether path looks like a +// Cursor vscdb virtual session path (state.vscdb#). +// Used by sync code to detect sessions already populated from +// the richer vscdb source so JSONL transcripts don't overwrite +// them on later watcher events. +func IsCursorVscdbVirtualPath(path string) bool { + idx := strings.LastIndex(path, "#") + if idx <= 0 { + return false + } + return filepath.Base(path[:idx]) == cursorVscdbBasename +} + +// CursorVscdbMeta is lightweight session metadata from state.vscdb, +// used by the sync engine to detect changes without parsing messages. +type CursorVscdbMeta struct { + SessionID string + VirtualPath string + FileMtime int64 // lastUpdatedAt in nanoseconds (millis * 1e6) + Project string + Name string + SubComposerIDs []string + CreatedAt int64 // unix millis + LastUpdatedAt int64 // unix millis +} + +// ListCursorVscdbSessions returns metadata for all Cursor sessions +// found in the global state.vscdb. Returns nil without error if the +// file does not exist. Project names are resolved by scanning the +// workspaceStorage directory adjacent to globalStorage. +func ListCursorVscdbSessions( + dbPath string, +) ([]CursorVscdbMeta, error) { + if _, err := os.Stat(dbPath); os.IsNotExist(err) { + return nil, nil + } + + db, err := openCursorVscdb(dbPath) + if err != nil { + return nil, err + } + defer db.Close() + + projects, err := loadCursorWorkspaceProjects(dbPath) + if err != nil { + log.Printf("cursor vscdb: loading workspace projects: %v", err) + // Non-fatal; sessions get "unknown" project. + } + + rows, err := db.Query( + "SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composerData:%'", + ) + if err != nil { + return nil, fmt.Errorf( + "listing cursor vscdb sessions: %w", err, + ) + } + defer rows.Close() + + var metas []CursorVscdbMeta + for rows.Next() { + var key string + var rawVal []byte + if err := rows.Scan(&key, &rawVal); err != nil { + return nil, fmt.Errorf( + "scanning cursor vscdb row: %w", err, + ) + } + + sessionID, ok := strings.CutPrefix(key, "composerData:") + if !ok || sessionID == "" { + continue + } + + var cd cursorComposerData + if err := json.Unmarshal(rawVal, &cd); err != nil { + continue + } + + // Skip sessions with no conversation content. + if len(cd.FullConversationHeadersOnly) == 0 { + continue + } + + project := projects[sessionID] + if project == "" { + project = "unknown" + } + + subIDs := cd.SubComposerIDs + if len(cd.SubagentComposerIDs) > 0 { + subIDs = append(subIDs, cd.SubagentComposerIDs...) + } + + metas = append(metas, CursorVscdbMeta{ + SessionID: sessionID, + VirtualPath: dbPath + "#" + sessionID, + FileMtime: cd.LastUpdatedAt * 1_000_000, + Project: project, + Name: cd.Name, + SubComposerIDs: subIDs, + CreatedAt: cd.CreatedAt, + LastUpdatedAt: cd.LastUpdatedAt, + }) + } + return metas, rows.Err() +} + +// ParseCursorVscdbSession parses a single Cursor session from +// state.vscdb. Returns nil without error for empty sessions. +func ParseCursorVscdbSession( + dbPath, sessionID, project, machine string, +) (*ParsedSession, []ParsedMessage, error) { + if _, err := os.Stat(dbPath); os.IsNotExist(err) { + return nil, nil, fmt.Errorf( + "cursor vscdb not found: %s", dbPath, + ) + } + + db, err := openCursorVscdb(dbPath) + if err != nil { + return nil, nil, err + } + defer db.Close() + + // Load session metadata. + var rawVal []byte + err = db.QueryRow( + "SELECT value FROM cursorDiskKV WHERE key = ?", + "composerData:"+sessionID, + ).Scan(&rawVal) + if err == sql.ErrNoRows { + return nil, nil, nil + } + if err != nil { + return nil, nil, fmt.Errorf( + "loading cursor vscdb session %s: %w", + sessionID, err, + ) + } + + var cd cursorComposerData + if err := json.Unmarshal(rawVal, &cd); err != nil { + return nil, nil, fmt.Errorf( + "parsing cursor vscdb composerData %s: %w", + sessionID, err, + ) + } + + if len(cd.FullConversationHeadersOnly) == 0 { + return nil, nil, nil + } + + // Load all bubbles for this session. + bubbles, err := loadCursorBubbles(db, sessionID) + if err != nil { + return nil, nil, fmt.Errorf( + "loading cursor vscdb bubbles %s: %w", + sessionID, err, + ) + } + + msgs := buildCursorVscdbMessages( + cd.FullConversationHeadersOnly, bubbles, + ) + + if len(msgs) == 0 { + return nil, nil, nil + } + + firstMsg := "" + userCount := 0 + for _, m := range msgs { + if m.Role == RoleUser { + userCount++ + if firstMsg == "" && m.Content != "" { + firstMsg = truncate( + strings.ReplaceAll(m.Content, "\n", " "), + 300, + ) + } + } + } + + if userCount == 0 { + return nil, nil, nil + } + + startedAt := millisToTime(cd.CreatedAt) + endedAt := millisToTime(cd.LastUpdatedAt) + + if project == "" { + project = "unknown" + } + + sess := &ParsedSession{ + ID: "cursor:" + sessionID, + Project: project, + Machine: machine, + Agent: AgentCursor, + FirstMessage: firstMsg, + StartedAt: startedAt, + EndedAt: endedAt, + MessageCount: len(msgs), + UserMessageCount: userCount, + File: FileInfo{ + Path: dbPath + "#" + sessionID, + Mtime: cd.LastUpdatedAt * 1_000_000, + }, + } + + return sess, msgs, nil +} + +// cursorComposerData is the JSON structure stored under +// composerData: in the cursorDiskKV table. +type cursorComposerData struct { + ComposerID string `json:"composerId"` + Name string `json:"name"` + CreatedAt int64 `json:"createdAt"` + LastUpdatedAt int64 `json:"lastUpdatedAt"` + FullConversationHeadersOnly []cursorBubbleHeader `json:"fullConversationHeadersOnly"` + SubComposerIDs []string `json:"subComposerIds"` + SubagentComposerIDs []string `json:"subagentComposerIds"` + Status string `json:"status"` + UnifiedMode string `json:"unifiedMode"` +} + +// cursorBubbleHeader is one entry in fullConversationHeadersOnly. +type cursorBubbleHeader struct { + BubbleID string `json:"bubbleId"` + Type int `json:"type"` // 1=user, 2=assistant +} + +// cursorBubble is the JSON structure stored under +// bubbleId:: in cursorDiskKV. +type cursorBubble struct { + BubbleID string `json:"bubbleId"` + Type int `json:"type"` // 1=user, 2=assistant + Text string `json:"text"` + CreatedAt string `json:"createdAt"` // ISO 8601 string + ToolFormerData *cursorToolFormerData `json:"toolFormerData"` +} + +// cursorToolFormerData holds tool call information embedded in +// an assistant bubble. +type cursorToolFormerData struct { + Name string `json:"name"` + ToolCallID string `json:"toolCallId"` + Status string `json:"status"` + // Params and Result are JSON strings (not nested objects). + Params json.RawMessage `json:"params"` + Result json.RawMessage `json:"result"` +} + +func openCursorVscdb(dbPath string) (*sql.DB, error) { + // mode=ro avoids racing Cursor's writer; _busy_timeout + // retries briefly when Cursor holds a SHARED lock. + // _journal_mode is intentionally not forced — Cursor + // already runs in WAL, and forcing it here would require + // a write that fails under mode=ro. + dsn := cursorVscdbDSN( + dbPath, "mode=ro&_busy_timeout=3000", + ) + db, err := sql.Open("sqlite3", dsn) + if err != nil { + return nil, fmt.Errorf( + "opening cursor vscdb %s: %w", dbPath, err, + ) + } + return db, nil +} + +// cursorVscdbDSN builds a SQLite file: URI for the given path +// with the supplied query parameters. Going through a proper +// URI (rather than `path?params` concatenation) ensures any +// `?`, `#`, or `%` characters in the filesystem path are +// percent-encoded instead of being parsed by the SQLite driver +// as part of the DSN query string. Forward slashes are used +// regardless of OS so the URI matches RFC 3986. +func cursorVscdbDSN(dbPath, rawQuery string) string { + p := filepath.ToSlash(dbPath) + // Ensure a leading slash so url.URL serializes as + // "file:///path" or "file:///C:/path" rather than + // "file://C:/path", which omits the host component. + if !strings.HasPrefix(p, "/") { + p = "/" + p + } + u := &url.URL{ + Scheme: "file", + Path: p, + RawQuery: rawQuery, + } + return u.String() +} + +// loadCursorWorkspaceProjects scans workspaceStorage directories +// adjacent to globalStorage and returns a map of +// composerId → project name. +func loadCursorWorkspaceProjects( + globalDbPath string, +) (map[string]string, error) { + // globalStorage/state.vscdb → workspaceStorage/ + globalStorageDir := filepath.Dir(globalDbPath) + userDir := filepath.Dir(globalStorageDir) + wsDir := filepath.Join(userDir, "workspaceStorage") + + entries, err := os.ReadDir(wsDir) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf( + "reading workspaceStorage: %w", err, + ) + } + + projects := make(map[string]string) + for _, e := range entries { + if !e.IsDir() { + continue + } + dirPath := filepath.Join(wsDir, e.Name()) + project := extractWorkspaceProject(dirPath) + if project == "" { + continue + } + ids := extractWorkspaceComposerIDs(dirPath) + for _, id := range ids { + if id != "" { + projects[id] = project + } + } + } + return projects, nil +} + +// extractWorkspaceProject reads the project path from +// workspaceStorage//workspace.json. +func extractWorkspaceProject(dirPath string) string { + wjPath := filepath.Join(dirPath, "workspace.json") + data, err := os.ReadFile(wjPath) + if err != nil { + return "" + } + var wj struct { + Folder string `json:"folder"` + } + if err := json.Unmarshal(data, &wj); err != nil { + return "" + } + if wj.Folder == "" { + return "" + } + + // folder is a file:// URL, e.g. "file:///home/user/proj" + // or "file:///C:/repo" / "file://host/share" on Windows. + folderPath := wj.Folder + if strings.HasPrefix(folderPath, "file://") { + folderPath = fileURLToPath(folderPath) + } + + return ExtractProjectFromCwd(folderPath) +} + +// fileURLToPath converts a "file://" URL into a native path, +// preserving Windows drive letters and UNC host components +// when running on Windows. Returns "" when the URL cannot be +// parsed; returns the input unchanged when it lacks the +// "file://" scheme. +func fileURLToPath(raw string) string { + if !strings.HasPrefix(raw, "file://") { + return raw + } + u, err := url.Parse(raw) + if err != nil { + return "" + } + path, err := url.PathUnescape(u.Path) + if err != nil { + path = u.Path + } + if runtime.GOOS == "windows" { + // "file://host/share" → UNC: \\host\share + if u.Host != "" { + return `\\` + u.Host + filepath.FromSlash(path) + } + // "file:///C:/repo" → "/C:/repo"; strip the leading + // slash so the drive letter lands at the start. + path = strings.TrimPrefix(path, "/") + return filepath.FromSlash(path) + } + if u.Host != "" { + // Non-Windows hosts cannot mount remote UNC paths + // directly; fall back to the path component so + // project extraction still has something to work with. + return path + } + return path +} + +// extractWorkspaceComposerIDs reads composer IDs from +// workspaceStorage//state.vscdb ItemTable. +func extractWorkspaceComposerIDs(dirPath string) []string { + dbPath := filepath.Join(dirPath, "state.vscdb") + if _, err := os.Stat(dbPath); os.IsNotExist(err) { + return nil + } + + db, err := sql.Open( + "sqlite3", + cursorVscdbDSN(dbPath, "mode=ro&_busy_timeout=3000"), + ) + if err != nil { + return nil + } + defer db.Close() + + var rawVal []byte + err = db.QueryRow( + "SELECT value FROM ItemTable WHERE key = 'composer.composerData'", + ).Scan(&rawVal) + if err != nil { + return nil + } + + var cd struct { + AllComposers []struct { + ComposerID string `json:"composerId"` + } `json:"allComposers"` + } + if err := json.Unmarshal(rawVal, &cd); err != nil { + return nil + } + + ids := make([]string, 0, len(cd.AllComposers)) + for _, c := range cd.AllComposers { + if c.ComposerID != "" { + ids = append(ids, c.ComposerID) + } + } + return ids +} + +// loadCursorBubbles fetches all bubble data for a session, +// keyed by bubble ID. +func loadCursorBubbles( + db *sql.DB, sessionID string, +) (map[string]cursorBubble, error) { + rows, err := db.Query( + "SELECT key, value FROM cursorDiskKV WHERE key LIKE ?", + "bubbleId:"+sessionID+":%", + ) + if err != nil { + return nil, err + } + defer rows.Close() + + bubbles := make(map[string]cursorBubble) + for rows.Next() { + var key string + var rawVal []byte + if err := rows.Scan(&key, &rawVal); err != nil { + return nil, err + } + + // key = "bubbleId::" + parts := strings.SplitN(key, ":", 3) + if len(parts) != 3 { + continue + } + bubbleID := parts[2] + + var b cursorBubble + if err := json.Unmarshal(rawVal, &b); err != nil { + continue + } + bubbles[bubbleID] = b + } + return bubbles, rows.Err() +} + +// buildCursorVscdbMessages reconstructs ParsedMessages from bubble +// headers and bubble data. Consecutive assistant bubbles (text + +// tool calls) are merged into a single assistant ParsedMessage. +func buildCursorVscdbMessages( + headers []cursorBubbleHeader, + bubbles map[string]cursorBubble, +) []ParsedMessage { + var msgs []ParsedMessage + ordinal := 0 + + // Tracks the current assistant message being assembled. + var curAsst *ParsedMessage + + flushAssistant := func() { + if curAsst == nil { + return + } + if strings.TrimSpace(curAsst.Content) != "" || + curAsst.HasToolUse { + msgs = append(msgs, *curAsst) + ordinal++ + } + curAsst = nil + } + + for _, h := range headers { + b, ok := bubbles[h.BubbleID] + if !ok { + continue + } + + switch h.Type { + case 1: // user + flushAssistant() + text := strings.TrimSpace(b.Text) + if text == "" { + continue + } + msgs = append(msgs, ParsedMessage{ + Ordinal: ordinal, + Role: RoleUser, + Content: text, + Timestamp: parseCursorBubbleTime(b.CreatedAt), + ContentLength: len(text), + }) + ordinal++ + + case 2: // assistant + isToolCall := b.ToolFormerData != nil && + b.ToolFormerData.Name != "" + + if curAsst == nil { + ts := parseCursorBubbleTime(b.CreatedAt) + curAsst = &ParsedMessage{ + Ordinal: ordinal, + Role: RoleAssistant, + Timestamp: ts, + } + } + + if isToolCall { + tc := buildCursorToolCall(b.ToolFormerData) + curAsst.ToolCalls = append( + curAsst.ToolCalls, tc, + ) + curAsst.HasToolUse = true + if tr, ok := buildCursorToolResult( + b.ToolFormerData, + ); ok { + curAsst.ToolResults = append( + curAsst.ToolResults, tr, + ) + } + } else { + text := strings.TrimSpace(b.Text) + if text != "" { + if curAsst.Content != "" { + curAsst.Content += "\n" + } + curAsst.Content += text + } + } + } + } + + flushAssistant() + + // Update ContentLength on all messages. + for i := range msgs { + msgs[i].ContentLength = len(msgs[i].Content) + } + + return msgs +} + +// buildCursorToolCall converts a cursorToolFormerData into a +// ParsedToolCall using the vscdb tool name taxonomy. +func buildCursorToolCall( + tf *cursorToolFormerData, +) ParsedToolCall { + if tf == nil { + return ParsedToolCall{} + } + + inputJSON := "" + if len(tf.Params) > 0 { + // params may be a JSON string (needs unquoting) or + // a JSON object — normalize to object form. + inputJSON = normalizeCursorParamsJSON(tf.Params) + } + + return ParsedToolCall{ + ToolUseID: tf.ToolCallID, + ToolName: tf.Name, + Category: NormalizeToolCategory(tf.Name), + InputJSON: inputJSON, + } +} + +// buildCursorToolResult converts the result field on a +// cursorToolFormerData into a ParsedToolResult. Returns false +// when the tool call has no captured result so the caller can +// skip empty entries. ContentRaw stores the raw JSON value as +// returned by Cursor; ContentLength reflects the decoded +// textual length so search/analytics see comparable numbers +// to JSONL-sourced sessions. +func buildCursorToolResult( + tf *cursorToolFormerData, +) (ParsedToolResult, bool) { + if tf == nil || len(tf.Result) == 0 || tf.ToolCallID == "" { + return ParsedToolResult{}, false + } + raw := string(tf.Result) + return ParsedToolResult{ + ToolUseID: tf.ToolCallID, + ContentLength: len(DecodeContent(raw)), + ContentRaw: raw, + }, true +} + +// normalizeCursorParamsJSON handles the case where params is +// stored as a JSON-encoded string (a string containing JSON) +// rather than a JSON object directly. +func normalizeCursorParamsJSON(raw json.RawMessage) string { + if len(raw) == 0 { + return "" + } + // If it's a JSON string, unwrap it. + if raw[0] == '"' { + var s string + if err := json.Unmarshal(raw, &s); err == nil { + return s + } + } + // Already a JSON object or array. + return string(raw) +} + +// parseCursorBubbleTime parses the ISO 8601 createdAt string +// used in Cursor bubbles. Returns zero time on parse failure. +func parseCursorBubbleTime(s string) time.Time { + if s == "" { + return time.Time{} + } + formats := []string{ + time.RFC3339Nano, + time.RFC3339, + "2006-01-02T15:04:05.999Z", + "2006-01-02T15:04:05Z", + } + for _, f := range formats { + if t, err := time.Parse(f, s); err == nil { + return t + } + } + return time.Time{} +} diff --git a/internal/parser/cursor_vscdb_test.go b/internal/parser/cursor_vscdb_test.go new file mode 100644 index 00000000..8057250e --- /dev/null +++ b/internal/parser/cursor_vscdb_test.go @@ -0,0 +1,831 @@ +package parser + +import ( + "database/sql" + "encoding/json" + "os" + "path/filepath" + "runtime" + "testing" + + _ "github.com/mattn/go-sqlite3" +) + +// createTestVscdb creates a minimal Cursor state.vscdb SQLite +// database at path with the cursorDiskKV table. +func createTestVscdb(t *testing.T, path string) *sql.DB { + t.Helper() + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + db, err := sql.Open("sqlite3", path) + if err != nil { + t.Fatalf("open vscdb: %v", err) + } + _, err = db.Exec(` + CREATE TABLE cursorDiskKV ( + key TEXT UNIQUE ON CONFLICT REPLACE, + value BLOB + ) + `) + if err != nil { + t.Fatalf("create table: %v", err) + } + return db +} + +// insertComposerData inserts a composerData entry. +func insertComposerData( + t *testing.T, db *sql.DB, + sessionID string, data cursorComposerData, +) { + t.Helper() + raw, err := json.Marshal(data) + if err != nil { + t.Fatalf("marshal composerData: %v", err) + } + _, err = db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "composerData:"+sessionID, raw, + ) + if err != nil { + t.Fatalf("insert composerData: %v", err) + } +} + +// insertBubble inserts a bubbleId entry. +func insertBubble( + t *testing.T, db *sql.DB, + sessionID, bubbleID string, bubble cursorBubble, +) { + t.Helper() + raw, err := json.Marshal(bubble) + if err != nil { + t.Fatalf("marshal bubble: %v", err) + } + _, err = db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "bubbleId:"+sessionID+":"+bubbleID, raw, + ) + if err != nil { + t.Fatalf("insert bubble: %v", err) + } +} + +func TestListCursorVscdbSessions_NonExistent(t *testing.T) { + metas, err := ListCursorVscdbSessions( + "/nonexistent/state.vscdb", + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if metas != nil { + t.Errorf("expected nil for nonexistent db, got %v", metas) + } +} + +func TestListCursorVscdbSessions_Empty(t *testing.T) { + dir := t.TempDir() + dbPath := filepath.Join(dir, "state.vscdb") + db := createTestVscdb(t, dbPath) + db.Close() + + metas, err := ListCursorVscdbSessions(dbPath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(metas) != 0 { + t.Errorf("expected 0 metas, got %d", len(metas)) + } +} + +func TestListCursorVscdbSessions_SkipsEmpty(t *testing.T) { + dir := t.TempDir() + dbPath := filepath.Join(dir, "state.vscdb") + db := createTestVscdb(t, dbPath) + defer db.Close() + + // Session with no headers — should be skipped. + insertComposerData(t, db, "session-empty", cursorComposerData{ + ComposerID: "session-empty", + CreatedAt: 1000000, + LastUpdatedAt: 2000000, + FullConversationHeadersOnly: nil, + }) + + // Session with headers — should appear. + insertComposerData(t, db, "session-ok", cursorComposerData{ + ComposerID: "session-ok", + Name: "Test session", + CreatedAt: 1000000, + LastUpdatedAt: 2000000, + FullConversationHeadersOnly: []cursorBubbleHeader{ + {BubbleID: "b1", Type: 1}, + }, + }) + + db.Close() + + metas, err := ListCursorVscdbSessions(dbPath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(metas) != 1 { + t.Errorf("expected 1 meta, got %d", len(metas)) + } + if metas[0].SessionID != "session-ok" { + t.Errorf("got session %q, want session-ok", metas[0].SessionID) + } + if metas[0].Name != "Test session" { + t.Errorf("got name %q, want 'Test session'", metas[0].Name) + } + if metas[0].FileMtime != 2000000*1_000_000 { + t.Errorf( + "FileMtime = %d, want %d", + metas[0].FileMtime, 2000000*1_000_000, + ) + } + if metas[0].VirtualPath != dbPath+"#session-ok" { + t.Errorf( + "VirtualPath = %q, want %q", + metas[0].VirtualPath, dbPath+"#session-ok", + ) + } +} + +func TestListCursorVscdbSessions_SubComposerIDs(t *testing.T) { + dir := t.TempDir() + dbPath := filepath.Join(dir, "state.vscdb") + db := createTestVscdb(t, dbPath) + + insertComposerData(t, db, "parent-session", cursorComposerData{ + ComposerID: "parent-session", + CreatedAt: 1000000, + LastUpdatedAt: 2000000, + SubComposerIDs: []string{"child-1", "child-2"}, + FullConversationHeadersOnly: []cursorBubbleHeader{ + {BubbleID: "b1", Type: 1}, + }, + }) + db.Close() + + metas, err := ListCursorVscdbSessions(dbPath) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(metas) != 1 { + t.Fatalf("expected 1 meta, got %d", len(metas)) + } + if len(metas[0].SubComposerIDs) != 2 { + t.Errorf( + "SubComposerIDs len = %d, want 2", + len(metas[0].SubComposerIDs), + ) + } +} + +func TestParseCursorVscdbSession_NonExistent(t *testing.T) { + sess, msgs, err := ParseCursorVscdbSession( + "/nonexistent/state.vscdb", + "some-id", "myproject", "local", + ) + if err == nil { + t.Fatal("expected error for nonexistent db") + } + if sess != nil || msgs != nil { + t.Error("expected nil session and messages") + } +} + +func TestParseCursorVscdbSession_BasicTextOnly(t *testing.T) { + dir := t.TempDir() + dbPath := filepath.Join(dir, "state.vscdb") + db := createTestVscdb(t, dbPath) + + sessionID := "test-session-1" + bubble1 := "bubble-user-1" + bubble2 := "bubble-asst-1" + + insertComposerData(t, db, sessionID, cursorComposerData{ + ComposerID: sessionID, + Name: "My test session", + CreatedAt: 1000000, + LastUpdatedAt: 2000000, + FullConversationHeadersOnly: []cursorBubbleHeader{ + {BubbleID: bubble1, Type: 1}, + {BubbleID: bubble2, Type: 2}, + }, + }) + + insertBubble(t, db, sessionID, bubble1, cursorBubble{ + BubbleID: bubble1, + Type: 1, + Text: "Hello, can you help me?", + CreatedAt: "2025-01-01T10:00:00.000Z", + }) + insertBubble(t, db, sessionID, bubble2, cursorBubble{ + BubbleID: bubble2, + Type: 2, + Text: "Of course! What do you need?", + CreatedAt: "2025-01-01T10:00:01.000Z", + }) + + db.Close() + + sess, msgs, err := ParseCursorVscdbSession( + dbPath, sessionID, "myproject", "local", + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if sess == nil { + t.Fatal("expected non-nil session") + } + + assertEq(t, "ID", sess.ID, "cursor:"+sessionID) + assertEq(t, "Project", sess.Project, "myproject") + assertEq(t, "Machine", sess.Machine, "local") + assertEq(t, "Agent", string(sess.Agent), "cursor") + assertEq(t, "MessageCount", sess.MessageCount, 2) + assertEq(t, "UserMessageCount", sess.UserMessageCount, 1) + if sess.FirstMessage == "" { + t.Error("expected non-empty FirstMessage") + } + + if len(msgs) != 2 { + t.Fatalf("expected 2 messages, got %d", len(msgs)) + } + assertEq(t, "msgs[0].Role", string(msgs[0].Role), "user") + assertEq(t, "msgs[0].Content", msgs[0].Content, "Hello, can you help me?") + assertEq(t, "msgs[1].Role", string(msgs[1].Role), "assistant") + assertEq(t, "msgs[1].Content", msgs[1].Content, "Of course! What do you need?") +} + +func TestParseCursorVscdbSession_WithToolCall(t *testing.T) { + dir := t.TempDir() + dbPath := filepath.Join(dir, "state.vscdb") + db := createTestVscdb(t, dbPath) + + sessionID := "tool-session" + b1 := "b-user" + b2 := "b-tool" + b3 := "b-text" + + params := json.RawMessage(`{"pattern":"foo","path":"/src"}`) + + insertComposerData(t, db, sessionID, cursorComposerData{ + ComposerID: sessionID, + CreatedAt: 1000000, + LastUpdatedAt: 2000000, + FullConversationHeadersOnly: []cursorBubbleHeader{ + {BubbleID: b1, Type: 1}, + {BubbleID: b2, Type: 2}, + {BubbleID: b3, Type: 2}, + }, + }) + + insertBubble(t, db, sessionID, b1, cursorBubble{ + BubbleID: b1, + Type: 1, + Text: "Search for foo in /src", + CreatedAt: "2025-01-01T10:00:00.000Z", + }) + insertBubble(t, db, sessionID, b2, cursorBubble{ + BubbleID: b2, + Type: 2, + CreatedAt: "2025-01-01T10:00:01.000Z", + ToolFormerData: &cursorToolFormerData{ + Name: "grep", + ToolCallID: "call-001", + Status: "completed", + Params: params, + }, + }) + insertBubble(t, db, sessionID, b3, cursorBubble{ + BubbleID: b3, + Type: 2, + Text: "Found 3 matches.", + CreatedAt: "2025-01-01T10:00:02.000Z", + }) + + db.Close() + + sess, msgs, err := ParseCursorVscdbSession( + dbPath, sessionID, "myproject", "local", + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if sess == nil { + t.Fatal("expected non-nil session") + } + // User message + one merged assistant message. + if len(msgs) != 2 { + t.Fatalf("expected 2 messages, got %d", len(msgs)) + } + + asstMsg := msgs[1] + assertEq(t, "asstMsg.Role", string(asstMsg.Role), "assistant") + assertEq(t, "asstMsg.HasToolUse", asstMsg.HasToolUse, true) + assertEq(t, "asstMsg.Content", asstMsg.Content, "Found 3 matches.") + if len(asstMsg.ToolCalls) != 1 { + t.Fatalf( + "expected 1 tool call, got %d", + len(asstMsg.ToolCalls), + ) + } + tc := asstMsg.ToolCalls[0] + assertEq(t, "tc.ToolName", tc.ToolName, "grep") + assertEq(t, "tc.Category", tc.Category, "Grep") + assertEq(t, "tc.ToolUseID", tc.ToolUseID, "call-001") + if tc.InputJSON == "" { + t.Error("expected non-empty InputJSON") + } +} + +func TestParseCursorVscdbSession_PersistsToolResults(t *testing.T) { + dir := t.TempDir() + dbPath := filepath.Join(dir, "state.vscdb") + db := createTestVscdb(t, dbPath) + + sessionID := "tool-result-session" + bUser := "u1" + bTool := "t1" + + insertComposerData(t, db, sessionID, cursorComposerData{ + ComposerID: sessionID, + CreatedAt: 1000000, + LastUpdatedAt: 2000000, + FullConversationHeadersOnly: []cursorBubbleHeader{ + {BubbleID: bUser, Type: 1}, + {BubbleID: bTool, Type: 2}, + }, + }) + + insertBubble(t, db, sessionID, bUser, cursorBubble{ + BubbleID: bUser, + Type: 1, + Text: "List the files", + CreatedAt: "2025-01-01T10:00:00.000Z", + }) + resultJSON := json.RawMessage(`"file1.go\nfile2.go\nfile3.go"`) + insertBubble(t, db, sessionID, bTool, cursorBubble{ + BubbleID: bTool, + Type: 2, + CreatedAt: "2025-01-01T10:00:01.000Z", + ToolFormerData: &cursorToolFormerData{ + Name: "list_dir", + ToolCallID: "call-list-001", + Status: "completed", + Params: json.RawMessage(`{"path":"/src"}`), + Result: resultJSON, + }, + }) + + db.Close() + + _, msgs, err := ParseCursorVscdbSession( + dbPath, sessionID, "proj", "local", + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(msgs) != 2 { + t.Fatalf("expected 2 messages, got %d", len(msgs)) + } + + asst := msgs[1] + if len(asst.ToolResults) != 1 { + t.Fatalf( + "expected 1 tool result, got %d", + len(asst.ToolResults), + ) + } + tr := asst.ToolResults[0] + assertEq(t, "tr.ToolUseID", tr.ToolUseID, "call-list-001") + if tr.ContentRaw != string(resultJSON) { + t.Errorf( + "tr.ContentRaw = %q, want %q", + tr.ContentRaw, string(resultJSON), + ) + } + // "file1.go\nfile2.go\nfile3.go" decodes to 26 chars. + if tr.ContentLength != 26 { + t.Errorf( + "tr.ContentLength = %d, want 26", + tr.ContentLength, + ) + } +} + +func TestParseCursorVscdbSession_EmptySession(t *testing.T) { + dir := t.TempDir() + dbPath := filepath.Join(dir, "state.vscdb") + db := createTestVscdb(t, dbPath) + + // Session with headers but no matching bubble data. + insertComposerData(t, db, "empty-session", cursorComposerData{ + ComposerID: "empty-session", + CreatedAt: 1000000, + LastUpdatedAt: 2000000, + FullConversationHeadersOnly: []cursorBubbleHeader{ + {BubbleID: "missing-bubble", Type: 1}, + }, + }) + db.Close() + + sess, msgs, err := ParseCursorVscdbSession( + dbPath, "empty-session", "proj", "local", + ) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if sess != nil { + t.Errorf("expected nil session for empty content, got %+v", sess) + } + if msgs != nil { + t.Errorf("expected nil messages, got %v", msgs) + } +} + +func TestCursorVscdbDSN(t *testing.T) { + tests := []struct { + name string + path string + rawQuery string + want string + }{ + { + name: "plain-path", + path: "/home/u/state.vscdb", + rawQuery: "mode=ro", + want: "file:///home/u/state.vscdb?mode=ro", + }, + { + name: "path-with-question-mark", + path: "/tmp/foo?bar/state.vscdb", + rawQuery: "mode=ro", + want: "file:///tmp/foo%3Fbar/state.vscdb?mode=ro", + }, + { + name: "path-with-hash", + path: "/tmp/foo#bar/state.vscdb", + rawQuery: "mode=ro&_busy_timeout=3000", + want: "file:///tmp/foo%23bar/state.vscdb?mode=ro&_busy_timeout=3000", + }, + { + name: "path-with-percent", + path: "/tmp/has%20space/state.vscdb", + rawQuery: "mode=ro", + want: "file:///tmp/has%2520space/state.vscdb?mode=ro", + }, + { + name: "path-with-space", + path: "/home/u/My Cursor/state.vscdb", + rawQuery: "mode=ro", + want: "file:///home/u/My%20Cursor/state.vscdb?mode=ro", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("posix path expectations") + } + got := cursorVscdbDSN(tt.path, tt.rawQuery) + if got != tt.want { + t.Errorf( + "cursorVscdbDSN(%q, %q) = %q, want %q", + tt.path, tt.rawQuery, got, tt.want, + ) + } + }) + } +} + +func TestOpenCursorVscdb_PathWithSpecialChars(t *testing.T) { + // Verify openCursorVscdb opens DBs whose path contains + // characters that would otherwise be parsed by the + // sqlite3 driver as DSN separators when concatenated raw. + if runtime.GOOS == "windows" { + t.Skip("'?' and '#' are not valid in Windows filenames") + } + parent := filepath.Join(t.TempDir(), "weird?#dir") + if err := os.MkdirAll(parent, 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + dbPath := filepath.Join(parent, "state.vscdb") + // Bootstrap the DB through the same DSN helper so the + // file lands at the intended path; sql.Open with a raw + // path containing '?' would otherwise be split by the + // sqlite3 driver. + dsn := cursorVscdbDSN(dbPath, "") + d, err := sql.Open("sqlite3", dsn) + if err != nil { + t.Fatalf("bootstrap open: %v", err) + } + if _, err := d.Exec(`CREATE TABLE cursorDiskKV ( + key TEXT UNIQUE ON CONFLICT REPLACE, + value BLOB + )`); err != nil { + d.Close() + t.Fatalf("bootstrap exec: %v", err) + } + d.Close() + + got, err := openCursorVscdb(dbPath) + if err != nil { + t.Fatalf("openCursorVscdb: %v", err) + } + defer got.Close() + var n int + if err := got.QueryRow( + "SELECT COUNT(*) FROM cursorDiskKV", + ).Scan(&n); err != nil { + t.Fatalf("query: %v", err) + } +} + +func TestFileURLToPath(t *testing.T) { + tests := []struct { + name string + in string + // posix is the expected output on macOS/Linux; tests + // skip when running on Windows so the helper's + // drive-letter/UNC branches don't perturb the assertion. + posix string + }{ + { + name: "posix-absolute", + in: "file:///home/user/proj", + posix: "/home/user/proj", + }, + { + name: "percent-encoded-spaces", + in: "file:///home/user/My%20Project", + posix: "/home/user/My Project", + }, + { + name: "percent-encoded-unicode", + in: "file:///home/user/r%C3%A9sum%C3%A9", + posix: "/home/user/résumé", + }, + { + name: "no-scheme-passthrough", + in: "/no/scheme", + posix: "/no/scheme", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("posix-only path expectations") + } + got := fileURLToPath(tt.in) + if got != tt.posix { + t.Errorf( + "fileURLToPath(%q) = %q, want %q", + tt.in, got, tt.posix, + ) + } + }) + } +} + +func TestIsCursorVscdbVirtualPath(t *testing.T) { + // Paths use OS-native separators since they originate from + // filepath.Join in production code; filepath.Base only + // recognizes the host OS's separator. + good := filepath.Join( + "globalStorage", "state.vscdb", + ) + "#abc-123" + noSession := filepath.Join("globalStorage", "state.vscdb") + wrongName := filepath.Join("notavscdb") + "#abc" + + tests := []struct { + name string + path string + want bool + }{ + {"with-session-id", good, true}, + {"missing-session-id", noSession, false}, + {"jsonl", "/some/path/file.jsonl", false}, + {"wrong-basename", wrongName, false}, + {"only-hash", "#abc", false}, + {"empty", "", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := IsCursorVscdbVirtualPath(tt.path) + if got != tt.want { + t.Errorf( + "IsCursorVscdbVirtualPath(%q) = %v, want %v", + tt.path, got, tt.want, + ) + } + }) + } +} + +func TestIsCursorVscdbPath(t *testing.T) { + tests := []struct { + name string + path string + want bool + }{ + { + "linux-default", + filepath.Join( + ".config", "Cursor", "User", + "globalStorage", "state.vscdb", + ), + true, + }, + { + "macos-default", + filepath.Join( + "Library", "Application Support", "Cursor", + "User", "globalStorage", "state.vscdb", + ), + true, + }, + {"transcripts-dir", ".cursor/projects", false}, + {"jsonl", "/some/path/file.jsonl", false}, + {"empty", "", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := IsCursorVscdbPath( + tt.path, + ); got != tt.want { + t.Errorf( + "IsCursorVscdbPath(%q) = %v, want %v", + tt.path, got, tt.want, + ) + } + }) + } +} + +func TestFindCursorVscdb(t *testing.T) { + dir := t.TempDir() + good := filepath.Join(dir, "state.vscdb") + if err := os.WriteFile(good, []byte("x"), 0o600); err != nil { + t.Fatalf("seed vscdb: %v", err) + } + missing := filepath.Join(dir, "missing", "state.vscdb") + notVscdb := filepath.Join(dir, "transcripts") + if err := os.MkdirAll(notVscdb, 0o755); err != nil { + t.Fatalf("seed dir: %v", err) + } + + t.Run("returns-existing-vscdb", func(t *testing.T) { + got := FindCursorVscdb([]string{notVscdb, good}) + if got != good { + t.Errorf("got %q, want %q", got, good) + } + }) + t.Run("skips-missing", func(t *testing.T) { + if got := FindCursorVscdb( + []string{missing}, + ); got != "" { + t.Errorf("got %q, want empty", got) + } + }) + t.Run("skips-non-vscdb", func(t *testing.T) { + if got := FindCursorVscdb( + []string{notVscdb}, + ); got != "" { + t.Errorf("got %q, want empty", got) + } + }) + t.Run("empty-input", func(t *testing.T) { + if got := FindCursorVscdb(nil); got != "" { + t.Errorf("got %q, want empty", got) + } + }) +} + +func TestNormalizeCursorVscdbTool(t *testing.T) { + tests := []struct { + name string + want string + }{ + {"run_terminal_command_v2", "Bash"}, + {"run_terminal_cmd", "Bash"}, + {"read_file_v2", "Read"}, + {"edit_file_v2", "Edit"}, + {"search_replace", "Edit"}, + {"apply_patch", "Edit"}, + {"ripgrep_raw_search", "Grep"}, + {"rg", "Grep"}, + {"glob_file_search", "Glob"}, + {"file_search", "Glob"}, + {"task_v2", "Task"}, + {"delete_file", "Write"}, + {"list_dir_v2", "Read"}, + {"list_dir", "Read"}, + {"read_lints", "Read"}, + {"todo_write", "Tool"}, + {"create_plan", "Tool"}, + {"ask_question", "Tool"}, + {"switch_mode", "Tool"}, + {"codebase_search", "Tool"}, + {"semantic_search_full", "Tool"}, + {"web_search", "Tool"}, + {"web_fetch", "Tool"}, + {"mcp-github", "Tool"}, + {"mcp-linear-search", "Tool"}, + {"grep", "Grep"}, + {"shell", "Bash"}, + {"unknown_tool_xyz", "Other"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := NormalizeToolCategory(tt.name) + if got != tt.want { + t.Errorf( + "NormalizeToolCategory(%q) = %q, want %q", + tt.name, got, tt.want, + ) + } + }) + } +} + +func TestBuildCursorVscdbMessages_GroupsConsecutiveAssistant(t *testing.T) { + headers := []cursorBubbleHeader{ + {BubbleID: "u1", Type: 1}, + {BubbleID: "a1", Type: 2}, // tool call + {BubbleID: "a2", Type: 2}, // text + {BubbleID: "u2", Type: 1}, + {BubbleID: "a3", Type: 2}, // text + } + params := json.RawMessage(`{"path":"/foo"}`) + bubbles := map[string]cursorBubble{ + "u1": {BubbleID: "u1", Type: 1, Text: "First question"}, + "a1": { + BubbleID: "a1", + Type: 2, + CreatedAt: "2025-01-01T10:00:00Z", + ToolFormerData: &cursorToolFormerData{ + Name: "read_file_v2", + Status: "completed", + Params: params, + }, + }, + "a2": {BubbleID: "a2", Type: 2, Text: "Here is the content."}, + "u2": {BubbleID: "u2", Type: 1, Text: "Second question"}, + "a3": {BubbleID: "a3", Type: 2, Text: "Another response."}, + } + + msgs := buildCursorVscdbMessages(headers, bubbles) + + // Expect: user, assistant(tool+text), user, assistant(text) + if len(msgs) != 4 { + t.Fatalf("expected 4 messages, got %d", len(msgs)) + } + + assertEq(t, "msgs[0].Role", string(msgs[0].Role), "user") + assertEq(t, "msgs[1].Role", string(msgs[1].Role), "assistant") + assertEq(t, "msgs[1].HasToolUse", msgs[1].HasToolUse, true) + assertEq(t, "msgs[1].Content", msgs[1].Content, "Here is the content.") + if len(msgs[1].ToolCalls) != 1 { + t.Errorf("expected 1 tool call, got %d", len(msgs[1].ToolCalls)) + } + assertEq(t, "msgs[2].Role", string(msgs[2].Role), "user") + assertEq(t, "msgs[3].Role", string(msgs[3].Role), "assistant") + assertEq(t, "msgs[3].Content", msgs[3].Content, "Another response.") +} + +func TestParseCursorParamsJSON(t *testing.T) { + tests := []struct { + name string + input json.RawMessage + want string + }{ + { + name: "object", + input: json.RawMessage(`{"key":"value"}`), + want: `{"key":"value"}`, + }, + { + name: "string wrapping json", + input: json.RawMessage(`"{\"key\":\"value\"}"`), + want: `{"key":"value"}`, + }, + { + name: "empty", + input: json.RawMessage(nil), + want: "", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := normalizeCursorParamsJSON(tt.input) + if got != tt.want { + t.Errorf("got %q, want %q", got, tt.want) + } + }) + } +} diff --git a/internal/parser/discovery.go b/internal/parser/discovery.go index 880ef489..899f2bac 100644 --- a/internal/parser/discovery.go +++ b/internal/parser/discovery.go @@ -764,6 +764,14 @@ func DiscoverCursorSessions( if projectsDir == "" { return nil } + // The cursor agent's configured paths can include the + // global state.vscdb file alongside the legacy + // transcripts root. Vscdb sessions are ingested by the + // sync engine via ListCursorVscdbSessions, not by walking + // a directory, so skip those entries here. + if IsCursorVscdbPath(projectsDir) { + return nil + } // Canonicalize root once for containment checks. resolvedRoot, err := filepath.EvalSymlinks(projectsDir) @@ -897,6 +905,9 @@ func FindCursorSourceFile( if projectsDir == "" || !IsValidSessionID(sessionID) { return "" } + if IsCursorVscdbPath(projectsDir) { + return "" + } entries, err := os.ReadDir(projectsDir) if err != nil { diff --git a/internal/parser/taxonomy.go b/internal/parser/taxonomy.go index 4af4bb19..2ab359ad 100644 --- a/internal/parser/taxonomy.go +++ b/internal/parser/taxonomy.go @@ -69,7 +69,7 @@ func NormalizeToolCategory(rawName string) string { case "report_intent": return "Tool" - // Cursor tools + // Cursor (file-based JSONL) tools case "Shell": return "Bash" case "StrReplace": @@ -77,6 +77,32 @@ func NormalizeToolCategory(rawName string) string { case "LS": return "Read" + // Cursor vscdb tool names + // Note: "apply_patch" is handled above (Codex section). + // Note: "web_search", "web_fetch" handled below (OpenClaw). + case "run_terminal_command_v2", "run_terminal_cmd": + return "Bash" + case "read_file_v2": + return "Read" + case "edit_file_v2", "search_replace": + return "Edit" + case "ripgrep_raw_search", "rg": + return "Grep" + case "glob_file_search", "file_search": + return "Glob" + case "task_v2": + return "Task" + case "delete_file": + return "Write" + case "list_dir_v2", "list_dir": + return "Read" + case "read_lints": + return "Read" + case "todo_write", "create_plan", "ask_question", + "switch_mode", "codebase_search", + "semantic_search_full": + return "Tool" + // Amp tools (not already covered above) // Note: "create_file" is also used by Pi. case "create_file": @@ -178,6 +204,11 @@ func NormalizeToolCategory(rawName string) string { if strings.Contains(rawName, "subagent") { return "Task" } + // Cursor vscdb stores MCP tool invocations with an + // "mcp-" prefix (e.g. "mcp-github", "mcp-linear-search"). + if strings.HasPrefix(rawName, "mcp-") { + return "Tool" + } return "Other" } } diff --git a/internal/parser/types.go b/internal/parser/types.go index a8a01d95..4fc7f58e 100644 --- a/internal/parser/types.go +++ b/internal/parser/types.go @@ -135,11 +135,24 @@ var Registry = []AgentDef{ FindSourceFunc: FindOpenHandsSourceFile, }, { - Type: AgentCursor, - DisplayName: "Cursor", - EnvVar: "CURSOR_PROJECTS_DIR", - ConfigKey: "cursor_project_dirs", - DefaultDirs: []string{".cursor/projects"}, + Type: AgentCursor, + DisplayName: "Cursor", + EnvVar: "CURSOR_PROJECTS_DIR", + ConfigKey: "cursor_project_dirs", + // AgentDirs[AgentCursor] holds two kinds of paths: + // the legacy JSONL transcripts root, and the platform + // state.vscdb file. Discovery skips state.vscdb-named + // entries; the sync engine finds the vscdb via + // FindCursorVscdb. Listing all platform vscdb paths + // keeps the registry table-driven the same way + // VSCode Copilot does — only the matching platform + // path will exist at runtime. + DefaultDirs: []string{ + ".cursor/projects", + "Library/Application Support/Cursor/User/globalStorage/state.vscdb", + "AppData/Roaming/Cursor/User/globalStorage/state.vscdb", + ".config/Cursor/User/globalStorage/state.vscdb", + }, IDPrefix: "cursor:", FileBased: true, DiscoverFunc: DiscoverCursorSessions, diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 863ba670..8ce4cf51 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -95,6 +95,12 @@ type Engine struct { idPrefix string pathRewriter func(string) string emitter Emitter + + // cursorVscdbSynced is the set of "cursor:" session + // IDs synced from vscdb in the current sync cycle. It is + // populated before file workers start and cleared after. + // Read-only during worker execution; no lock needed. + cursorVscdbSynced map[string]bool } // codexExecMigrationKey is the pg_sync_state flag that @@ -543,7 +549,7 @@ func (e *Engine) classifyOnePath( // //agent-transcripts/.{txt,jsonl} // //agent-transcripts//.{txt,jsonl} for _, cursorDir := range e.agentDirs[parser.AgentCursor] { - if cursorDir == "" { + if cursorDir == "" || parser.IsCursorVscdbPath(cursorDir) { continue } if rel, ok := isUnder(cursorDir, path); ok { @@ -1430,11 +1436,48 @@ func (e *Engine) syncAllLocked( }) } + // Sync Cursor vscdb sessions before file workers so that + // file-based cursor sync can skip already-handled IDs. + tCV := time.Now() + cvPending, cvSynced := e.syncCursorVscdb() + e.cursorVscdbSynced = cvSynced + var cvWritten, cvFailed int + for _, pw := range cvPending { + switch err := e.writeSessionFull(pw); { + case err == nil: + cvWritten++ + case errors.Is(err, db.ErrSessionExcluded), + errors.Is(err, errSessionPreserved): + // Intentional skip, not a failure. + default: + cvFailed++ + } + } + if verbose && len(cvPending) > 0 { + log.Printf( + "cursor vscdb write: %d synced, %d failed in %s", + cvWritten, cvFailed, + time.Since(tCV).Round(time.Millisecond), + ) + } + tWorkers := time.Now() results := e.startWorkers(ctx, all) stats := e.collectAndBatch( ctx, results, len(all), onProgress, writeMode, ) + // Clear vscdb synced set after workers complete. + e.cursorVscdbSynced = nil + + // Fold cursor vscdb stats into the combined stats. + if len(cvPending) > 0 { + stats.TotalSessions += len(cvPending) + stats.RecordSynced(cvWritten) + for i := 0; i < cvFailed; i++ { + stats.RecordFailed() + } + } + if verbose { log.Printf( "file sync: %d synced, %d skipped in %s", @@ -1731,6 +1774,119 @@ func (e *Engine) syncOneOpenCode( return pending } +// cursorVscdbPath returns the configured Cursor state.vscdb +// path from the cursor agent's dir slot, or "" when no vscdb +// is configured / available on disk. +func (e *Engine) cursorVscdbPath() string { + return parser.FindCursorVscdb( + e.agentDirs[parser.AgentCursor], + ) +} + +// cursorVscdbHasSession reports whether a Cursor session has +// already been ingested from the global state.vscdb. It +// consults the in-memory set populated during SyncAll first; +// otherwise (SyncPaths, watcher events) it falls back to the +// stored session row's file_path, which is set to the vscdb +// virtual path when the session was synced from vscdb. +func (e *Engine) cursorVscdbHasSession(sessionID string) bool { + if e.cursorVscdbSynced[sessionID] { + return true + } + if e.cursorVscdbPath() == "" { + return false + } + stored := e.db.GetSessionFilePath(sessionID) + return parser.IsCursorVscdbVirtualPath(stored) +} + +// syncCursorVscdb syncs sessions from Cursor's global state.vscdb. +// Returns pending writes and the set of synced session IDs (with +// "cursor:" prefix) so the file-based sync can skip duplicates. +func (e *Engine) syncCursorVscdb() ( + []pendingWrite, map[string]bool, +) { + dbPath := e.cursorVscdbPath() + if dbPath == "" { + return nil, nil + } + + metas, err := parser.ListCursorVscdbSessions(dbPath) + if err != nil { + log.Printf("sync cursor vscdb: %v", err) + return nil, nil + } + if len(metas) == 0 { + return nil, nil + } + + // Build child→parent map from subComposerIds. + childToParent := make(map[string]string) + for _, m := range metas { + for _, childID := range m.SubComposerIDs { + if childID != "" { + childToParent[childID] = m.SessionID + } + } + } + + syncedIDs := make(map[string]bool, len(metas)) + + var changed []parser.CursorVscdbMeta + for _, m := range metas { + _, storedMtime, ok := + e.db.GetFileInfoByPath(m.VirtualPath) + dataVersionCurrent := e.db.GetDataVersionByPath(m.VirtualPath) >= + db.CurrentDataVersion() + if ok && storedMtime == m.FileMtime && dataVersionCurrent { + // Unchanged: still mark as synced to suppress + // file-based sync overwriting with text-only data. + // Also reparse when an agentsview upgrade bumped + // the parser data version — otherwise old vscdb + // sessions stay frozen until Cursor itself bumps + // lastUpdatedAt. + syncedIDs["cursor:"+m.SessionID] = true + continue + } + changed = append(changed, m) + } + + if len(changed) == 0 { + return nil, syncedIDs + } + + var pending []pendingWrite + for _, m := range changed { + sess, msgs, err := parser.ParseCursorVscdbSession( + dbPath, m.SessionID, m.Project, e.machine, + ) + if err != nil { + log.Printf( + "cursor vscdb session %s: %v", + m.SessionID, err, + ) + continue + } + if sess == nil { + continue + } + + // Wire up parent-child relationship. + if parentID, ok := childToParent[m.SessionID]; ok { + sess.ParentSessionID = "cursor:" + parentID + sess.RelationshipType = parser.RelSubagent + } + + syncedIDs["cursor:"+m.SessionID] = true + pending = append(pending, pendingWrite{ + sess: *sess, + msgs: msgs, + }) + } + + return pending, syncedIDs +} + // startWorkers fans out file processing across a worker pool // and returns a channel of results. When ctx is cancelled, // workers skip remaining jobs with a context error instead @@ -2982,6 +3138,15 @@ func (e *Engine) processCursor( sessionID := parser.CursorSessionID(file.Path) + // Skip if already synced from vscdb (richer data source). + // SyncAll populates cursorVscdbSynced inline; SyncPaths, + // SyncSingleSession, and watcher-driven syncs fall back to + // checking the stored session source path so a JSONL change + // does not overwrite the richer vscdb messages. + if e.cursorVscdbHasSession(sessionID) { + return processResult{skip: true} + } + if e.shouldSkipFile(sessionID, info) { return processResult{skip: true} } @@ -3960,6 +4125,17 @@ func (e *Engine) SyncSingleSession(sessionID string) (err error) { } } + // Cursor sessions ingested from state.vscdb store a virtual + // file_path (state.vscdb#) that won't os.Stat. Check for + // that case before FindSourceFile so explicit resync works + // even when no JSONL fallback exists for the session. + if def.Type == parser.AgentCursor { + stored := e.db.GetSessionFilePath(sessionID) + if parser.IsCursorVscdbVirtualPath(stored) { + return e.syncSingleCursorVscdb(sessionID) + } + } + path := e.FindSourceFile(sessionID) if path == "" { return fmt.Errorf( @@ -4003,6 +4179,9 @@ func (e *Engine) SyncSingleSession(sessionID string) (err error) { case parser.AgentCursor: // Support both flat and nested transcript layouts. for _, cursorDir := range e.agentDirs[parser.AgentCursor] { + if parser.IsCursorVscdbPath(cursorDir) { + continue + } rel, ok := isUnder(cursorDir, path) if !ok { continue @@ -4077,6 +4256,83 @@ func (e *Engine) SyncSingleSession(sessionID string) (err error) { return nil } +// syncSingleCursorVscdb re-syncs a single Cursor session from +// the global state.vscdb. Used by SyncSingleSession when the +// stored source path is a vscdb virtual path; without this, +// explicit resync would fail for vscdb-only sessions because +// FindSourceFile cannot map the virtual path back to a real +// file. +func (e *Engine) syncSingleCursorVscdb(sessionID string) error { + dbPath := e.cursorVscdbPath() + if dbPath == "" { + return fmt.Errorf( + "cursor state.vscdb not found in configured paths", + ) + } + rawID := strings.TrimPrefix(sessionID, "cursor:") + + metas, err := parser.ListCursorVscdbSessions(dbPath) + if err != nil { + return fmt.Errorf( + "list cursor vscdb sessions: %w", err, + ) + } + + var meta *parser.CursorVscdbMeta + parentID := "" + for i := range metas { + if metas[i].SessionID == rawID { + meta = &metas[i] + } + // Detect parent: if rawID appears in another meta's + // SubComposerIDs, that meta is its parent. Same scan + // syncCursorVscdb runs in bulk; mirroring it here so + // explicit single-session resync does not clear + // parent_session_id / relationship_type via the + // UpsertSession overwrite. + for _, child := range metas[i].SubComposerIDs { + if child == rawID { + parentID = metas[i].SessionID + } + } + } + if meta == nil { + return fmt.Errorf( + "cursor session %s not found in vscdb", sessionID, + ) + } + + sess, msgs, err := parser.ParseCursorVscdbSession( + dbPath, meta.SessionID, + meta.Project, e.machine, + ) + if err != nil { + return fmt.Errorf( + "parse cursor vscdb session %s: %w", + sessionID, err, + ) + } + if sess == nil { + return nil + } + + if parentID != "" { + sess.ParentSessionID = "cursor:" + parentID + sess.RelationshipType = parser.RelSubagent + } + + if err := e.writeSessionFull( + pendingWrite{sess: *sess, msgs: msgs}, + ); err != nil && !errors.Is(err, db.ErrSessionExcluded) && + !errors.Is(err, errSessionPreserved) { + return fmt.Errorf( + "write cursor vscdb session %s: %w", + sessionID, err, + ) + } + return nil +} + // syncSingleOpenCode re-syncs a single OpenCode session. func (e *Engine) syncSingleOpenCode( sessionID string, diff --git a/internal/sync/engine_integration_test.go b/internal/sync/engine_integration_test.go index 31bd2f44..a441412a 100644 --- a/internal/sync/engine_integration_test.go +++ b/internal/sync/engine_integration_test.go @@ -6029,3 +6029,819 @@ func TestIncrementalSync_ClaudeClearOnlyRepairedOnAppend(t *testing.T) { ) } } + +// --- Cursor vscdb integration tests --- + +// createCursorVscdbHelper creates a minimal Cursor state.vscdb at +// the given path and returns a helper for inserting test data. +func createCursorVscdbHelper( + t *testing.T, dbPath string, +) *cursorVscdbHelper { + t.Helper() + if err := os.MkdirAll(filepath.Dir(dbPath), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + d, err := sql.Open("sqlite3", dbPath) + if err != nil { + t.Fatalf("opening cursor vscdb: %v", err) + } + t.Cleanup(func() { d.Close() }) + + if _, err := d.Exec(` + CREATE TABLE cursorDiskKV ( + key TEXT UNIQUE ON CONFLICT REPLACE, + value BLOB + ) + `); err != nil { + t.Fatalf("creating vscdb schema: %v", err) + } + return &cursorVscdbHelper{db: d, path: dbPath} +} + +type cursorVscdbHelper struct { + db *sql.DB + path string +} + +func (h *cursorVscdbHelper) addSession( + t *testing.T, + sessionID, name string, + createdAt, lastUpdatedAt int64, + bubbles []string, // ordered bubble IDs +) { + t.Helper() + headers := make([]map[string]any, 0, len(bubbles)) + for _, bid := range bubbles { + // type 1 for odd positions, 2 for even (alternating) + btype := 1 + if len(headers)%2 != 0 { + btype = 2 + } + headers = append(headers, map[string]any{ + "bubbleId": bid, + "type": btype, + }) + } + data := map[string]any{ + "composerId": sessionID, + "name": name, + "createdAt": createdAt, + "lastUpdatedAt": lastUpdatedAt, + "fullConversationHeadersOnly": headers, + } + raw, err := json.Marshal(data) + if err != nil { + t.Fatalf("marshal composerData: %v", err) + } + if _, err := h.db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "composerData:"+sessionID, raw, + ); err != nil { + t.Fatalf("insert composerData: %v", err) + } +} + +func (h *cursorVscdbHelper) addSessionWithSubComposers( + t *testing.T, + sessionID, name string, + createdAt, lastUpdatedAt int64, + bubbles []string, + subIDs []string, +) { + t.Helper() + headers := make([]map[string]any, 0, len(bubbles)) + for _, bid := range bubbles { + btype := 1 + if len(headers)%2 != 0 { + btype = 2 + } + headers = append(headers, map[string]any{ + "bubbleId": bid, + "type": btype, + }) + } + data := map[string]any{ + "composerId": sessionID, + "name": name, + "createdAt": createdAt, + "lastUpdatedAt": lastUpdatedAt, + "fullConversationHeadersOnly": headers, + "subComposerIds": subIDs, + } + raw, err := json.Marshal(data) + if err != nil { + t.Fatalf("marshal composerData: %v", err) + } + if _, err := h.db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "composerData:"+sessionID, raw, + ); err != nil { + t.Fatalf("insert composerData: %v", err) + } +} + +func (h *cursorVscdbHelper) addUserBubble( + t *testing.T, sessionID, bubbleID, text string, +) { + t.Helper() + data := map[string]any{ + "bubbleId": bubbleID, + "type": 1, + "text": text, + "createdAt": "2024-01-01T10:00:00.000Z", + } + raw, _ := json.Marshal(data) + if _, err := h.db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "bubbleId:"+sessionID+":"+bubbleID, raw, + ); err != nil { + t.Fatalf("insert user bubble: %v", err) + } +} + +func (h *cursorVscdbHelper) addAssistantBubble( + t *testing.T, sessionID, bubbleID, text string, +) { + t.Helper() + data := map[string]any{ + "bubbleId": bubbleID, + "type": 2, + "text": text, + "createdAt": "2024-01-01T10:00:01.000Z", + } + raw, _ := json.Marshal(data) + if _, err := h.db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "bubbleId:"+sessionID+":"+bubbleID, raw, + ); err != nil { + t.Fatalf("insert assistant bubble: %v", err) + } +} + +func (h *cursorVscdbHelper) addToolBubble( + t *testing.T, + sessionID, bubbleID, toolName, callID string, + params []byte, +) { + t.Helper() + data := map[string]any{ + "bubbleId": bubbleID, + "type": 2, + "createdAt": "2024-01-01T10:00:01.000Z", + "toolFormerData": map[string]any{ + "name": toolName, + "toolCallId": callID, + "status": "completed", + "params": json.RawMessage(params), + }, + } + raw, _ := json.Marshal(data) + if _, err := h.db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "bubbleId:"+sessionID+":"+bubbleID, raw, + ); err != nil { + t.Fatalf("insert tool bubble: %v", err) + } +} + +func (h *cursorVscdbHelper) updateLastUpdatedAt( + t *testing.T, sessionID string, newTime int64, +) { + t.Helper() + var rawVal []byte + if err := h.db.QueryRow( + "SELECT value FROM cursorDiskKV WHERE key = ?", + "composerData:"+sessionID, + ).Scan(&rawVal); err != nil { + t.Fatalf("read composerData: %v", err) + } + var data map[string]any + if err := json.Unmarshal(rawVal, &data); err != nil { + t.Fatalf("unmarshal composerData: %v", err) + } + data["lastUpdatedAt"] = newTime + raw, _ := json.Marshal(data) + if _, err := h.db.Exec( + "INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)", + "composerData:"+sessionID, raw, + ); err != nil { + t.Fatalf("update composerData: %v", err) + } +} + +// TestSyncCursorVscdbBasic verifies that SyncAll discovers and +// stores Cursor sessions from state.vscdb. +func TestSyncCursorVscdbBasic(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + database := dbtest.OpenTestDB(t) + dir := t.TempDir() + dbPath := filepath.Join( + dir, "globalStorage", "state.vscdb", + ) + vscdb := createCursorVscdbHelper(t, dbPath) + + sessionID := "vscdb-session-001" + vscdb.addSession( + t, sessionID, "My Vscdb Session", + 1704067200000, 1704067205000, + []string{"b-user", "b-asst"}, + ) + vscdb.addUserBubble(t, sessionID, "b-user", "What is the answer?") + vscdb.addAssistantBubble(t, sessionID, "b-asst", "42.") + + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCursor: {t.TempDir(), dbPath}, + }, + Machine: "local", + }) + + stats := engine.SyncAll(context.Background(), nil) + if stats.Synced < 1 { + t.Fatalf("Synced = %d, want >= 1", stats.Synced) + } + + agentviewID := "cursor:" + sessionID + assertSessionState(t, database, agentviewID, + func(sess *db.Session) { + if sess.Agent != "cursor" { + t.Errorf( + "agent = %q, want cursor", + sess.Agent, + ) + } + if sess.Project == "" { + t.Error("expected non-empty project") + } + }, + ) + assertSessionMessageCount(t, database, agentviewID, 2) +} + +// TestSyncCursorVscdbChangeDetection verifies that unchanged +// sessions are not re-parsed but updated ones are. +func TestSyncCursorVscdbChangeDetection(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + database := dbtest.OpenTestDB(t) + dir := t.TempDir() + dbPath := filepath.Join( + dir, "globalStorage", "state.vscdb", + ) + vscdb := createCursorVscdbHelper(t, dbPath) + + sessionID := "vscdb-change-001" + vscdb.addSession( + t, sessionID, "Change Test", + 1704067200000, 1704067205000, + []string{"b1", "b2"}, + ) + vscdb.addUserBubble(t, sessionID, "b1", "original question") + vscdb.addAssistantBubble(t, sessionID, "b2", "original answer") + + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCursor: {t.TempDir(), dbPath}, + }, + Machine: "local", + }) + + // First sync. + stats1 := engine.SyncAll(context.Background(), nil) + if stats1.Synced < 1 { + t.Fatalf("first sync: Synced = %d, want >= 1", stats1.Synced) + } + + // Second sync with no changes: should not re-parse. + stats2 := engine.SyncAll(context.Background(), nil) + if stats2.Synced != 0 { + t.Errorf( + "second sync (no change): Synced = %d, want 0", + stats2.Synced, + ) + } + + // Update lastUpdatedAt and re-sync. + vscdb.updateLastUpdatedAt(t, sessionID, 1704067210000) + stats3 := engine.SyncAll(context.Background(), nil) + if stats3.Synced < 1 { + t.Fatalf( + "third sync (after update): Synced = %d, want >= 1", + stats3.Synced, + ) + } +} + +// TestSyncSingleSessionCursorVscdbOnly verifies that an +// explicit resync works for sessions that exist only in vscdb +// (no discoverable JSONL fallback). Without the dispatch in +// SyncSingleSession, FindSourceFile returns "" because the +// stored virtual path fails os.Stat and no JSONL exists. +func TestSyncSingleSessionCursorVscdbOnly(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + database := dbtest.OpenTestDB(t) + dir := t.TempDir() + dbPath := filepath.Join( + dir, "globalStorage", "state.vscdb", + ) + vscdb := createCursorVscdbHelper(t, dbPath) + + sessionID := "vscdb-only-001" + vscdb.addSession( + t, sessionID, "Vscdb Only", + 1704067200000, 1704067205000, + []string{"u1", "a1"}, + ) + vscdb.addUserBubble(t, sessionID, "u1", "first message") + vscdb.addAssistantBubble(t, sessionID, "a1", "first reply") + + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCursor: {t.TempDir(), dbPath}, + }, + Machine: "local", + }) + + engine.SyncAll(context.Background(), nil) + + avID := "cursor:" + sessionID + stored := database.GetSessionFilePath(avID) + if !parser.IsCursorVscdbVirtualPath(stored) { + t.Fatalf( + "setup: file_path = %q, want vscdb virtual path", + stored, + ) + } + // No JSONL fallback exists — without the SyncSingleSession + // vscdb dispatch, FindSourceFile returns "" and resync + // fails with "source file not found". + if err := engine.SyncSingleSession(avID); err != nil { + t.Fatalf("SyncSingleSession: %v", err) + } + if storedAfter := database.GetSessionFilePath(avID); !parser.IsCursorVscdbVirtualPath( + storedAfter, + ) { + t.Errorf( + "after resync: file_path = %q, want vscdb virtual path", + storedAfter, + ) + } + + // Mutate vscdb by re-inserting composerData with extra + // bubbles + a bumped lastUpdatedAt. cursorDiskKV uses + // UNIQUE ON CONFLICT REPLACE so the new row supersedes. + vscdb.addAssistantBubble(t, sessionID, "a2", "follow-up reply") + vscdb.addSession( + t, sessionID, "Vscdb Only", + 1704067200000, 1704067210000, + []string{"u1", "a1", "u1b", "a2"}, + ) + vscdb.addUserBubble(t, sessionID, "u1b", "follow-up question") + + if err := engine.SyncSingleSession(avID); err != nil { + t.Fatalf("SyncSingleSession after mutation: %v", err) + } + msgs, err := database.GetMessages( + context.Background(), avID, 0, 100, true, + ) + if err != nil { + t.Fatalf("GetMessages: %v", err) + } + foundFollowUp := false + for _, m := range msgs { + if strings.Contains(m.Content, "follow-up question") { + foundFollowUp = true + break + } + } + if !foundFollowUp { + t.Error( + "explicit resync did not pick up the new vscdb bubble", + ) + } +} + +// TestSyncSingleSessionCursorVscdbPreservesParent verifies +// that explicitly resyncing a vscdb subagent child session +// preserves its parent_session_id / relationship_type set by +// the bulk vscdb sync. UpsertSession unconditionally +// overwrites those columns, so the single-session path has to +// re-derive the parent from SubComposerIDs. +func TestSyncSingleSessionCursorVscdbPreservesParent(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + database := dbtest.OpenTestDB(t) + dir := t.TempDir() + dbPath := filepath.Join( + dir, "globalStorage", "state.vscdb", + ) + vscdb := createCursorVscdbHelper(t, dbPath) + + parentID := "single-resync-parent" + childID := "single-resync-child" + + vscdb.addSessionWithSubComposers( + t, parentID, "Parent", + 1704067200000, 1704067205000, + []string{"pb1", "pb2"}, + []string{childID}, + ) + vscdb.addUserBubble(t, parentID, "pb1", "parent question") + vscdb.addAssistantBubble(t, parentID, "pb2", "parent answer") + + vscdb.addSession( + t, childID, "Child", + 1704067201000, 1704067206000, + []string{"cb1", "cb2"}, + ) + vscdb.addUserBubble(t, childID, "cb1", "child question") + vscdb.addAssistantBubble(t, childID, "cb2", "child answer") + + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCursor: {t.TempDir(), dbPath}, + }, + Machine: "local", + }) + + engine.SyncAll(context.Background(), nil) + + parentAv := "cursor:" + parentID + childAv := "cursor:" + childID + + // Resync the child explicitly. Without re-deriving the + // parent, UpsertSession would clear ParentSessionID. + if err := engine.SyncSingleSession(childAv); err != nil { + t.Fatalf("SyncSingleSession child: %v", err) + } + assertSessionState(t, database, childAv, + func(sess *db.Session) { + if sess.ParentSessionID == nil || + *sess.ParentSessionID != parentAv { + got := "" + if sess.ParentSessionID != nil { + got = *sess.ParentSessionID + } + t.Errorf( + "after single resync: ParentSessionID = %q, want %q", + got, parentAv, + ) + } + }, + ) +} + +// TestSyncCursorVscdbReparsesOnDataVersionBump verifies that +// vscdb sessions get re-parsed when the stored data_version +// falls behind db.CurrentDataVersion (e.g., after an agentsview +// upgrade), even though the vscdb meta mtime is unchanged. +func TestSyncCursorVscdbReparsesOnDataVersionBump(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + database := dbtest.OpenTestDB(t) + dir := t.TempDir() + dbPath := filepath.Join( + dir, "globalStorage", "state.vscdb", + ) + vscdb := createCursorVscdbHelper(t, dbPath) + + sessionID := "dataversion-001" + vscdb.addSession( + t, sessionID, "Data Version", + 1704067200000, 1704067205000, + []string{"b1", "b2"}, + ) + vscdb.addUserBubble(t, sessionID, "b1", "question") + vscdb.addAssistantBubble(t, sessionID, "b2", "answer") + + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCursor: {t.TempDir(), dbPath}, + }, + Machine: "local", + }) + + stats1 := engine.SyncAll(context.Background(), nil) + if stats1.Synced < 1 { + t.Fatalf("first sync: Synced = %d, want >= 1", stats1.Synced) + } + + // Simulate an agentsview upgrade by stamping the session + // at an older data version while leaving the vscdb file + // untouched. The next SyncAll must re-parse it. + avID := "cursor:" + sessionID + if err := database.SetSessionDataVersion( + avID, db.CurrentDataVersion()-1, + ); err != nil { + t.Fatalf("SetSessionDataVersion: %v", err) + } + + stats2 := engine.SyncAll(context.Background(), nil) + if stats2.Synced < 1 { + t.Errorf( + "second sync after data_version bump: "+ + "Synced = %d, want >= 1", + stats2.Synced, + ) + } + if got := database.GetSessionDataVersion(avID); got != + db.CurrentDataVersion() { + t.Errorf( + "after reparse: data_version = %d, want %d", + got, db.CurrentDataVersion(), + ) + } +} + +// TestSyncCursorVscdbDedup verifies that sessions present in +// vscdb are not overwritten by the file-based cursor sync. +func TestSyncCursorVscdbDedup(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + database := dbtest.OpenTestDB(t) + dir := t.TempDir() + dbPath := filepath.Join( + dir, "globalStorage", "state.vscdb", + ) + vscdb := createCursorVscdbHelper(t, dbPath) + + sessionID := "dedup-session-001" + vscdb.addSession( + t, sessionID, "Dedup Test", + 1704067200000, 1704067205000, + []string{"b-user", "b-tool", "b-asst"}, + ) + vscdb.addUserBubble(t, sessionID, "b-user", "Do something") + vscdb.addToolBubble( + t, sessionID, "b-tool", + "read_file_v2", "call-1", + []byte(`{"path":"/foo.txt"}`), + ) + vscdb.addAssistantBubble(t, sessionID, "b-asst", "Done.") + + // Create a cursor projects directory with a JSONL file + // for the same session (text-only, no tool calls). + cursorDir := t.TempDir() + jsonlDir := filepath.Join( + cursorDir, "myproject", "agent-transcripts", + sessionID, + ) + if err := os.MkdirAll(jsonlDir, 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + // Minimal JSONL with only text (no tool calls). + jsonlContent := `{"role":"user","message":{"content":[{"type":"text","text":"file-based text only"}]}}` + "\n" + if err := os.WriteFile( + filepath.Join(jsonlDir, sessionID+".jsonl"), + []byte(jsonlContent), 0o644, + ); err != nil { + t.Fatalf("write jsonl: %v", err) + } + + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCursor: {cursorDir, dbPath}, + }, + Machine: "local", + }) + + engine.SyncAll(context.Background(), nil) + + agentviewID := "cursor:" + sessionID + // Session should be present (from vscdb). + assertSessionState(t, database, agentviewID, + func(sess *db.Session) { + if sess.Agent != "cursor" { + t.Errorf("agent = %q, want cursor", sess.Agent) + } + }, + ) + + // Should have messages with tool use from vscdb. + // The file-based JSONL should NOT have replaced the vscdb data. + msgs, err := database.GetMessages( + context.Background(), agentviewID, 0, 100, true, + ) + if err != nil { + t.Fatalf("GetMessages: %v", err) + } + hasToolUse := false + for _, m := range msgs { + if m.HasToolUse { + hasToolUse = true + break + } + } + if !hasToolUse { + t.Error( + "expected vscdb data (with tool call) to win over " + + "file-based text-only JSONL", + ) + } +} + +// TestSyncPathsCursorVscdbDedup verifies that processCursor +// skips JSONL transcripts during SyncPaths/watcher events when +// the session is already populated from vscdb. Without the +// fallback dedup, a JSONL parse would overwrite the stored +// session metadata (file_path, file_size, file_hash) and lose +// the tie-back to the vscdb virtual path that syncCursorVscdb +// uses for change detection. SyncSingleSession exercises the +// same processCursor path and uses ReplaceSessionMessages, so +// it surfaces both halves of the regression cleanly. +func TestSyncPathsCursorVscdbDedup(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + database := dbtest.OpenTestDB(t) + dir := t.TempDir() + dbPath := filepath.Join( + dir, "globalStorage", "state.vscdb", + ) + vscdb := createCursorVscdbHelper(t, dbPath) + + sessionID := "syncpaths-dedup-001" + vscdb.addSession( + t, sessionID, "SyncPaths Dedup", + 1704067200000, 1704067205000, + []string{"b-user", "b-tool", "b-asst"}, + ) + vscdb.addUserBubble(t, sessionID, "b-user", "Do something") + vscdb.addToolBubble( + t, sessionID, "b-tool", + "read_file_v2", "call-1", + []byte(`{"path":"/foo.txt"}`), + ) + vscdb.addAssistantBubble(t, sessionID, "b-asst", "Done.") + + cursorDir := t.TempDir() + jsonlDir := filepath.Join( + cursorDir, "myproject", "agent-transcripts", sessionID, + ) + if err := os.MkdirAll(jsonlDir, 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + jsonlPath := filepath.Join(jsonlDir, sessionID+".jsonl") + jsonlContent := `{"role":"user","message":{"content":[{"type":"text","text":"file-based text only"}]}}` + "\n" + if err := os.WriteFile( + jsonlPath, []byte(jsonlContent), 0o644, + ); err != nil { + t.Fatalf("write jsonl: %v", err) + } + + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCursor: {cursorDir, dbPath}, + }, + Machine: "local", + }) + + // SyncAll first to populate the DB from vscdb. + engine.SyncAll(context.Background(), nil) + + agentviewID := "cursor:" + sessionID + storedBefore := database.GetSessionFilePath(agentviewID) + if !parser.IsCursorVscdbVirtualPath(storedBefore) { + t.Fatalf( + "setup: stored file_path = %q, want vscdb virtual path", + storedBefore, + ) + } + + // Force a re-parse of this single session via the API + // path that powers /sessions/:id/resync. SyncSingleSession + // uses writeSessionFull → ReplaceSessionMessages, so it + // would silently overwrite the vscdb-backed messages and + // file_path if processCursor accepted the JSONL. + if err := engine.SyncSingleSession(agentviewID); err != nil { + t.Fatalf("SyncSingleSession: %v", err) + } + + storedAfter := database.GetSessionFilePath(agentviewID) + if !parser.IsCursorVscdbVirtualPath(storedAfter) { + t.Errorf( + "after re-parse: file_path = %q, want vscdb virtual path", + storedAfter, + ) + } + + msgs, err := database.GetMessages( + context.Background(), agentviewID, 0, 100, true, + ) + if err != nil { + t.Fatalf("GetMessages: %v", err) + } + hasToolUse := false + for _, m := range msgs { + if m.HasToolUse { + hasToolUse = true + break + } + } + if !hasToolUse { + t.Error( + "vscdb tool use lost after re-parse; JSONL overwrote " + + "richer vscdb messages", + ) + } + + // Drive the watcher path too: SyncPaths must also dedup, + // even though writeBatch's incremental message append makes + // the message-loss surface narrower than SyncSingleSession. + engine.SyncPaths([]string{jsonlPath}) + storedAfterWatch := database.GetSessionFilePath(agentviewID) + if !parser.IsCursorVscdbVirtualPath(storedAfterWatch) { + t.Errorf( + "after SyncPaths: file_path = %q, want vscdb virtual path", + storedAfterWatch, + ) + } +} + +// TestSyncCursorVscdbSubagentLinking verifies that sessions +// with subComposerIds get parent-child relationships set. +func TestSyncCursorVscdbSubagentLinking(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + database := dbtest.OpenTestDB(t) + dir := t.TempDir() + dbPath := filepath.Join( + dir, "globalStorage", "state.vscdb", + ) + vscdb := createCursorVscdbHelper(t, dbPath) + + parentID := "parent-session-001" + childID := "child-session-001" + + vscdb.addSessionWithSubComposers( + t, parentID, "Parent Session", + 1704067200000, 1704067205000, + []string{"pb1", "pb2"}, + []string{childID}, + ) + vscdb.addUserBubble(t, parentID, "pb1", "parent question") + vscdb.addAssistantBubble(t, parentID, "pb2", "parent answer") + + vscdb.addSession( + t, childID, "Child Session", + 1704067201000, 1704067206000, + []string{"cb1", "cb2"}, + ) + vscdb.addUserBubble(t, childID, "cb1", "child question") + vscdb.addAssistantBubble(t, childID, "cb2", "child answer") + + engine := sync.NewEngine(database, sync.EngineConfig{ + AgentDirs: map[parser.AgentType][]string{ + parser.AgentCursor: {t.TempDir(), dbPath}, + }, + Machine: "local", + }) + + engine.SyncAll(context.Background(), nil) + + parentAvID := "cursor:" + parentID + childAvID := "cursor:" + childID + + assertSessionState(t, database, parentAvID, + func(sess *db.Session) { + if sess.ParentSessionID != nil { + t.Errorf( + "parent: ParentSessionID = %q, want nil", + *sess.ParentSessionID, + ) + } + }, + ) + assertSessionState(t, database, childAvID, + func(sess *db.Session) { + if sess.ParentSessionID == nil || + *sess.ParentSessionID != parentAvID { + got := "" + if sess.ParentSessionID != nil { + got = *sess.ParentSessionID + } + t.Errorf( + "child: ParentSessionID = %q, want %q", + got, parentAvID, + ) + } + }, + ) +}