|
14 | 14 | import sys |
15 | 15 | import unicodedata |
16 | 16 | from pathlib import Path |
| 17 | +from typing import Any |
17 | 18 |
|
18 | | -from ..types import SDKSessionInfo |
| 19 | +from ..types import SDKSessionInfo, SessionMessage |
19 | 20 |
|
20 | 21 | # --------------------------------------------------------------------------- |
21 | 22 | # Constants |
@@ -631,3 +632,295 @@ def list_sessions( |
631 | 632 | if directory: |
632 | 633 | return _list_sessions_for_project(directory, limit, include_worktrees) |
633 | 634 | return _list_all_sessions(limit) |
| 635 | + |
| 636 | + |
| 637 | +# --------------------------------------------------------------------------- |
| 638 | +# get_session_messages — full transcript reconstruction |
| 639 | +# --------------------------------------------------------------------------- |
| 640 | + |
# Entry kinds that participate in the uuid -> parentUuid linkage of a
# transcript. Lines of any other kind are dropped while parsing.
_TRANSCRIPT_ENTRY_TYPES = frozenset(
    ("user", "assistant", "progress", "system", "attachment")
)

# One parsed JSONL transcript line, kept as a loosely typed dict. It mirrors
# the TS TranscriptEntry type; the fields of interest are: type, uuid,
# parentUuid, sessionId, message, isSidechain, isMeta, isCompactSummary and
# teamName.
_TranscriptEntry = dict[str, Any]
| 650 | + |
| 651 | + |
def _try_read_session_file(project_dir: Path, file_name: str) -> str | None:
    """Read a session JSONL file from a project directory, if possible.

    Args:
        project_dir: Directory expected to contain the session file.
        file_name: Name of the file inside ``project_dir``.

    Returns:
        The decoded file content, or ``None`` when the file cannot be read
        (missing, not a regular file, permission denied, ...).
    """
    try:
        # Decode leniently: a transcript that is still being appended to can
        # end in the middle of a multi-byte character, and strict decoding
        # would raise UnicodeDecodeError (which is not an OSError) and abort
        # the whole lookup. With errors="replace" the damaged bytes become
        # U+FFFD and every intact line stays usable.
        return (project_dir / file_name).read_text(
            encoding="utf-8", errors="replace"
        )
    except OSError:
        return None
| 658 | + |
| 659 | + |
def _read_session_file(session_id: str, directory: str | None) -> str | None:
    """Locate the session's JSONL transcript and return its text.

    With a ``directory``, the project directory resolved for it is tried
    first (resolution includes the prefix fallback for Bun/Node hash
    mismatches on long paths), followed by the project directories of its
    git worktrees. Without one, every entry under the projects directory is
    probed in turn.

    An empty file counts as "not found", so the search moves on to the next
    candidate.

    Returns:
        The file content, or ``None`` if no non-empty transcript was found.
    """
    file_name = f"{session_id}.jsonl"

    if not directory:
        # No directory given: probe every entry of the projects directory.
        projects_root = _get_projects_dir()
        try:
            candidates = list(projects_root.iterdir())
        except OSError:
            return None

        for candidate in candidates:
            text = _try_read_session_file(candidate, file_name)
            if text:
                return text
        return None

    canonical_dir = _canonicalize_path(directory)

    # First choice: the project directory matched for the path itself.
    primary_dir = _find_project_dir(canonical_dir)
    if primary_dir is not None:
        text = _try_read_session_file(primary_dir, file_name)
        if text:
            return text

    # Fallback: the session may have been recorded under another worktree
    # root. Worktree discovery is best-effort; any failure means "none".
    try:
        worktrees = _get_worktree_paths(canonical_dir)
    except Exception:
        worktrees = []

    # Lazy on purpose: each worktree is resolved only if the previous
    # candidates did not yield a transcript.
    other_project_dirs = (
        _find_project_dir(worktree)
        for worktree in worktrees
        if worktree != canonical_dir  # the path itself was tried above
    )
    for worktree_project_dir in other_project_dirs:
        if worktree_project_dir is None:
            continue
        text = _try_read_session_file(worktree_project_dir, file_name)
        if text:
            return text

    return None
| 711 | + |
| 712 | + |
def _parse_transcript_entries(content: str) -> list[_TranscriptEntry]:
    """Turn raw JSONL text into a list of transcript entries.

    A line is kept only when it decodes to a JSON object whose ``type`` is
    one of the transcript entry types (user/assistant/progress/system/
    attachment) and whose ``uuid`` is a string. Blank lines, lines that are
    not valid JSON and everything else are skipped silently.

    Entries are returned in file order.
    """
    parsed: list[_TranscriptEntry] = []

    # Split on "\n" only (not str.splitlines()), so that other Unicode line
    # separators appearing inside JSON strings never cut a record in half.
    for raw_line in content.split("\n"):
        text = raw_line.strip()
        if not text:
            continue

        try:
            record = json.loads(text)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue

        if (
            isinstance(record, dict)
            and record.get("type") in _TRANSCRIPT_ENTRY_TYPES
            and isinstance(record.get("uuid"), str)
        ):
            parsed.append(record)

    return parsed
| 745 | + |
| 746 | + |
def _build_conversation_chain(
    entries: list[_TranscriptEntry],
) -> list[_TranscriptEntry]:
    """Build the conversation chain by finding the leaf and walking parentUuid.

    Steps:
      1. Find terminal entries (no other entry names them as parent).
      2. From each terminal, walk up to the nearest user/assistant entry;
         these are the candidate leaves.
      3. Prefer leaves on the main chain (not sidechain/team/meta); among the
         candidates pick the one that appears latest in the file.
      4. Walk from that leaf to the root via ``parentUuid``.

    A ``parentUuid`` that is missing, empty, not a string, or that points to
    an unknown uuid ends the walk; cycles are detected and also end it.

    Note: logicalParentUuid (set on compact_boundary entries) is intentionally
    NOT followed. This matches VS Code IDE behavior — post-compaction, the
    isCompactSummary message replaces earlier messages, so following logical
    parents would duplicate content.

    Args:
        entries: Parsed transcript entries in file order. Each must carry a
            string ``uuid``.

    Returns:
        The chain in chronological order (root → leaf), or an empty list
        when there are no entries or no user/assistant leaf can be found.
    """
    if not entries:
        return []

    def _parent_uuid(entry: _TranscriptEntry) -> str | None:
        # Only a non-empty string is a usable link. Malformed transcripts may
        # carry other JSON values here (lists/objects are unhashable and
        # would otherwise blow up the set/dict lookups below).
        parent = entry.get("parentUuid")
        return parent if isinstance(parent, str) and parent else None

    # Single pass: uuid -> entry (O(1) parent lookup), uuid -> file position
    # (tie-breaking), and the set of uuids that are somebody's parent.
    by_uuid: dict[str, _TranscriptEntry] = {}
    position: dict[str, int] = {}
    referenced: set[str] = set()
    for index, entry in enumerate(entries):
        uid = entry["uuid"]
        by_uuid[uid] = entry
        position[uid] = index
        parent = _parent_uuid(entry)
        if parent is not None:
            referenced.add(parent)

    def _ancestry(start: _TranscriptEntry):
        # Yields start, its parent, grandparent, ... and stops at the root,
        # at a dangling parent reference, or when a cycle is detected.
        seen: set[str] = set()
        current: _TranscriptEntry | None = start
        while current is not None and current["uuid"] not in seen:
            seen.add(current["uuid"])
            yield current
            parent = _parent_uuid(current)
            current = by_uuid.get(parent) if parent is not None else None

    # From each terminal, the nearest user/assistant ancestor (possibly the
    # terminal itself) is a candidate leaf.
    leaves: list[_TranscriptEntry] = []
    for entry in entries:
        if entry["uuid"] in referenced:
            continue  # has children, so it is not a terminal
        for ancestor in _ancestry(entry):
            if ancestor.get("type") in ("user", "assistant"):
                leaves.append(ancestor)
                break

    if not leaves:
        return []

    # Prefer main-chain leaves; fall back to any leaf when there are none.
    main_leaves = [
        candidate
        for candidate in leaves
        if not (
            candidate.get("isSidechain")
            or candidate.get("teamName")
            or candidate.get("isMeta")
        )
    ]

    # Most recent in the file wins. max() returns the first maximal element,
    # matching a strict ">" scan over the candidates.
    leaf = max(main_leaves or leaves, key=lambda e: position[e["uuid"]])

    chain = list(_ancestry(leaf))
    chain.reverse()
    return chain
| 837 | + |
| 838 | + |
def _is_visible_message(entry: _TranscriptEntry) -> bool:
    """Decide whether a chain entry belongs in the returned message list.

    Only ``user`` and ``assistant`` entries qualify, and only when they are
    not flagged as meta, sidechain, or team (``teamName``) messages.

    isCompactSummary messages are deliberately NOT filtered out: after a
    compaction they are the only representation of the summarized content.
    This matches VS Code IDE behavior (transcriptToSessionMessage does not
    filter them).
    """
    if entry.get("type") not in ("user", "assistant"):
        return False

    hidden = (
        entry.get("isMeta")
        or entry.get("isSidechain")
        or entry.get("teamName")
    )
    return not hidden
| 853 | + |
| 854 | + |
def _to_session_message(entry: _TranscriptEntry) -> SessionMessage:
    """Build a ``SessionMessage`` from a transcript entry dict.

    ``uuid`` and ``sessionId`` default to an empty string when absent,
    ``message`` is passed through as-is (``None`` when absent), and
    ``parent_tool_use_id`` is always ``None``.
    """
    # Anything not explicitly "user" is reported as "assistant". Callers are
    # expected to have filtered with _is_visible_message first, so only those
    # two values occur in practice.
    kind: str = "assistant"
    if entry.get("type") == "user":
        kind = "user"

    return SessionMessage(
        type=kind,  # type: ignore[arg-type]
        uuid=entry.get("uuid", ""),
        session_id=entry.get("sessionId", ""),
        message=entry.get("message"),
        parent_tool_use_id=None,
    )
| 868 | + |
| 869 | + |
def get_session_messages(
    session_id: str,
    directory: str | None = None,
    limit: int | None = None,
    offset: int = 0,
) -> list[SessionMessage]:
    """Reads a session's conversation messages from its JSONL transcript file.

    Parses the full JSONL, builds the conversation chain via ``parentUuid``
    links, and returns user/assistant messages in chronological order.

    Args:
        session_id: UUID of the session to read.
        directory: Project directory to find the session in. If omitted,
            searches all project directories under ``~/.claude/projects/``.
        limit: Maximum number of messages to return. ``None`` or a value
            that is not positive means "no limit".
        offset: Number of messages to skip from the start. Negative values
            are treated as ``0``.

    Returns:
        List of ``SessionMessage`` objects in chronological order. Returns
        an empty list if the session is not found, the session_id is not a
        valid UUID, or the transcript contains no visible messages.

    Example:
        Read all messages from a session::

            messages = get_session_messages(
                "550e8400-e29b-41d4-a716-446655440000",
                directory="/path/to/project",
            )
            for msg in messages:
                print(msg.type, msg.message)

        Read with pagination::

            page = get_session_messages(
                session_id, limit=10, offset=20
            )
    """
    if not _validate_uuid(session_id):
        return []

    content = _read_session_file(session_id, directory)
    if not content:
        return []

    entries = _parse_transcript_entries(content)
    chain = _build_conversation_chain(entries)
    messages = [
        _to_session_message(entry)
        for entry in chain
        if _is_visible_message(entry)
    ]

    # Clamp the offset: a negative value would otherwise be interpreted by
    # slicing as "count from the end" when a limit is set (and be ignored
    # when it is not), giving inconsistent pages.
    start = max(offset, 0)
    if limit is not None and limit > 0:
        return messages[start : start + limit]
    return messages[start:]
0 commit comments