Skip to content

Commit aa537fa

Browse files
committed
feat: add get_session_messages top-level function
Ports getSessionMessages() from the TypeScript SDK. Reads a single session JSONL file fully, reconstructs the conversation chain via parentUuid links, and returns user/assistant messages in chronological order.

- Add SessionMessage dataclass to types.py (type, uuid, session_id, message, parent_tool_use_id)
- Add get_session_messages() to _internal/sessions.py, reusing existing path sanitization, config dir resolution, and worktree detection helpers from list_sessions
- Chain algorithm: find terminal entries (no children), walk each back to nearest user/assistant leaf, pick main-chain leaf with highest file position, then walk leaf→root via parentUuid and reverse
- Filters: isMeta, isSidechain, teamName; keeps isCompactSummary (matches VS Code IDE behavior post-compaction)
- Supports limit/offset pagination
- Returns empty list for invalid UUID or missing session (no raise)
- 21 new tests covering chain building, filtering, pagination, cycles
1 parent 12610e0 commit aa537fa

4 files changed

Lines changed: 821 additions & 3 deletions

File tree

src/claude_agent_sdk/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
CLINotFoundError,
1414
ProcessError,
1515
)
16-
from ._internal.sessions import list_sessions
16+
from ._internal.sessions import get_session_messages, list_sessions
1717
from ._internal.transport import Transport
1818
from ._version import __version__
1919
from .client import ClaudeSDKClient
@@ -54,6 +54,7 @@
5454
SdkBeta,
5555
SdkPluginConfig,
5656
SDKSessionInfo,
57+
SessionMessage,
5758
SettingSource,
5859
StopHookInput,
5960
SubagentStartHookInput,
@@ -382,7 +383,9 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> Any:
382383
"SdkPluginConfig",
383384
# Session listing
384385
"list_sessions",
386+
"get_session_messages",
385387
"SDKSessionInfo",
388+
"SessionMessage",
386389
# Beta support
387390
"SdkBeta",
388391
# Sandbox support

src/claude_agent_sdk/_internal/sessions.py

Lines changed: 294 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@
1414
import sys
1515
import unicodedata
1616
from pathlib import Path
17+
from typing import Any
1718

18-
from ..types import SDKSessionInfo
19+
from ..types import SDKSessionInfo, SessionMessage
1920

2021
# ---------------------------------------------------------------------------
2122
# Constants
@@ -631,3 +632,295 @@ def list_sessions(
631632
if directory:
632633
return _list_sessions_for_project(directory, limit, include_worktrees)
633634
return _list_all_sessions(limit)
635+
636+
637+
# ---------------------------------------------------------------------------
638+
# get_session_messages — full transcript reconstruction
639+
# ---------------------------------------------------------------------------
640+
641+
# Entry types that participate in the uuid/parentUuid linkage of a transcript.
_TRANSCRIPT_ENTRY_TYPES = frozenset(
    ("user", "assistant", "progress", "system", "attachment")
)

# Loose-dict stand-in for the TS TranscriptEntry type. Fields of interest:
# type, uuid, parentUuid, sessionId, message, isSidechain, isMeta,
# isCompactSummary, teamName.
_TranscriptEntry = dict[str, Any]
650+
651+
652+
def _try_read_session_file(project_dir: Path, file_name: str) -> str | None:
    """Read one session JSONL file from a project directory, best-effort.

    Args:
        project_dir: Directory expected to contain the session file.
        file_name: Name of the transcript file inside ``project_dir``.

    Returns:
        The decoded file content, or ``None`` when the file cannot be read
        (missing file, path is a directory, permission denied, ...).
    """
    try:
        # errors="replace": an invalid UTF-8 byte would otherwise raise
        # UnicodeDecodeError, which is a ValueError (not an OSError) and would
        # escape this handler. Replacing the bad bytes keeps the rest of the
        # transcript readable instead of failing the whole read.
        return (project_dir / file_name).read_text(
            encoding="utf-8", errors="replace"
        )
    except OSError:
        return None
658+
659+
660+
def _read_session_file(session_id: str, directory: str | None) -> str | None:
    """Locate the ``<session_id>.jsonl`` transcript and return its text.

    With ``directory`` given, the project directory resolved for it by
    ``_find_project_dir`` is tried first, followed by the project directories
    of every other path reported by ``_get_worktree_paths``. Without it, each
    entry under the projects directory is tried in iteration order.

    An empty file counts as "not found", so the search moves on to the next
    candidate.

    Returns:
        The first non-empty file content found, otherwise ``None``.
    """
    file_name = f"{session_id}.jsonl"

    if not directory:
        # No project hint: scan every project directory.
        projects_dir = _get_projects_dir()
        try:
            candidates = list(projects_dir.iterdir())
        except OSError:
            return None
        for candidate in candidates:
            text = _try_read_session_file(candidate, file_name)
            if text:
                return text
        return None

    canonical_dir = _canonicalize_path(directory)

    # First choice: the project directory matching the requested path itself.
    primary_dir = _find_project_dir(canonical_dir)
    if primary_dir is not None:
        text = _try_read_session_file(primary_dir, file_name)
        if text:
            return text

    # Fallback: the session may have been recorded under another worktree
    # root. Worktree discovery is best-effort; any failure means "none".
    try:
        worktrees = _get_worktree_paths(canonical_dir)
    except Exception:
        worktrees = []

    for worktree in worktrees:
        if worktree == canonical_dir:
            # Same path as the primary attempt above.
            continue
        worktree_dir = _find_project_dir(worktree)
        if worktree_dir is None:
            continue
        text = _try_read_session_file(worktree_dir, file_name)
        if text:
            return text

    return None
711+
712+
713+
def _parse_transcript_entries(content: str) -> list[_TranscriptEntry]:
    """Parse JSONL text into transcript entries, skipping unusable lines.

    A line is kept only when it decodes to a JSON object whose ``type`` is one
    of ``_TRANSCRIPT_ENTRY_TYPES`` and whose ``uuid`` is a string. Blank lines,
    lines that are not valid JSON, and JSON values of any other shape are
    silently dropped.

    Args:
        content: Full text of a session ``.jsonl`` file.

    Returns:
        The accepted entries, in file order.
    """
    entries: list[_TranscriptEntry] = []

    # Split on "\n" only: str.splitlines() would also break on characters such
    # as U+2028, which may appear unescaped inside a JSON string. A trailing
    # "\r" is removed by strip() below.
    for raw_line in content.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        try:
            entry = json.loads(line)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError. RecursionError covers a
            # pathologically nested line; treat it as corrupt as well.
            continue

        if not isinstance(entry, dict):
            continue

        entry_type = entry.get("type")
        # The isinstance check comes first: a list/object "type" is unhashable
        # and would make the frozenset membership test raise TypeError.
        if (
            isinstance(entry_type, str)
            and entry_type in _TRANSCRIPT_ENTRY_TYPES
            and isinstance(entry.get("uuid"), str)
        ):
            entries.append(entry)

    return entries
745+
746+
747+
def _build_conversation_chain(
    entries: list[_TranscriptEntry],
) -> list[_TranscriptEntry]:
    """Reconstruct the main conversation as a root → leaf list of entries.

    Steps:
      1. Find terminal entries (no other entry names them as ``parentUuid``).
      2. From each terminal, walk parents until the first user/assistant
         entry; that entry is a candidate leaf.
      3. Prefer leaves that are not sidechain/team/meta; among the candidates
         pick the one that appears latest in ``entries``.
      4. Walk from the chosen leaf to the root via ``parentUuid`` and reverse.

    Both walks stop on a missing parent or a repeated uuid, so cyclic or
    dangling links cannot loop forever. A ``parentUuid`` that is not a
    non-empty string is treated as "no parent" rather than raising.

    Note: logicalParentUuid (set on compact_boundary entries) is intentionally
    NOT followed. This matches VS Code IDE behavior — post-compaction, the
    isCompactSummary message replaces earlier messages, so following logical
    parents would duplicate content.

    Args:
        entries: Parsed transcript entries in file order; each must have a
            string ``uuid``.

    Returns:
        Entries on the selected chain in chronological order, or ``[]`` when
        there are no entries or no user/assistant leaf can be found.
    """
    if not entries:
        return []

    # uuid -> entry for O(1) parent lookup, and uuid -> file position for
    # choosing the most recent leaf. Later duplicates of a uuid win in both.
    by_uuid: dict[str, _TranscriptEntry] = {}
    position: dict[str, int] = {}
    for index, entry in enumerate(entries):
        by_uuid[entry["uuid"]] = entry
        position[entry["uuid"]] = index

    def _parent_entry(entry: _TranscriptEntry) -> _TranscriptEntry | None:
        # Only a non-empty string can be a valid link. Anything else (None,
        # "", or an unhashable list/object from a malformed line) means the
        # entry is a root.
        parent = entry.get("parentUuid")
        if isinstance(parent, str) and parent:
            return by_uuid.get(parent)
        return None

    # Every uuid that some entry points at as its parent.
    parent_uuids: set[str] = set()
    for entry in entries:
        parent = entry.get("parentUuid")
        if isinstance(parent, str) and parent:
            parent_uuids.add(parent)

    terminals = [e for e in entries if e["uuid"] not in parent_uuids]

    # From each terminal, walk back to the nearest user/assistant entry.
    leaves: list[_TranscriptEntry] = []
    for terminal in terminals:
        seen: set[str] = set()
        cur: _TranscriptEntry | None = terminal
        while cur is not None and cur["uuid"] not in seen:
            if cur.get("type") in ("user", "assistant"):
                leaves.append(cur)
                break
            seen.add(cur["uuid"])
            cur = _parent_entry(cur)

    if not leaves:
        return []

    # Main-chain leaves win over sidechain/team/meta ones. If there are none,
    # fall back to all leaves.
    main_leaves = [
        leaf
        for leaf in leaves
        if not leaf.get("isSidechain")
        and not leaf.get("teamName")
        and not leaf.get("isMeta")
    ]
    candidates = main_leaves or leaves
    # max() returns the first maximal element, so ties keep the earliest
    # candidate.
    leaf = max(candidates, key=lambda e: position.get(e["uuid"], -1))

    # Walk leaf → root, then flip into chronological order.
    chain: list[_TranscriptEntry] = []
    visited: set[str] = set()
    node: _TranscriptEntry | None = leaf
    while node is not None and node["uuid"] not in visited:
        visited.add(node["uuid"])
        chain.append(node)
        node = _parent_entry(node)

    chain.reverse()
    return chain
837+
838+
839+
def _is_visible_message(entry: _TranscriptEntry) -> bool:
    """Decide whether a transcript entry belongs in the returned messages.

    Visible entries are user/assistant entries that are not flagged
    ``isMeta`` or ``isSidechain`` and carry no ``teamName``.

    ``isCompactSummary`` is deliberately not consulted: those messages hold
    the summarized content of a compacted conversation and are its only
    representation post-compaction. This matches VS Code IDE behavior
    (transcriptToSessionMessage does not filter them).
    """
    is_conversational = entry.get("type") in ("user", "assistant")
    is_hidden = (
        entry.get("isMeta") or entry.get("isSidechain") or entry.get("teamName")
    )
    return is_conversational and not is_hidden
853+
854+
855+
def _to_session_message(entry: _TranscriptEntry) -> SessionMessage:
    """Build a ``SessionMessage`` from a raw transcript entry dict.

    Missing ``uuid`` / ``sessionId`` default to ``""`` and a missing
    ``message`` to ``None``. ``parent_tool_use_id`` is always ``None``.
    """
    # Any type other than "user" is reported as "assistant"; callers filter
    # with _is_visible_message first, so only those two values are expected.
    if entry.get("type") == "user":
        kind: str = "user"
    else:
        kind = "assistant"

    uuid = entry.get("uuid", "")
    session_id = entry.get("sessionId", "")
    payload = entry.get("message")
    return SessionMessage(
        type=kind,  # type: ignore[arg-type]
        uuid=uuid,
        session_id=session_id,
        message=payload,
        parent_tool_use_id=None,
    )
868+
869+
870+
def get_session_messages(
    session_id: str,
    directory: str | None = None,
    limit: int | None = None,
    offset: int = 0,
) -> list[SessionMessage]:
    """Reads a session's conversation messages from its JSONL transcript file.

    Parses the full JSONL, builds the conversation chain via ``parentUuid``
    links, and returns user/assistant messages in chronological order.

    Args:
        session_id: UUID of the session to read.
        directory: Project directory to find the session in. If omitted,
            searches all project directories under ``~/.claude/projects/``.
        limit: Maximum number of messages to return. ``None`` or a
            non-positive value means no limit.
        offset: Number of messages to skip from the start. Negative values
            are treated as ``0``.

    Returns:
        List of ``SessionMessage`` objects in chronological order. Returns
        an empty list if the session is not found, the session_id is not a
        valid UUID, or the transcript contains no visible messages.

    Example:
        Read all messages from a session::

            messages = get_session_messages(
                "550e8400-e29b-41d4-a716-446655440000",
                directory="/path/to/project",
            )
            for msg in messages:
                print(msg.type, msg.message)

        Read with pagination::

            page = get_session_messages(
                session_id, limit=10, offset=20
            )
    """
    if not _validate_uuid(session_id):
        return []

    content = _read_session_file(session_id, directory)
    if not content:
        return []

    chain = _build_conversation_chain(_parse_transcript_entries(content))
    messages = [_to_session_message(e) for e in chain if _is_visible_message(e)]

    # Clamp so a negative offset never turns into a from-the-end slice index;
    # it then means "skip nothing" whether or not a limit is set.
    start = max(offset, 0)
    if limit is not None and limit > 0:
        return messages[start : start + limit]
    return messages[start:]

src/claude_agent_sdk/types.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,30 @@ class SDKSessionInfo:
730730
cwd: str | None = None
731731

732732

733+
@dataclass
class SessionMessage:
    """One user or assistant turn read back from a stored session transcript.

    Instances are produced by ``get_session_messages()`` when reading
    historical session data. The field set follows the SDK wire protocol
    message types (SDKUserMessage / SDKAssistantMessage).

    Attributes:
        type: Either ``"user"`` or ``"assistant"``.
        uuid: Identifier of this message.
        session_id: Identifier of the session the message was recorded in.
        message: The raw Anthropic API message dict (role, content, etc.).
        parent_tool_use_id: Always ``None``; tool-use sidechain messages are
            filtered out, so only top-level conversation messages appear.
    """

    type: Literal["user", "assistant"]
    uuid: str
    session_id: str
    message: Any
    parent_tool_use_id: None = None
755+
756+
733757
class ThinkingConfigAdaptive(TypedDict):
734758
type: Literal["adaptive"]
735759

0 commit comments

Comments
 (0)