Skip to content

Commit 9df479d

Browse files
simonw and claude committed
Extract repo from session metadata instead of fetching each session
Avoids N+1 API calls by using session_context.outcomes.git_info.repo or parsing session_context.sources URL from the sessions list response.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent eacdef9 commit 9df479d

File tree

2 files changed

+101
-44
lines changed

2 files changed

+101
-44
lines changed

src/claude_code_transcripts/__init__.py

Lines changed: 42 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -619,30 +619,56 @@ def detect_github_repo(loglines):
619619
return None
620620

621621

622-
def enrich_sessions_with_repos(sessions, token, org_uuid, fetch_fn=None):
623-
"""Enrich sessions with repo information by fetching each session's details.
622+
def extract_repo_from_session(session):
623+
"""Extract GitHub repo from session metadata.
624+
625+
Looks in session_context.outcomes for git_info.repo,
626+
or parses from session_context.sources URL.
627+
628+
Returns repo as "owner/name" or None.
629+
"""
630+
context = session.get("session_context", {})
631+
632+
# Try outcomes first (has clean repo format)
633+
outcomes = context.get("outcomes", [])
634+
for outcome in outcomes:
635+
if outcome.get("type") == "git_repository":
636+
git_info = outcome.get("git_info", {})
637+
repo = git_info.get("repo")
638+
if repo:
639+
return repo
640+
641+
# Fall back to sources URL
642+
sources = context.get("sources", [])
643+
for source in sources:
644+
if source.get("type") == "git_repository":
645+
url = source.get("url", "")
646+
# Parse github.com/owner/repo from URL
647+
if "github.com/" in url:
648+
# Extract owner/repo from https://github.com/owner/repo
649+
match = re.search(r"github\.com/([^/]+/[^/]+?)(?:\.git)?$", url)
650+
if match:
651+
return match.group(1)
652+
653+
return None
654+
655+
656+
def enrich_sessions_with_repos(sessions, token=None, org_uuid=None, fetch_fn=None):
657+
"""Enrich sessions with repo information from session metadata.
624658
625659
Args:
626660
sessions: List of session dicts from the API
627-
token: API access token
628-
org_uuid: Organization UUID
629-
fetch_fn: Optional function to fetch session data (for testing)
661+
token: Unused (kept for backward compatibility)
662+
org_uuid: Unused (kept for backward compatibility)
663+
fetch_fn: Unused (kept for backward compatibility)
630664
631665
Returns:
632666
List of session dicts with 'repo' key added
633667
"""
634-
if fetch_fn is None:
635-
fetch_fn = fetch_session
636-
637668
enriched = []
638669
for session in sessions:
639670
session_copy = dict(session)
640-
try:
641-
session_data = fetch_fn(token, org_uuid, session["id"])
642-
loglines = session_data.get("loglines", [])
643-
session_copy["repo"] = detect_github_repo(loglines)
644-
except Exception:
645-
session_copy["repo"] = None
671+
session_copy["repo"] = extract_repo_from_session(session)
646672
enriched.append(session_copy)
647673
return enriched
648674

@@ -1992,9 +2018,8 @@ def web_cmd(
19922018
if not sessions:
19932019
raise click.ClickException("No sessions found.")
19942020

1995-
# Enrich sessions with repo information
1996-
click.echo("Fetching session details to detect repos...")
1997-
sessions = enrich_sessions_with_repos(sessions, token, org_uuid)
2021+
# Enrich sessions with repo information (extracted from session metadata)
2022+
sessions = enrich_sessions_with_repos(sessions)
19982023

19992024
# Filter by repo if specified
20002025
if repo:

tests/test_all.py

Lines changed: 59 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -570,44 +570,76 @@ def test_detect_github_repo_returns_none_when_not_found(self):
570570
assert repo is None
571571

572572
def test_enrich_sessions_with_repos(self):
573-
"""Test enriching sessions with repo information."""
573+
"""Test enriching sessions with repo information from session metadata."""
574574
from claude_code_transcripts import enrich_sessions_with_repos
575575

576-
# Mock sessions from the API list
576+
# Mock sessions from the API list with session_context
577577
sessions = [
578-
{"id": "sess1", "title": "Session 1", "created_at": "2025-01-01T10:00:00Z"},
579-
{"id": "sess2", "title": "Session 2", "created_at": "2025-01-02T10:00:00Z"},
580-
]
581-
582-
# Mock fetch function that returns session data with loglines
583-
def mock_fetch(token, org_uuid, session_id):
584-
if session_id == "sess1":
585-
return {
586-
"loglines": [
578+
{
579+
"id": "sess1",
580+
"title": "Session 1",
581+
"created_at": "2025-01-01T10:00:00Z",
582+
"session_context": {
583+
"outcomes": [
587584
{
588-
"type": "assistant",
589-
"message": {
590-
"role": "assistant",
591-
"content": [
592-
{
593-
"type": "tool_result",
594-
"content": "https://github.com/simonw/datasette/pull/new/branch",
595-
}
596-
],
597-
},
585+
"type": "git_repository",
586+
"git_info": {"repo": "simonw/datasette", "type": "github"},
598587
}
599588
]
600-
}
601-
else:
602-
return {"loglines": []}
589+
},
590+
},
591+
{
592+
"id": "sess2",
593+
"title": "Session 2",
594+
"created_at": "2025-01-02T10:00:00Z",
595+
"session_context": {},
596+
},
597+
]
603598

604-
enriched = enrich_sessions_with_repos(
605-
sessions, "token", "org", fetch_fn=mock_fetch
606-
)
599+
enriched = enrich_sessions_with_repos(sessions)
607600

608601
assert enriched[0]["repo"] == "simonw/datasette"
609602
assert enriched[1]["repo"] is None
610603

604+
def test_extract_repo_from_session_outcomes(self):
605+
"""Test extracting repo from session_context.outcomes."""
606+
from claude_code_transcripts import extract_repo_from_session
607+
608+
session = {
609+
"session_context": {
610+
"outcomes": [
611+
{
612+
"type": "git_repository",
613+
"git_info": {"repo": "simonw/llm", "type": "github"},
614+
}
615+
]
616+
}
617+
}
618+
assert extract_repo_from_session(session) == "simonw/llm"
619+
620+
def test_extract_repo_from_session_sources_url(self):
621+
"""Test extracting repo from session_context.sources URL."""
622+
from claude_code_transcripts import extract_repo_from_session
623+
624+
session = {
625+
"session_context": {
626+
"sources": [
627+
{
628+
"type": "git_repository",
629+
"url": "https://github.com/simonw/datasette",
630+
}
631+
]
632+
}
633+
}
634+
assert extract_repo_from_session(session) == "simonw/datasette"
635+
636+
def test_extract_repo_from_session_no_context(self):
637+
"""Test extracting repo when no session_context exists."""
638+
from claude_code_transcripts import extract_repo_from_session
639+
640+
session = {"id": "sess1", "title": "No context"}
641+
assert extract_repo_from_session(session) is None
642+
611643
def test_filter_sessions_by_repo(self):
612644
"""Test filtering sessions by repo."""
613645
from claude_code_transcripts import filter_sessions_by_repo

0 commit comments

Comments
 (0)