|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import json |
| 4 | +import logging |
| 5 | +import os |
| 6 | +import time |
| 7 | +from collections import Counter |
| 8 | +from dataclasses import dataclass |
| 9 | +from datetime import UTC, datetime, timedelta |
| 10 | +from pathlib import Path |
| 11 | +from typing import Any |
| 12 | + |
| 13 | +from project_resolver import resolve_project_name |
| 14 | + |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Root directory that is searched recursively for ``*.jsonl`` session logs.
CLAUDE_PROJECTS_DIR = Path(os.path.expanduser("~/.claude/projects"))
# Maximum age, in seconds, at which a cached profile is still served.
_CACHE_TTL_SECONDS = 300.0
# Single-entry cache: (time the entry was stored, ``days_back`` it was built
# for, the profile itself). ``None`` means nothing is cached.
_cache: tuple[float, int, PersonaProfile] | None = None
| 20 | + |
| 21 | + |
@dataclass(slots=True)
class PersonaProfile:
    """Aggregated usage statistics derived from Claude project session logs."""

    # Message counts bucketed by hour of day (24 entries, index = hour in the
    # system's local timezone).
    hour_histogram: list[int]
    # (project name, number of distinct sessions) pairs: at most five, ordered
    # by session count descending, then by name.
    top_projects: list[tuple[str, int]]
    # De-duplicated AI-generated session titles, most recently active session
    # first (at most eight).
    recent_titles: list[str]
    # Number of distinct session ids that had at least one counted message.
    total_sessions: int
    # Number of user/assistant messages inside the look-back window.
    total_messages: int
| 29 | + |
| 30 | + |
@dataclass(slots=True)
class _MetadataLine:
    """The subset of one JSONL log record that the profile builder reads."""

    # Value of the record's "type" key ("" when missing or not a string).
    type: str
    # Record timestamp normalised to UTC, or None when absent or unparseable.
    timestamp: datetime | None
    # Value of "sessionId" (falling back to "session_id"); "" when missing.
    session_id: str
    # Value of the record's "cwd" key; "" when missing.
    cwd: str
    # Value of the record's "aiTitle" key; "" when missing.
    title: str
| 38 | + |
| 39 | + |
def load_profile(days_back: int = 30) -> PersonaProfile:
    """Return the persona profile for the last ``days_back`` days, cached briefly.

    A single profile is kept in the module-level ``_cache``. It is reused when
    it was built for the same ``days_back`` and is younger than
    ``_CACHE_TTL_SECONDS``; otherwise the logs are rescanned and the cache
    entry is replaced.

    Args:
        days_back: Size of the look-back window in days.

    Returns:
        The cached or freshly computed profile. While the cache entry is
        valid every caller receives the same object, so treat it as read-only.
    """
    global _cache

    # Measure cache age with the monotonic clock. Wall-clock time can jump
    # (NTP correction, manual change), which would either expire the entry
    # early or keep a stale profile alive long past the TTL.
    now = time.monotonic()
    if _cache is not None:
        cached_at, cached_days_back, cached_profile = _cache
        if cached_days_back == days_back and now - cached_at < _CACHE_TTL_SECONDS:
            return cached_profile

    profile = _load_profile_uncached(days_back)
    _cache = (now, days_back, profile)
    return profile
| 52 | + |
| 53 | + |
def _reset_cache() -> None:
    """Drop the cached profile so the next ``load_profile`` call rescans the logs."""
    global _cache
    _cache = None
| 57 | + |
| 58 | + |
def _load_profile_uncached(days_back: int) -> PersonaProfile:
    """Scan the Claude project logs and build a fresh ``PersonaProfile``.

    Every ``*.jsonl`` file below ``CLAUDE_PROJECTS_DIR`` whose modification
    time falls inside the look-back window is read line by line. AI titles are
    remembered per session regardless of the record's timestamp; user and
    assistant records inside the window feed the hour histogram, the message
    total, the per-project session sets and the per-session "last message"
    time. Files that cannot be stat'ed or read are logged and skipped.

    Args:
        days_back: Size of the look-back window in days; negative values are
            treated as zero.

    Returns:
        The aggregated profile, or an empty profile when the projects
        directory does not exist.
    """
    window_start = datetime.now(UTC) - timedelta(days=max(0, days_back))
    window_start_ts = window_start.timestamp()

    if not CLAUDE_PROJECTS_DIR.is_dir():
        return _empty_profile()

    hourly = [0] * 24
    message_count = 0
    project_sessions: dict[str, set[str]] = {}
    active_sessions: set[str] = set()
    last_seen: dict[str, datetime] = {}
    session_titles: dict[str, str] = {}

    for log_path in CLAUDE_PROJECTS_DIR.rglob("*.jsonl"):
        try:
            modified_at = log_path.stat().st_mtime
        except OSError as exc:
            logger.warning("failed to stat Claude project log %s: %s", log_path, exc)
            continue
        # A file untouched since before the window cannot hold recent messages.
        if modified_at < window_start_ts:
            continue

        path_project = _project_from_path(log_path)
        try:
            with log_path.open(encoding="utf-8", errors="replace") as handle:
                for raw_line in handle:
                    entry = _parse_metadata_line(raw_line)
                    if entry is None:
                        continue

                    sid = entry.session_id
                    # Titles are kept even when the record has no usable
                    # timestamp; they are matched to active sessions later.
                    if sid:
                        cleaned_title = entry.title.strip()
                        if entry.type == "ai-title" and cleaned_title:
                            session_titles[sid] = cleaned_title

                    stamp = entry.timestamp
                    if stamp is None or stamp < window_start:
                        continue
                    if entry.type not in ("user", "assistant"):
                        continue

                    # Bucket by the hour in the machine's local timezone.
                    hourly[stamp.astimezone().hour] += 1
                    message_count += 1

                    if not sid:
                        continue
                    project = _project_from_cwd(entry.cwd) or path_project
                    project_sessions.setdefault(project, set()).add(sid)
                    active_sessions.add(sid)
                    previous = last_seen.get(sid)
                    if previous is None or stamp > previous:
                        last_seen[sid] = stamp
        except OSError as exc:
            logger.warning("failed to read Claude project log %s: %s", log_path, exc)

    # Most sessions first; ties broken alphabetically by project name.
    ranked_projects = sorted(
        ((name, len(ids)) for name, ids in project_sessions.items()),
        key=lambda pair: (-pair[1], pair[0]),
    )

    return PersonaProfile(
        hour_histogram=hourly,
        top_projects=ranked_projects[:5],
        recent_titles=_recent_unique_titles(session_titles, last_seen),
        total_sessions=len(active_sessions),
        total_messages=message_count,
    )
| 127 | + |
| 128 | + |
def _empty_profile() -> PersonaProfile:
    """Return a profile with all-zero counters and no projects or titles."""
    no_activity = [0 for _ in range(24)]
    return PersonaProfile(
        hour_histogram=no_activity,
        top_projects=[],
        recent_titles=[],
        total_sessions=0,
        total_messages=0,
    )
| 137 | + |
| 138 | + |
def _parse_metadata_line(line: str) -> _MetadataLine | None:
    """Extract the metadata fields of one JSONL log line.

    Args:
        line: A single raw line from a session log.

    Returns:
        A ``_MetadataLine`` built from the record's "type", "timestamp",
        "sessionId"/"session_id", "cwd" and "aiTitle" keys, or ``None`` when
        the line cannot be decoded or does not decode to a JSON object.
    """
    try:
        data = json.loads(line)
    except (ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError (its subclass) and the plain
        # ValueError json.loads raises for integer literals longer than the
        # interpreter's digit limit. RecursionError covers absurdly deep
        # nesting. One bad line must not abort the whole scan.
        return None

    if not isinstance(data, dict):
        return None

    return _MetadataLine(
        type=_as_str(data.get("type")),
        timestamp=_parse_timestamp(data.get("timestamp")),
        session_id=_as_str(data.get("sessionId") or data.get("session_id")),
        cwd=_as_str(data.get("cwd")),
        title=_as_str(data.get("aiTitle")),
    )
| 155 | + |
| 156 | + |
| 157 | +def _parse_timestamp(value: Any) -> datetime | None: |
| 158 | + if not isinstance(value, str) or not value: |
| 159 | + return None |
| 160 | + try: |
| 161 | + timestamp = datetime.fromisoformat(value.replace("Z", "+00:00")) |
| 162 | + except ValueError: |
| 163 | + return None |
| 164 | + if timestamp.tzinfo is None: |
| 165 | + return timestamp.replace(tzinfo=UTC) |
| 166 | + return timestamp.astimezone(UTC) |
| 167 | + |
| 168 | + |
def _project_from_cwd(cwd: str) -> str:
    """Resolve a project name from a working directory; "" when ``cwd`` is empty."""
    return resolve_project_name(cwd) if cwd else ""
| 173 | + |
| 174 | + |
def _project_from_path(jsonl_path: Path) -> str:
    """Derive a project name from a log file's location under the projects dir.

    The first path component below ``CLAUDE_PROJECTS_DIR`` is split on "-".
    The pieces are first tried as a literal absolute directory path, then as a
    path whose components may themselves contain hyphens. If neither names an
    existing directory, the component itself (minus a leading "-") is used.

    Args:
        jsonl_path: Path of a session log file.

    Returns:
        The final directory name of the resolved project path, the raw
        directory component as a fallback, or "unknown" when nothing usable
        can be derived.
    """
    unknown = "unknown"
    try:
        encoded_dir = jsonl_path.relative_to(CLAUDE_PROJECTS_DIR).parts[0]
    except (IndexError, ValueError):
        return unknown

    segments = list(filter(None, encoded_dir.split("-")))
    if not segments:
        return unknown

    # Cheapest guess first: every hyphen stands for a path separator.
    literal_path = Path(os.sep, *segments)
    if literal_path.is_dir():
        resolved = literal_path
    else:
        resolved = _existing_encoded_project_path(segments)

    if resolved is not None:
        return resolved.name or unknown
    return encoded_dir.removeprefix("-") or unknown
| 195 | + |
| 196 | + |
| 197 | +def _existing_encoded_project_path(parts: list[str]) -> Path | None: |
| 198 | + def search(index: int, current: Path) -> Path | None: |
| 199 | + for end in range(index + 1, len(parts) + 1): |
| 200 | + candidate = current / "-".join(parts[index:end]) |
| 201 | + if not candidate.is_dir(): |
| 202 | + continue |
| 203 | + if end == len(parts): |
| 204 | + return candidate |
| 205 | + result = search(end, candidate) |
| 206 | + if result is not None: |
| 207 | + return result |
| 208 | + return None |
| 209 | + |
| 210 | + return search(0, Path(os.sep)) |
| 211 | + |
| 212 | + |
| 213 | +def _recent_unique_titles( |
| 214 | + titles_by_session: dict[str, str], |
| 215 | + session_last_message_at: dict[str, datetime], |
| 216 | +) -> list[str]: |
| 217 | + titles: list[str] = [] |
| 218 | + seen: set[str] = set() |
| 219 | + ordered_sessions = sorted( |
| 220 | + titles_by_session, |
| 221 | + key=lambda session_id: session_last_message_at.get( |
| 222 | + session_id, |
| 223 | + datetime.min.replace(tzinfo=UTC), |
| 224 | + ), |
| 225 | + reverse=True, |
| 226 | + ) |
| 227 | + for session_id in ordered_sessions: |
| 228 | + if session_id not in session_last_message_at: |
| 229 | + continue |
| 230 | + title = titles_by_session[session_id] |
| 231 | + normalized = title.strip() |
| 232 | + if not normalized or normalized in seen: |
| 233 | + continue |
| 234 | + seen.add(normalized) |
| 235 | + titles.append(normalized) |
| 236 | + if len(titles) >= 8: |
| 237 | + break |
| 238 | + return titles |
| 239 | + |
| 240 | + |
def _as_str(value: Any) -> str:
    """Return ``value`` unchanged when it is a string, otherwise ""."""
    if isinstance(value, str):
        return value
    return ""
0 commit comments