diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..cd9067a --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,43 @@ +# Claude Code Quick Reference -- chat-sdk-python + +## What is this? +Python port of Vercel Chat SDK. Multi-platform async chat framework. + +## Key Commands +- `uv sync --group dev` -- install dependencies +- `uv run pytest tests/ -q` -- run tests +- `uv run ruff check src/` -- lint +- `uv run ruff format src/` -- format + +## Architecture +- `src/chat_sdk/chat.py` -- Main Chat orchestrator (handlers, routing, concurrency) +- `src/chat_sdk/thread.py` -- Thread (streaming, pagination, subscriptions) +- `src/chat_sdk/channel.py` -- Channel (thread listing, metadata) +- `src/chat_sdk/types.py` -- All types (Message, Author, Adapter protocol) +- `src/chat_sdk/adapters/` -- 8 platform adapters +- `src/chat_sdk/shared/` -- Markdown parser, format converter, streaming renderer +- `src/chat_sdk/state/` -- Memory, Redis, Postgres backends +- `tests/` -- 2,477+ tests + +## Critical Rules +1. **Never use `datetime.utcnow()`** -- use `datetime.now(tz=timezone.utc)` +2. **Never use `asyncio.ensure_future`** -- use `asyncio.get_running_loop().create_task()` +3. **Never pass raw dicts to `self._chat.process_*`** -- use typed dataclasses (ActionEvent, ReactionEvent, etc.) +4. **Never use camelCase keys in dispatch dicts** -- always snake_case +5. **Never use `random.choices` for security tokens** -- use `secrets.token_hex` +6. **Never import optional deps at module level** -- lazy import inside functions +7. **Always use `hmac.compare_digest` for signature verification** -- never `==` +8. **Always use `is not None` for empty-string-valid fields** -- never `or` +9. **Always validate external URLs before HTTP requests** (SSRF prevention) +10. **Always check `extend_lock` return value** in processing loops + +## Adding a New Adapter +See docs/ARCHITECTURE.md and CONTRIBUTING.md. + +## Upstream Sync +See docs/UPSTREAM_SYNC.md for TS->Python translation patterns. + +## Known Limitations +- Markdown parser handles common cases but is not full CommonMark +- StreamingMarkdownRenderer's _remend is simplified vs the npm `remend` library +- No setext headings, no footnotes, no HTML nodes in the parser diff --git a/src/chat_sdk/shared/base_format_converter.py b/src/chat_sdk/shared/base_format_converter.py index 6863d26..31f3805 100644 --- a/src/chat_sdk/shared/base_format_converter.py +++ b/src/chat_sdk/shared/base_format_converter.py @@ -182,8 +182,16 @@ def render_postable(self, message: PostableMessageInput) -> str: if "ast" in message: return self.from_ast(message["ast"]) if "card" in message: - return message.get("fallback_text") or message.get("fallbackText") or "" + from chat_sdk.cards import card_to_fallback_text + + return card_to_fallback_text(message["card"]) if message.get("type") == "card": + from chat_sdk.cards import is_card_element + + if is_card_element(message): + from chat_sdk.cards import card_to_fallback_text + + return card_to_fallback_text(message) return "" return str(message) @@ -195,9 +203,8 @@ def render_postable(self, message: PostableMessageInput) -> str: if hasattr(message, "ast"): return self.from_ast(message.ast) if hasattr(message, "card"): - fallback = getattr(message, "fallback_text", None) - if fallback: - return fallback - return "" + from chat_sdk.cards import card_to_fallback_text + + return card_to_fallback_text(message.card) return str(message) diff --git a/src/chat_sdk/shared/markdown_parser.py b/src/chat_sdk/shared/markdown_parser.py index 66a8ce7..4cf515e 100644 --- a/src/chat_sdk/shared/markdown_parser.py +++ b/src/chat_sdk/shared/markdown_parser.py @@ -272,7 +272,7 @@ def _parse_inline(text: str) -> list[Content]: # Patterns used by the block parser _HEADING_RE = re.compile(r"^(#{1,6})\s+(.*)") -_THEMATIC_BREAK_RE = re.compile(r"^(\*{3,}|-{3,}|_{3,})\s*$") +_THEMATIC_BREAK_RE = re.compile(r"^([-*_]\s*){3,}\s*$") _FENCED_CODE_START_RE = re.compile(r"^(`{3,}|~{3,})(.*)") _BLOCKQUOTE_RE = re.compile(r"^>\s?(.*)") _ORDERED_LIST_RE = re.compile(r"^(\d+)[.)]\s+(.*)") @@ -452,7 +452,7 @@ def parse_markdown(text: str) -> Root: heading_match = _HEADING_RE.match(line) if heading_match: depth = len(heading_match.group(1)) - heading_text = heading_match.group(2).strip() + heading_text = heading_match.group(2).rstrip().rstrip("#").rstrip() children.append(make_heading(depth, _parse_inline(heading_text))) i += 1 continue diff --git a/src/chat_sdk/shared/streaming_markdown.py b/src/chat_sdk/shared/streaming_markdown.py index 4c9b63c..4a37fa6 100644 --- a/src/chat_sdk/shared/streaming_markdown.py +++ b/src/chat_sdk/shared/streaming_markdown.py @@ -32,75 +32,115 @@ # has unclosed constructs). +def _strip_fenced_code(text: str) -> str: + """Return *text* with content between code fences replaced by empty lines. + + This allows inline-marker counting to ignore literal characters inside + fenced code blocks (e.g. ``*`` inside a code block is not an unclosed + italic marker). + """ + lines = text.split("\n") + result_lines: list[str] = [] + in_fence = False + for line in lines: + stripped = line.lstrip() + if stripped.startswith("```") or stripped.startswith("~~~"): + in_fence = not in_fence + result_lines.append("") # replace fence line itself + elif in_fence: + result_lines.append("") # replace content inside fence + else: + result_lines.append(line) + return "\n".join(result_lines) + + def _remend(text: str) -> str: - """Close unclosed inline markdown constructs. + """Repair incomplete markdown by closing unclosed inline markers. This is a simplified Python equivalent of the ``remend`` npm package. - It scans for unclosed ``**``, ``*``, ``~~``, `` ` ``, and ``[`` and - appends the matching closers. + Fixes issues in the previous implementation: + - Dead code around ``star_count2`` (removed) + - ``~~`` counting confused by ``~~~`` code fences (handled by stripping) + - Missing ``__`` / ``_`` (underscore bold/italic) handling (added) + - Markers inside code blocks no longer counted as inline markers + + Strategy: count *unescaped* characters for ``*`` and ``_`` (parity-based) + outside code fences and code spans. Count ``~~`` substrings for + strikethrough. Count backtick characters for inline code. """ result = text - # --- code spans (backtick) --- - # Simple heuristic: if the total number of backtick characters is odd, - # there must be an unclosed code span -- close it with one backtick. - # This is idempotent: after closing, the count becomes even and no - # further modification is needed. - if result.count("`") % 2 != 0: + # --- code fences --- + # If inside an unclosed code fence, close it and return immediately. + in_code_fence = False + for line in result.split("\n"): + stripped = line.lstrip() + if stripped.startswith("```") or stripped.startswith("~~~"): + in_code_fence = not in_code_fence + + if in_code_fence: + result += "\n```" + return result + + # Strip fenced code blocks so their contents don't affect inline counts. + outside_fences = _strip_fenced_code(result) + + # --- inline code backticks --- + # Count total backtick characters outside code fences. If odd, one code + # span is unclosed -- append a single backtick. + backtick_count = outside_fences.count("`") + if backtick_count % 2 != 0: result += "`" - # --- bold / italic --- - # Count unescaped * sequences + # --- bold / italic (* based) --- + # Count total unescaped * characters outside code fences. If odd, append + # one to make even. This is idempotent: once the count is even, no + # further change occurs. star_count = 0 j = 0 - temp = result - while j < len(temp): - if temp[j] == "\\": + while j < len(outside_fences): + if outside_fences[j] == "\\": j += 2 continue - if temp[j] == "*": - run = 0 - while j < len(temp) and temp[j] == "*": - run += 1 - j += 1 - star_count += run - continue + if outside_fences[j] == "*": + star_count += 1 j += 1 if star_count % 2 != 0: result += "*" - # After fixing single, check for double - star_count2 = 0 - k = 0 - temp2 = result - while k < len(temp2): - if temp2[k] == "\\": - k += 2 - continue - if temp2[k] == "*": - run = 0 - while k < len(temp2) and temp2[k] == "*": - run += 1 - k += 1 - star_count2 += run + + # --- bold / italic (_ based) --- + # Same parity approach for underscore markers. + under_count = 0 + j = 0 + while j < len(outside_fences): + if outside_fences[j] == "\\": + j += 2 continue - k += 1 + if outside_fences[j] == "_": + under_count += 1 + j += 1 + + if under_count % 2 != 0: + result += "_" - # --- strikethrough ~~ --- - tilde_pairs = result.count("~~") + # --- strikethrough ~~ --- + # Count non-overlapping ``~~`` substrings outside code fences. If odd, + # one strikethrough is unclosed -- append ``~~``. + tilde_pairs = outside_fences.count("~~") if tilde_pairs % 2 != 0: result += "~~" # --- links [text](url) --- open_brackets = 0 m = 0 - while m < len(result): - if result[m] == "\\": + while m < len(outside_fences): + if outside_fences[m] == "\\": m += 2 continue - if result[m] == "[": + if outside_fences[m] == "[": open_brackets += 1 - elif result[m] == "]": + elif outside_fences[m] == "]": open_brackets -= 1 m += 1 if open_brackets > 0: