Skip to content

Commit 620506f

Browse files
Merge pull request #12 from Chinchill-AI/fix/markdown-streaming-audit-fixes
fix: Repair _remend, card fallback, parser thematic breaks and headings
2 parents 3c727ec + ab35767 commit 620506f

4 files changed

Lines changed: 139 additions & 49 deletions

File tree

CLAUDE.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Claude Code Quick Reference -- chat-sdk-python
2+
3+
## What is this?
4+
Python port of Vercel Chat SDK. Multi-platform async chat framework.
5+
6+
## Key Commands
7+
- `uv sync --group dev` -- install dependencies
8+
- `uv run pytest tests/ -q` -- run tests
9+
- `uv run ruff check src/` -- lint
10+
- `uv run ruff format src/` -- format
11+
12+
## Architecture
13+
- `src/chat_sdk/chat.py` -- Main Chat orchestrator (handlers, routing, concurrency)
14+
- `src/chat_sdk/thread.py` -- Thread (streaming, pagination, subscriptions)
15+
- `src/chat_sdk/channel.py` -- Channel (thread listing, metadata)
16+
- `src/chat_sdk/types.py` -- All types (Message, Author, Adapter protocol)
17+
- `src/chat_sdk/adapters/` -- 8 platform adapters
18+
- `src/chat_sdk/shared/` -- Markdown parser, format converter, streaming renderer
19+
- `src/chat_sdk/state/` -- Memory, Redis, Postgres backends
20+
- `tests/` -- 2,477+ tests
21+
22+
## Critical Rules
23+
1. **Never use `datetime.utcnow()`** -- use `datetime.now(tz=timezone.utc)`
24+
2. **Never use `asyncio.ensure_future`** -- use `asyncio.get_running_loop().create_task()`
25+
3. **Never pass raw dicts to `self._chat.process_*`** -- use typed dataclasses (ActionEvent, ReactionEvent, etc.)
26+
4. **Never use camelCase keys in dispatch dicts** -- always snake_case
27+
5. **Never use `random.choices` for security tokens** -- use `secrets.token_hex`
28+
6. **Never import optional deps at module level** -- lazy import inside functions
29+
7. **Always use `hmac.compare_digest` for signature verification** -- never `==`
30+
8. **Always use `is not None` for empty-string-valid fields** -- never `or`
31+
9. **Always validate external URLs before HTTP requests** (SSRF prevention)
32+
10. **Always check `extend_lock` return value** in processing loops
33+
34+
## Adding a New Adapter
35+
See docs/ARCHITECTURE.md and CONTRIBUTING.md.
36+
37+
## Upstream Sync
38+
See docs/UPSTREAM_SYNC.md for TS->Python translation patterns.
39+
40+
## Known Limitations
41+
- Markdown parser handles common cases but is not full CommonMark
42+
- `StreamingMarkdownRenderer`'s `_remend` is a simplified version of the npm `remend` library
43+
- No setext headings, no footnotes, no HTML nodes in the parser

src/chat_sdk/shared/base_format_converter.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -182,8 +182,16 @@ def render_postable(self, message: PostableMessageInput) -> str:
182182
if "ast" in message:
183183
return self.from_ast(message["ast"])
184184
if "card" in message:
185-
return message.get("fallback_text") or message.get("fallbackText") or ""
185+
from chat_sdk.cards import card_to_fallback_text
186+
187+
return card_to_fallback_text(message["card"])
186188
if message.get("type") == "card":
189+
from chat_sdk.cards import is_card_element
190+
191+
if is_card_element(message):
192+
from chat_sdk.cards import card_to_fallback_text
193+
194+
return card_to_fallback_text(message)
187195
return ""
188196
return str(message)
189197

@@ -195,9 +203,8 @@ def render_postable(self, message: PostableMessageInput) -> str:
195203
if hasattr(message, "ast"):
196204
return self.from_ast(message.ast)
197205
if hasattr(message, "card"):
198-
fallback = getattr(message, "fallback_text", None)
199-
if fallback:
200-
return fallback
201-
return ""
206+
from chat_sdk.cards import card_to_fallback_text
207+
208+
return card_to_fallback_text(message.card)
202209

203210
return str(message)

src/chat_sdk/shared/markdown_parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ def _parse_inline(text: str) -> list[Content]:
272272

273273
# Patterns used by the block parser
274274
_HEADING_RE = re.compile(r"^(#{1,6})\s+(.*)")
275-
_THEMATIC_BREAK_RE = re.compile(r"^(\*{3,}|-{3,}|_{3,})\s*$")
275+
_THEMATIC_BREAK_RE = re.compile(r"^([-*_]\s*){3,}\s*$")
276276
_FENCED_CODE_START_RE = re.compile(r"^(`{3,}|~{3,})(.*)")
277277
_BLOCKQUOTE_RE = re.compile(r"^>\s?(.*)")
278278
_ORDERED_LIST_RE = re.compile(r"^(\d+)[.)]\s+(.*)")
@@ -452,7 +452,7 @@ def parse_markdown(text: str) -> Root:
452452
heading_match = _HEADING_RE.match(line)
453453
if heading_match:
454454
depth = len(heading_match.group(1))
455-
heading_text = heading_match.group(2).strip()
455+
heading_text = heading_match.group(2).rstrip().rstrip("#").rstrip()
456456
children.append(make_heading(depth, _parse_inline(heading_text)))
457457
i += 1
458458
continue

src/chat_sdk/shared/streaming_markdown.py

Lines changed: 82 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -32,75 +32,115 @@
3232
# has unclosed constructs).
3333

3434

35+
def _strip_fenced_code(text: str) -> str:
36+
"""Return *text* with content between code fences replaced by empty lines.
37+
38+
This allows inline-marker counting to ignore literal characters inside
39+
fenced code blocks (e.g. ``*`` inside a code block is not an unclosed
40+
italic marker).
41+
"""
42+
lines = text.split("\n")
43+
result_lines: list[str] = []
44+
in_fence = False
45+
for line in lines:
46+
stripped = line.lstrip()
47+
if stripped.startswith("```") or stripped.startswith("~~~"):
48+
in_fence = not in_fence
49+
result_lines.append("") # replace fence line itself
50+
elif in_fence:
51+
result_lines.append("") # replace content inside fence
52+
else:
53+
result_lines.append(line)
54+
return "\n".join(result_lines)
55+
56+
3557
def _remend(text: str) -> str:
36-
"""Close unclosed inline markdown constructs.
58+
"""Repair incomplete markdown by closing unclosed inline markers.
3759
3860
This is a simplified Python equivalent of the ``remend`` npm package.
39-
It scans for unclosed ``**``, ``*``, ``~~``, `` ` ``, and ``[`` and
40-
appends the matching closers.
61+
Fixes issues in the previous implementation:
62+
- Dead code around ``star_count2`` (removed)
63+
- ``~~`` counting confused by ``~~~`` code fences (handled by stripping)
64+
- Missing ``__`` / ``_`` (underscore bold/italic) handling (added)
65+
- Markers inside code blocks no longer counted as inline markers
66+
67+
Strategy: count *unescaped* characters for ``*`` and ``_`` (parity-based)
68+
outside code fences and code spans. Count ``~~`` substrings for
69+
strikethrough. Count backtick characters for inline code.
4170
"""
4271
result = text
4372

44-
# --- code spans (backtick) ---
45-
# Simple heuristic: if the total number of backtick characters is odd,
46-
# there must be an unclosed code span -- close it with one backtick.
47-
# This is idempotent: after closing, the count becomes even and no
48-
# further modification is needed.
49-
if result.count("`") % 2 != 0:
73+
# --- code fences ---
74+
# If inside an unclosed code fence, close it and return immediately.
75+
in_code_fence = False
76+
for line in result.split("\n"):
77+
stripped = line.lstrip()
78+
if stripped.startswith("```") or stripped.startswith("~~~"):
79+
in_code_fence = not in_code_fence
80+
81+
if in_code_fence:
82+
result += "\n```"
83+
return result
84+
85+
# Strip fenced code blocks so their contents don't affect inline counts.
86+
outside_fences = _strip_fenced_code(result)
87+
88+
# --- inline code backticks ---
89+
# Count total backtick characters outside code fences. If odd, one code
90+
# span is unclosed -- append a single backtick.
91+
backtick_count = outside_fences.count("`")
92+
if backtick_count % 2 != 0:
5093
result += "`"
5194

52-
# --- bold / italic ---
53-
# Count unescaped * sequences
95+
# --- bold / italic (* based) ---
96+
# Count total unescaped * characters outside code fences. If odd, append
97+
# one to make even. This is idempotent: once the count is even, no
98+
# further change occurs.
5499
star_count = 0
55100
j = 0
56-
temp = result
57-
while j < len(temp):
58-
if temp[j] == "\\":
101+
while j < len(outside_fences):
102+
if outside_fences[j] == "\\":
59103
j += 2
60104
continue
61-
if temp[j] == "*":
62-
run = 0
63-
while j < len(temp) and temp[j] == "*":
64-
run += 1
65-
j += 1
66-
star_count += run
67-
continue
105+
if outside_fences[j] == "*":
106+
star_count += 1
68107
j += 1
69108

70109
if star_count % 2 != 0:
71110
result += "*"
72-
# After fixing single, check for double
73-
star_count2 = 0
74-
k = 0
75-
temp2 = result
76-
while k < len(temp2):
77-
if temp2[k] == "\\":
78-
k += 2
79-
continue
80-
if temp2[k] == "*":
81-
run = 0
82-
while k < len(temp2) and temp2[k] == "*":
83-
run += 1
84-
k += 1
85-
star_count2 += run
111+
112+
# --- bold / italic (_ based) ---
113+
# Same parity approach for underscore markers.
114+
under_count = 0
115+
j = 0
116+
while j < len(outside_fences):
117+
if outside_fences[j] == "\\":
118+
j += 2
86119
continue
87-
k += 1
120+
if outside_fences[j] == "_":
121+
under_count += 1
122+
j += 1
123+
124+
if under_count % 2 != 0:
125+
result += "_"
88126

89-
# --- strikethrough ~~ ---
90-
tilde_pairs = result.count("~~")
127+
# --- strikethrough ~~ ---
128+
# Count non-overlapping ``~~`` substrings outside code fences. If odd,
129+
# one strikethrough is unclosed -- append ``~~``.
130+
tilde_pairs = outside_fences.count("~~")
91131
if tilde_pairs % 2 != 0:
92132
result += "~~"
93133

94134
# --- links [text](url) ---
95135
open_brackets = 0
96136
m = 0
97-
while m < len(result):
98-
if result[m] == "\\":
137+
while m < len(outside_fences):
138+
if outside_fences[m] == "\\":
99139
m += 2
100140
continue
101-
if result[m] == "[":
141+
if outside_fences[m] == "[":
102142
open_brackets += 1
103-
elif result[m] == "]":
143+
elif outside_fences[m] == "]":
104144
open_brackets -= 1
105145
m += 1
106146
if open_brackets > 0:

0 commit comments

Comments
 (0)