Skip to content

Commit 290181a

Browse files
authored
Merge pull request #74 from ClaydeCode/feat/ntfy-utf8-title
feat(pebble): send ntfy title as native UTF-8 via RFC 2047
2 parents f444ca8 + 6b11376 commit 290181a

2 files changed

Lines changed: 127 additions & 50 deletions

File tree

src/clayde/webhook/notify.py

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from __future__ import annotations
88

99
import logging
10+
from email.header import Header
1011

1112
import httpx
1213
from pydantic import BaseModel, field_validator
@@ -16,36 +17,30 @@
1617
log = logging.getLogger("clayde.webhook.notify")
1718

1819

19-
# ntfy header values are sent through httpx, which encodes headers as
20-
# latin-1. Anything outside that range raises UnicodeEncodeError before
21-
# the request goes out, so the user never sees the notification. We
22-
# normalise common typographic Unicode to ASCII and replace anything
23-
# left over with '?'.
24-
_UNICODE_TO_ASCII = str.maketrans({
25-
"—": "-", # em dash
26-
"–": "-", # en dash
27-
"−": "-", # minus sign
28-
"‘": "'", # left single quote
29-
"’": "'", # right single quote / apostrophe
30-
"“": '"', # left double quote
31-
"”": '"', # right double quote
32-
"…": "...", # ellipsis
33-
" ": " ", # non-breaking space
34-
})
20+
def _encode_header_value(text: str) -> str:
21+
"""Encode a header value safely for httpx.
3522
36-
37-
def _to_ascii(text: str) -> str:
38-
"""Coerce arbitrary text to safe ASCII for use in HTTP headers."""
39-
return text.translate(_UNICODE_TO_ASCII).encode("ascii", "replace").decode("ascii")
23+
httpx serialises header values as latin-1, so raw non-ASCII strings
24+
raise UnicodeEncodeError before the request leaves the process. ntfy
25+
accepts RFC 2047 encoded-words (``=?utf-8?b?<base64>?=``) and decodes
26+
them server-side, so we route non-ASCII through that. ASCII titles
27+
pass through verbatim — keeps log/trace output readable and avoids
28+
pointless wire overhead.
29+
"""
30+
try:
31+
text.encode("ascii")
32+
except UnicodeEncodeError:
33+
return Header(text, charset="utf-8").encode()
34+
return text
4035

4136

4237
class NotificationPayload(BaseModel):
4338
"""Outcome of a Pebble run, as emitted by Claude in the JSON tail.
4439
4540
Title is clamped to 40 chars and body to 300 chars at construction
46-
time so accidental over-long values never propagate to ntfy headers.
47-
Title is additionally coerced to ASCII because it travels as an HTTP
48-
header and httpx rejects non-latin-1 header values.
41+
time so accidental over-long values never propagate to ntfy. The
42+
title is stored as the raw Unicode string the user/Claude produced;
43+
RFC 2047 encoding for the actual HTTP header happens in ``send_ntfy``.
4944
"""
5045

5146
title: str
@@ -55,9 +50,7 @@ class NotificationPayload(BaseModel):
5550
@field_validator("title", mode="before")
5651
@classmethod
5752
def _clamp_title(cls, v):
58-
if not isinstance(v, str):
59-
return v
60-
return _to_ascii(v)[:40]
53+
return v[:40] if isinstance(v, str) else v
6154

6255
@field_validator("body", mode="before")
6356
@classmethod
@@ -77,13 +70,14 @@ async def send_ntfy(
7770
"""POST to ntfy.sh. Best-effort: errors are logged + OTel-annotated, never raised."""
7871
url = f"{base_url.rstrip('/')}/{topic}"
7972
headers = {
80-
"Title": title,
73+
"Title": _encode_header_value(title),
8174
"Priority": "3" if success else "5",
8275
"Tags": "white_check_mark" if success else "rotating_light",
8376
}
8477
tracer = get_tracer()
8578
with tracer.start_as_current_span("clayde.pebble.notify") as span:
8679
span.set_attribute("pebble.notify_topic", topic)
80+
# Span attribute holds the raw Unicode title for readable traces.
8781
span.set_attribute("pebble.notify_title", title)
8882
span.set_attribute("pebble.outcome_success", success)
8983
try:

tests/test_webhook_notify.py

Lines changed: 106 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@
22

33
from __future__ import annotations
44

5+
import base64
6+
import re
7+
58
import httpx
69
import pytest
710
import respx
811

9-
from clayde.webhook.notify import NotificationPayload, send_ntfy
12+
from clayde.webhook.notify import NotificationPayload, _encode_header_value, send_ntfy
1013

1114

1215
def test_notification_payload_clamps_length():
@@ -15,41 +18,57 @@ def test_notification_payload_clamps_length():
1518
assert len(p.body) == 300
1619

1720

21+
def test_notification_payload_clamps_length_with_unicode():
    # The clamp is expressed in characters, not bytes: a multibyte
    # character must use up exactly one position of the budget.
    payload = NotificationPayload(title="ü" * 100, body="ß" * 1000, success=True)
    title_len, body_len = len(payload.title), len(payload.body)
    assert title_len == 40
    assert body_len == 300
    assert payload.title == "ü" * 40
28+
29+
1830
def test_notification_payload_accepts_short():
    # Values already inside the limits must come back untouched.
    payload = NotificationPayload(title="hi", body="all good", success=True)
    assert (payload.title, payload.body) == ("hi", "all good")
    assert payload.success is True
2335

2436

25-
def test_notification_payload_em_dash_in_title_normalised():
26-
# Real prod failure: em dash in title raised UnicodeEncodeError when
27-
# httpx serialised the header as latin-1.
28-
p = NotificationPayload(title="Thomas Stegger — plant prefs saved", body="ok", success=True)
29-
assert "—" not in p.title
30-
assert p.title == "Thomas Stegger - plant prefs saved"
31-
# Must round-trip cleanly through latin-1 (the header codec httpx uses).
32-
p.title.encode("latin-1")
37+
def test_notification_payload_preserves_unicode():
    # The payload keeps the raw Unicode title; RFC 2047 encoding is
    # applied later, in send_ntfy.
    raw_title = "Müller — Notiz"
    payload = NotificationPayload(title=raw_title, body="ok", success=True)
    assert payload.title == raw_title
3341

3442

35-
def test_notification_payload_smart_quotes_in_title_normalised():
36-
p = NotificationPayload(title="“hi” ‘there’", body="ok", success=True)
37-
assert p.title == '"hi" \'there\''
43+
def test_encode_header_value_passes_ascii_through():
    # Pure-ASCII input must be returned exactly as given.
    plain = "plain ascii"
    assert _encode_header_value(plain) == plain
3845

3946

40-
def test_notification_payload_unknown_unicode_in_title_replaced():
41-
p = NotificationPayload(title="emoji \U0001f600 tail", body="ok", success=True)
42-
assert "\U0001f600" not in p.title
43-
p.title.encode("ascii")
47+
# Matches one RFC 2047 encoded-word with a utf-8 charset in either the
# B (base64) or Q (quoted-printable) form; matching is case-insensitive.
_RFC2047_WORD = re.compile(r"=\?utf-8\?[bq]\?[^?]*\?=", re.IGNORECASE)
4448

4549

46-
def test_notification_payload_ascii_coercion_runs_before_clamp():
47-
# "..." (3 chars) replaces "…" (1 char); clamp comes after, so a
48-
# title that fit pre-replacement may not fit after — and that's fine.
49-
long = "a" * 38 + "…" # 39 chars in, 41 chars after replacement
50-
p = NotificationPayload(title=long, body="ok", success=True)
51-
assert len(p.title) == 40
52-
p.title.encode("ascii")
50+
def test_encode_header_value_rfc2047_encodes_unicode():
    original = "Thomas Stegger — plant prefs saved"
    encoded = _encode_header_value(original)
    # email.header.Header may emit either the B or the Q form of an
    # encoded-word; both are valid RFC 2047 and ntfy decodes either.
    assert _RFC2047_WORD.search(encoded)
    assert _decode_rfc2047(encoded) == original
    # The encoded value has to be pure ASCII, otherwise httpx cannot
    # serialise it as a header.
    encoded.encode("ascii")
59+
60+
61+
def _decode_rfc2047(encoded: str) -> str:
62+
"""Decode an RFC 2047 encoded-word string back to its Unicode form."""
63+
from email.header import decode_header
64+
parts = decode_header(encoded)
65+
out = []
66+
for chunk, charset in parts:
67+
if isinstance(chunk, bytes):
68+
out.append(chunk.decode(charset or "ascii"))
69+
else:
70+
out.append(chunk)
71+
return "".join(out)
5372

5473

5574
@pytest.mark.asyncio
@@ -68,6 +87,7 @@ async def test_send_ntfy_success_headers():
6887
)
6988
assert route.called
7089
req = route.calls.last.request
90+
# ASCII title passes through verbatim.
7191
assert req.headers["title"] == "pong"
7292
assert req.headers["priority"] == "3"
7393
assert req.headers["tags"] == "white_check_mark"
@@ -93,6 +113,69 @@ async def test_send_ntfy_uses_failure_priority_and_tags_when_success_false():
93113
assert req.headers["tags"] == "rotating_light"
94114

95115

116+
@pytest.mark.asyncio
@respx.mock
async def test_send_ntfy_encodes_unicode_title_as_rfc2047():
    expected_title = "Thomas Stegger — plant prefs saved"
    mocked_post = respx.post("https://ntfy.sh/abc123").mock(
        return_value=httpx.Response(200, json={"id": "msg1"})
    )
    call_kwargs = {
        "title": expected_title,
        "body": "ok",
        "success": True,
        "base_url": "https://ntfy.sh",
        "topic": "abc123",
        "timeout_s": 5,
    }
    await send_ntfy(**call_kwargs)
    sent_title = mocked_post.calls.last.request.headers["title"]
    # The transmitted header has to be pure ASCII for httpx to send it.
    sent_title.encode("ascii")
    assert _RFC2047_WORD.search(sent_title)
    assert _decode_rfc2047(sent_title) == expected_title
137+
138+
139+
@pytest.mark.asyncio
@respx.mock
async def test_send_ntfy_handles_emoji_title():
    expected_title = "\U0001f600 done"
    mocked_post = respx.post("https://ntfy.sh/abc123").mock(
        return_value=httpx.Response(200, json={"id": "msg1"})
    )
    call_kwargs = {
        "title": expected_title,
        "body": "ok",
        "success": True,
        "base_url": "https://ntfy.sh",
        "topic": "abc123",
        "timeout_s": 5,
    }
    await send_ntfy(**call_kwargs)
    sent_title = mocked_post.calls.last.request.headers["title"]
    # Must be representable as ASCII on the wire and decode back to the
    # original title.
    sent_title.encode("ascii")
    assert _decode_rfc2047(sent_title) == expected_title
158+
159+
160+
@pytest.mark.asyncio
@respx.mock
async def test_send_ntfy_handles_german_umlauts_title():
    expected_title = "Müller — Notiz gespeichert"
    mocked_post = respx.post("https://ntfy.sh/abc123").mock(
        return_value=httpx.Response(200, json={"id": "msg1"})
    )
    call_kwargs = {
        "title": expected_title,
        "body": "ok",
        "success": True,
        "base_url": "https://ntfy.sh",
        "topic": "abc123",
        "timeout_s": 5,
    }
    await send_ntfy(**call_kwargs)
    # The header sent to ntfy must decode back to the original title.
    sent_request = mocked_post.calls.last.request
    assert _decode_rfc2047(sent_request.headers["title"]) == expected_title
177+
178+
96179
@pytest.mark.asyncio
97180
@respx.mock
98181
async def test_send_ntfy_swallows_errors():

0 commit comments

Comments
 (0)