Skip to content

Commit cbb5680

Browse files
greynewell and claude authored
Fix truncate: slice by runes not bytes to avoid invalid UTF-8 (#98)
truncate() was using s[:max-1] (byte slice) to shorten strings before embedding in SVG. For multi-byte UTF-8 characters (é, Ñ, Ü, 日, etc.) the slice position could fall inside a character's byte sequence, producing invalid UTF-8 in the generated SVG. Fix: convert to []rune, truncate by rune count, convert back. Reproducer: truncate("Über", 2) returned "\xc3…" (invalid UTF-8); now returns "Ü…". Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 258edb3 commit cbb5680

2 files changed

Lines changed: 65 additions & 3 deletions

File tree

internal/archdocs/pssg/render/shareimage.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,13 @@ func svgEscape(s string) string {
2525
return s
2626
}
2727

28-
// truncate limits string length with ellipsis.
28+
// truncate limits string length (in runes) with ellipsis.
2929
func truncate(s string, max int) string {
30-
if len(s) <= max {
30+
runes := []rune(s)
31+
if len(runes) <= max {
3132
return s
3233
}
33-
return s[:max-1] + "\u2026"
34+
return string(runes[:max-1]) + "\u2026"
3435
}
3536

3637
// svgScaffold wraps content in the standard share image scaffold.
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package render
2+
3+
import (
4+
"testing"
5+
"unicode/utf8"
6+
)
7+
8+
func TestTruncateASCII(t *testing.T) {
9+
cases := []struct {
10+
input string
11+
max int
12+
want string
13+
}{
14+
{"hello", 10, "hello"}, // short — no truncation
15+
{"hello", 5, "hello"}, // exactly max — no truncation
16+
{"hello world", 6, "hello…"}, // truncated to 5 runes + ellipsis
17+
{"", 5, ""}, // empty string
18+
}
19+
for _, c := range cases {
20+
got := truncate(c.input, c.max)
21+
if got != c.want {
22+
t.Errorf("truncate(%q, %d) = %q, want %q", c.input, c.max, got, c.want)
23+
}
24+
}
25+
}
26+
27+
// TestTruncateMultiByte verifies that truncate does not slice inside a multi-byte
28+
// UTF-8 character sequence, which would produce invalid UTF-8 in the SVG output.
29+
// Before the fix, truncate used byte-based slicing: s[:max-1].
30+
// For a string like "Ñandú" (5 runes but 7 bytes), truncating at max=3 would
31+
// compute s[:2] = [0xC3, 0x91] — exactly the 2 bytes of "Ñ" — yielding "Ñ…"
32+
// rather than the expected "Ña…". The important invariant is that the output is
33+
// always valid UTF-8 and has exactly min(len(runes), max) runes.
34+
func TestTruncateMultiByte(t *testing.T) {
35+
cases := []struct {
36+
input string
37+
max int
38+
want string
39+
}{
40+
// "Über" is 4 runes, 6 bytes (Ü = 2 bytes)
41+
{"Über", 10, "Über"}, // no truncation
42+
{"Über", 4, "Über"}, // exactly max runes — no truncation
43+
{"Über", 3, "Üb…"}, // 2 runes + ellipsis
44+
{"Über", 2, "Ü…"}, // 1 rune + ellipsis
45+
46+
// "Ñandú" is 5 runes, 7 bytes
47+
{"Ñandú", 4, "Ñan…"}, // 3 runes + ellipsis
48+
49+
// "日本語テスト" is 6 runes, 18 bytes (each char = 3 bytes)
50+
{"日本語テスト", 4, "日本語…"}, // 3 runes + ellipsis
51+
}
52+
for _, c := range cases {
53+
got := truncate(c.input, c.max)
54+
if got != c.want {
55+
t.Errorf("truncate(%q, %d) = %q, want %q", c.input, c.max, got, c.want)
56+
}
57+
if !utf8.ValidString(got) {
58+
t.Errorf("truncate(%q, %d) = %q — result is not valid UTF-8", c.input, c.max, got)
59+
}
60+
}
61+
}

0 commit comments

Comments
 (0)