Skip to content

Commit b244932

Browse files
committed
Add a decode test with invalid utf8
1 parent 596bf1c commit b244932

2 files changed

Lines changed: 12 additions & 1 deletion

File tree

src/mdurl/_decode.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def repl_func_with_cache(match: re.Match, cache: Sequence[str]) -> str:
8282
continue
8383

8484
if (b1 & 0xF8) == 0xF0 and (i + 9 < l):
85-
# 111110xx 10xxxxxx 10xxxxxx 10xxxxxx
85+
# 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
8686
b2 = int(seq[i + 4 : i + 6], 16)
8787
b3 = int(seq[i + 7 : i + 9], 16)
8888
b4 = int(seq[i + 10 : i + 12], 16)

tests/test_decode.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,14 @@
33

44
def test_decode_multi_byte():
55
assert decode("https://host.invalid/%F0%9F%91%A9") == "https://host.invalid/👩"
6+
7+
8+
def test_decode_invalid_utf8():
9+
assert decode("https://host.invalid/%CF") == "https://host.invalid/\ufffd"
10+
assert decode("https://host.invalid/%C0%bf") == "https://host.invalid/\ufffd\ufffd"
11+
# This is different from `urllib.parse.unquote`. We add 3 * \ufffd as does
12+
# JavaScript upstream, urllib only adds 2 * \ufffd.
13+
assert (
14+
decode("https://host.invalid/%F1%81%d1%45")
15+
== "https://host.invalid/\ufffd\ufffd\ufffdE"
16+
)

0 commit comments

Comments
 (0)