Skip to content

Commit 4656d95

Browse files
author
salmanrajz
committed
fix: handle UnicodeDecodeError on usernames with special characters
Fixes #2730. Usernames containing non-ASCII characters (e.g. 'Émile') can trigger a UnicodeDecodeError inside the requests library during redirect handling. This exception is not a subclass of requests.exceptions.RequestException, so it escaped all existing except blocks in get_response() and crashed the program. Added a catch for UnicodeError (parent of both UnicodeDecodeError and UnicodeEncodeError) so these sites are gracefully skipped instead of crashing the entire scan. Added regression tests in tests/test_unicode.py.
1 parent 574aeb4 commit 4656d95

2 files changed

Lines changed: 50 additions & 0 deletions

File tree

sherlock_project/sherlock.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,9 @@ def get_response(request_future, error_type, social_network):
136136
except requests.exceptions.RequestException as err:
137137
error_context = "Unknown Error"
138138
exception_text = str(err)
139+
except UnicodeError as err:
140+
error_context = "Encoding Error"
141+
exception_text = str(err)
139142

140143
return response, error_context, exception_text
141144

tests/test_unicode.py

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,47 @@
1+
"""Tests for handling usernames with special/unicode characters."""
2+
3+
from concurrent.futures import Future
4+
5+
from sherlock_project.sherlock import get_response
6+
7+
8+
def _make_future_with_exception(exc):
    """Return an already-failed Future carrying ``exc``.

    The exception is stored on the Future via ``set_exception``, so
    asking the Future for its result re-raises ``exc``.
    """
    failed = Future()
    failed.set_exception(exc)
    return failed
13+
14+
15+
def test_get_response_handles_unicode_decode_error():
    """Regression test for issue #2730.

    A request Future that fails with UnicodeDecodeError (as reported for
    usernames with special characters such as 'Émile') must be turned
    into an "Encoding Error" result by get_response instead of
    propagating and crashing the program.
    """
    decode_error = UnicodeDecodeError(
        "utf-8", b"\xe9", 0, 1, "invalid continuation byte"
    )
    outcome = get_response(
        request_future=_make_future_with_exception(decode_error),
        error_type=["status_code"],
        social_network="TestSite",
    )
    response, error_context, exception_text = outcome
    assert response is None
    assert error_context == "Encoding Error"
    # str() of the exception names the codec, so it must show up in the text.
    assert "utf-8" in exception_text
33+
34+
35+
def test_get_response_handles_unicode_encode_error():
    """Check that UnicodeEncodeError is handled like UnicodeDecodeError.

    UnicodeEncodeError is a subclass of UnicodeError, so get_response
    is expected to report it as an "Encoding Error" as well.
    """
    encode_error = UnicodeEncodeError(
        "ascii", "É", 0, 1, "ordinal not in range(128)"
    )
    call_kwargs = {
        "request_future": _make_future_with_exception(encode_error),
        "error_type": ["status_code"],
        "social_network": "TestSite",
    }
    response, error_context, exception_text = get_response(**call_kwargs)
    assert response is None
    assert error_context == "Encoding Error"
    # str() of the exception names the codec, so it must show up in the text.
    assert "ascii" in exception_text

0 commit comments

Comments
 (0)