Skip to content

Commit 7adf61b

Browse files
author
salmanrajz
committed
fix: handle UnicodeDecodeError on usernames with special characters
Fixes #2730. Usernames containing non-ASCII characters (e.g. 'Émile') can trigger a UnicodeDecodeError inside the requests library during redirect handling. Because UnicodeDecodeError is not a subclass of requests.exceptions.RequestException, it escaped every existing except block in get_response() and crashed the program. This commit adds an `except UnicodeError` clause (UnicodeError is the parent of both UnicodeDecodeError and UnicodeEncodeError) so that affected sites are skipped gracefully instead of aborting the entire scan. Regression tests are added in tests/test_unicode.py.
1 parent 574aeb4 commit 7adf61b

2 files changed

Lines changed: 51 additions & 0 deletions

File tree

sherlock_project/sherlock.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,9 @@ def get_response(request_future, error_type, social_network):
136136
except requests.exceptions.RequestException as err:
137137
error_context = "Unknown Error"
138138
exception_text = str(err)
139+
except UnicodeError as err:
140+
error_context = "Encoding Error"
141+
exception_text = str(err)
139142

140143
return response, error_context, exception_text
141144

tests/test_unicode.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""Tests for handling usernames with special/unicode characters."""
2+
3+
from concurrent.futures import Future
4+
from unittest.mock import MagicMock
5+
6+
from sherlock_project.sherlock import get_response
7+
8+
9+
def _make_future_with_exception(exc):
10+
"""Create a Future that raises the given exception."""
11+
future = Future()
12+
future.set_exception(exc)
13+
return future
14+
15+
16+
def test_get_response_handles_unicode_decode_error():
    """Regression test for issue #2730.

    Usernames with special characters (e.g. 'Émile') can trigger a
    UnicodeDecodeError inside the requests library during redirect
    handling. This must not crash the program.
    """
    # Simulate the failing request with a future that raises on result().
    future = _make_future_with_exception(
        UnicodeDecodeError("utf-8", b"\xe9", 0, 1, "invalid continuation byte")
    )
    response, error_context, exception_text = get_response(
        request_future=future,
        error_type=["status_code"],
        social_network="TestSite",
    )
    # The error must be reported through the return values, not raised.
    assert response is None
    assert error_context == "Encoding Error"
    # exception_text is str(err), which names the codec that failed.
    assert "utf-8" in exception_text
34+
35+
36+
def test_get_response_handles_unicode_encode_error():
    """UnicodeEncodeError should also be caught (subclass of UnicodeError)."""
    # Simulate the failing request with a future that raises on result().
    future = _make_future_with_exception(
        UnicodeEncodeError("ascii", "É", 0, 1, "ordinal not in range(128)")
    )
    response, error_context, exception_text = get_response(
        request_future=future,
        error_type=["status_code"],
        social_network="TestSite",
    )
    # The error must be reported through the return values, not raised.
    assert response is None
    assert error_context == "Encoding Error"
    # exception_text is str(err), which names the codec that failed.
    assert "ascii" in exception_text

0 commit comments

Comments
 (0)