Skip to content

Commit fcaa276

Browse files
Improved blocklist parsing robustness and added comprehensive tests (#1168)
* Improved blocklist parsing robustness and added comprehensive tests - Added 15-second timeout to HTTP requests to prevent indefinite hanging - Used splitlines() to handle various line ending formats (CRLF, LF, CR) - Filtered empty usernames using walrus operator for better efficiency - Added test coverage for OnlineBlocklist.refresh() method - Added tests for error handling, ETag caching, and edge cases * lint fix
1 parent 6ea42df commit fcaa276

2 files changed

Lines changed: 229 additions & 2 deletions

File tree

lib/blocklist.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@ class BlocklistData:
1616

1717
def _parse_block_list_from_url(url: str, old_data: BlocklistData) -> BlocklistData:
1818
headers = {"If-None-Match": old_data.etag} if old_data.etag else {}
19-
response = requests.get(url, headers=headers)
19+
response = requests.get(url, headers=headers, timeout=15)
2020

2121
response.raise_for_status()
2222

2323
if response.status_code == 304:
2424
return old_data
2525

26-
block_list = [username.strip() for username in response.text.strip().split("\n")]
26+
block_list = [username for line in response.text.strip().splitlines() if (username := line.strip())]
2727

2828
return BlocklistData(block_list, response.headers.get("ETag"))
2929

test_bot/test_blocklist.py

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
"""Tests for the blocklist module."""
2+
from unittest.mock import Mock, patch
3+
4+
import pytest
5+
from urllib3.exceptions import HTTPError
6+
7+
from lib.blocklist import BlocklistData, _parse_block_list_from_url, OnlineBlocklist
8+
9+
10+
def test_parse_block_list_from_url_success() -> None:
11+
"""Test parsing blocklist from URL with successful response."""
12+
mock_response = Mock()
13+
mock_response.status_code = 200
14+
mock_response.text = "user1\nuser2\nuser3"
15+
mock_response.headers = {"ETag": "test-etag-123"}
16+
mock_response.raise_for_status = Mock()
17+
18+
old_data = BlocklistData([], None)
19+
20+
with patch("lib.blocklist.requests.get", return_value=mock_response):
21+
result = _parse_block_list_from_url("http://example.com/blocklist", old_data)
22+
23+
assert result.users == ["user1", "user2", "user3"]
24+
assert result.etag == "test-etag-123"
25+
26+
27+
def test_parse_block_list_from_url__several_line_breaks() -> None:
28+
"""Test parsing blocklist from URL with several line-breaks."""
29+
mock_response = Mock()
30+
mock_response.status_code = 200
31+
mock_response.headers = {"ETag": "test-etag-123"}
32+
mock_response.raise_for_status = Mock()
33+
34+
old_data = BlocklistData([], None)
35+
36+
texts = [
37+
"user1\nuser2\nuser3",
38+
"user1\r\nuser2\r\n\r\nuser3",
39+
"user1\ruser2\r\ruser3",
40+
"user1\nuser2\nuser3",
41+
]
42+
for text in texts:
43+
mock_response.text = text
44+
45+
with patch("lib.blocklist.requests.get", return_value=mock_response):
46+
result = _parse_block_list_from_url("http://example.com/blocklist", old_data)
47+
48+
assert result.users == ["user1", "user2", "user3"]
49+
assert result.etag == "test-etag-123"
50+
51+
52+
def test_parse_block_list_from_url_not_modified() -> None:
53+
"""Test parsing blocklist returns cached data when not modified (304)."""
54+
mock_response = Mock()
55+
mock_response.status_code = 304
56+
mock_response.raise_for_status = Mock()
57+
58+
old_data = BlocklistData(["cached_user1", "cached_user2"], "old-etag")
59+
60+
with patch("lib.blocklist.requests.get", return_value=mock_response):
61+
result = _parse_block_list_from_url("http://example.com/blocklist", old_data)
62+
63+
assert result.users == ["cached_user1", "cached_user2"]
64+
assert result.etag == "old-etag"
65+
66+
67+
def test_parse_block_list_from_url_with_etag_header() -> None:
68+
"""Test that ETag is sent in request headers when available."""
69+
mock_response = Mock()
70+
mock_response.status_code = 200
71+
mock_response.text = "user1"
72+
mock_response.headers = {}
73+
mock_response.raise_for_status = Mock()
74+
75+
old_data = BlocklistData([], "existing-etag")
76+
77+
with patch("lib.blocklist.requests.get", return_value=mock_response) as mock_get:
78+
_parse_block_list_from_url("http://example.com/blocklist", old_data)
79+
mock_get.assert_called_once_with(
80+
"http://example.com/blocklist",
81+
headers={"If-None-Match": "existing-etag"},
82+
timeout=15,
83+
)
84+
85+
86+
def test_parse_block_list_from_url_without_etag_header() -> None:
87+
"""Test that no ETag header is sent when old_data has no ETag."""
88+
mock_response = Mock()
89+
mock_response.status_code = 200
90+
mock_response.text = "user1"
91+
mock_response.headers = {}
92+
mock_response.raise_for_status = Mock()
93+
94+
old_data = BlocklistData([], None)
95+
96+
with patch("lib.blocklist.requests.get", return_value=mock_response) as mock_get:
97+
_parse_block_list_from_url("http://example.com/blocklist", old_data)
98+
mock_get.assert_called_once_with(
99+
"http://example.com/blocklist",
100+
headers={},
101+
timeout=15
102+
)
103+
104+
105+
def test_parse_block_list_from_url_strips_whitespace() -> None:
106+
"""Test that usernames are stripped of leading/trailing whitespace."""
107+
mock_response = Mock()
108+
mock_response.status_code = 200
109+
mock_response.text = " user1 \n user2\t\nuser3 "
110+
mock_response.headers = {}
111+
mock_response.raise_for_status = Mock()
112+
113+
old_data = BlocklistData([], None)
114+
115+
with patch("lib.blocklist.requests.get", return_value=mock_response):
116+
result = _parse_block_list_from_url("http://example.com/blocklist", old_data)
117+
118+
assert result.users == ["user1", "user2", "user3"]
119+
120+
121+
def test_parse_block_list_from_url_empty_response() -> None:
122+
"""Test parsing blocklist with empty response."""
123+
mock_response = Mock()
124+
mock_response.status_code = 200
125+
mock_response.text = ""
126+
mock_response.headers = {}
127+
mock_response.raise_for_status = Mock()
128+
129+
old_data = BlocklistData([], None)
130+
131+
with patch("lib.blocklist.requests.get", return_value=mock_response):
132+
result = _parse_block_list_from_url("http://example.com/blocklist", old_data)
133+
134+
assert result.users == []
135+
136+
137+
def test_parse_block_list_from_url_no_etag_in_response() -> None:
138+
"""Test parsing blocklist when response has no ETag header."""
139+
mock_response = Mock()
140+
mock_response.status_code = 200
141+
mock_response.text = "user1\nuser2"
142+
mock_response.headers = {}
143+
mock_response.raise_for_status = Mock()
144+
145+
old_data = BlocklistData([], None)
146+
147+
with patch("lib.blocklist.requests.get", return_value=mock_response):
148+
result = _parse_block_list_from_url("http://example.com/blocklist", old_data)
149+
150+
assert result.users == ["user1", "user2"]
151+
assert result.etag is None
152+
153+
154+
def test_parse_block_list_from_url_multiline_with_blanks() -> None:
155+
"""Test parsing blocklist that contains blank lines."""
156+
mock_response = Mock()
157+
mock_response.status_code = 200
158+
mock_response.text = "user1\n\nuser2\n\n\nuser3"
159+
mock_response.headers = {}
160+
mock_response.raise_for_status = Mock()
161+
162+
old_data = BlocklistData([], None)
163+
164+
with patch("lib.blocklist.requests.get", return_value=mock_response):
165+
result = _parse_block_list_from_url("http://example.com/blocklist", old_data)
166+
167+
assert result.users == ["user1", "user2", "user3"]
168+
169+
170+
def test_parse_block_list_from_url__exception() -> None:
171+
"""Test parsing blocklist from URL with exception."""
172+
mock_response = Mock()
173+
mock_response.status_code = 500
174+
mock_response.raise_for_status.side_effect = HTTPError("500 Internal Server Error")
175+
176+
old_data = BlocklistData([], None)
177+
178+
with patch("lib.blocklist.requests.get", return_value=mock_response), pytest.raises(HTTPError):
179+
_parse_block_list_from_url("http://example.com/blocklist", old_data)
180+
181+
182+
def test_online_blocklist_refresh_success() -> None:
183+
"""Test successful refresh of all blocklists."""
184+
mock_response1 = Mock()
185+
mock_response1.status_code = 200
186+
mock_response1.text = "user1\nuser2"
187+
mock_response1.headers = {"ETag": "etag1"}
188+
mock_response1.raise_for_status = Mock()
189+
190+
mock_response2 = Mock()
191+
mock_response2.status_code = 200
192+
mock_response2.text = "user3\nuser4\nuser5"
193+
mock_response2.headers = {"ETag": "etag2"}
194+
mock_response2.raise_for_status = Mock()
195+
196+
with patch("lib.blocklist.requests.get") as mock_get:
197+
mock_get.side_effect = [mock_response1, mock_response2]
198+
blocklist = OnlineBlocklist(["http://example.com/list1", "http://example.com/list2"])
199+
200+
assert blocklist.blocklist["http://example.com/list1"].users == ["user1", "user2"]
201+
assert blocklist.blocklist["http://example.com/list1"].etag == "etag1"
202+
assert blocklist.blocklist["http://example.com/list2"].users == ["user3", "user4", "user5"]
203+
assert blocklist.blocklist["http://example.com/list2"].etag == "etag2"
204+
205+
206+
def test_online_blocklist_refresh_partial_failure() -> None:
207+
"""Test refresh when some blocklists fail to load."""
208+
mock_response_success = Mock()
209+
mock_response_success.status_code = 200
210+
mock_response_success.text = "user1\nuser2"
211+
mock_response_success.headers = {"ETag": "etag1"}
212+
mock_response_success.raise_for_status = Mock()
213+
214+
mock_response_fail = Mock()
215+
mock_response_fail.raise_for_status.side_effect = HTTPError("500 Server Error")
216+
217+
with patch("lib.blocklist.requests.get") as mock_get:
218+
# First call succeeds, second call fails
219+
mock_get.side_effect = [mock_response_success, mock_response_fail]
220+
blocklist = OnlineBlocklist(["http://example.com/list1", "http://example.com/list2"])
221+
222+
# First blocklist should be loaded
223+
assert blocklist.blocklist["http://example.com/list1"].users == ["user1", "user2"]
224+
225+
# Second blocklist should remain empty (failed to load)
226+
assert blocklist.blocklist["http://example.com/list2"].users == []
227+
assert blocklist.blocklist["http://example.com/list2"].etag is None

0 commit comments

Comments
 (0)