Skip to content

Commit 4681492

Browse files
committed
Enhance tests to validate fallback behavior for virus scan errors and ensure save_file is called in main flow
1 parent cb72b73 commit 4681492

2 files changed

Lines changed: 14 additions & 9 deletions

File tree

tests/test_excel_scraper.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -157,7 +157,7 @@ def mock_stream(method, _url, timeout=10):
157157
async def test_download_excel_scan_error(test_scraper):
158158
"""
159159
Test the scenario where virus scanning fails (connection reset, etc.).
160-
The method should also skip returning file content, but not label it as a virus.
160+
If MIME is valid, content should still be returned (fallback behavior).
161161
"""
162162
fake_excel_content = b"FakeExcelData"
163163
url = "http://example.com/scan_error.xls"
@@ -179,14 +179,15 @@ async def __aexit__(self, exc_type, exc_val, exc_tb):
179179
def mock_stream(method, _url, timeout=10):
180180
return MockResponseContext(200, fake_excel_content)
181181

182-
# "ERROR" indicates scanning couldn't complete
182+
# "ERROR" means scanning failed, but MIME check passes
183183
with patch.object(test_scraper.session, 'stream', side_effect=mock_stream), \
184-
patch.object(test_scraper._security_manager, 'scan_for_viruses', return_value=("ERROR","Connection reset by peer")), \
184+
patch.object(test_scraper._security_manager, 'scan_for_viruses', return_value=("ERROR", "Connection reset")), \
185185
patch.object(test_scraper._security_manager, 'is_excel_file', return_value=True):
186186

187187
returned_url, content = await test_scraper.download_excel(url)
188188
assert returned_url == url
189-
assert content is None, "Should skip returning content if scan error occurs."
189+
assert content == fake_excel_content # ✅ New expected behavior
190+
190191

191192
@pytest.mark.asyncio
192193
async def test_download_excel_not_excel(test_scraper):

tests/test_main.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -40,16 +40,19 @@ async def test_main_scraper_flow():
4040
# IMPORTANT: Patch the methods on the actual module path where
4141
# `NYCInfoHubScraper` is defined and imported by main.py—i.e. "src.excel_scraper"
4242
with patch("src.excel_scraper.NYCInfoHubScraper.scrape_excel_links", return_value=mock_excel_links), \
43-
patch("src.excel_scraper.NYCInfoHubScraper.concurrent_fetch", return_value=mock_files_map), \
44-
patch("src.excel_scraper.NYCInfoHubScraper.parallel_hashing", return_value=mock_hashes), \
45-
patch("src.excel_scraper.NYCInfoHubScraper.save_file") as mock_save:
43+
patch("src.excel_scraper.NYCInfoHubScraper.concurrent_fetch", return_value=mock_files_map), \
44+
patch("src.excel_scraper.NYCInfoHubScraper.parallel_hashing", return_value=mock_hashes), \
45+
patch("src.excel_scraper.NYCInfoHubScraper.save_file") as mock_save:
4646

4747
exit_code = await main_entrypoint()
4848

49-
# Verify main() completed successfully
49+
# ✅ Check main ran successfully
5050
assert exit_code == 0, "Expected main to return 0 on success"
5151

52-
# Check calls to save_file
52+
# ✅ NEW: Confirm the mock was even called
53+
assert mock_save.called, "save_file was not called at all"
54+
55+
# ✅ Check expected calls
5356
mock_save.assert_any_call(
5457
"http://example.com/attendance_2021.xlsx",
5558
b"fake attendance bytes",
@@ -61,3 +64,4 @@ async def test_main_scraper_flow():
6164
"hash2"
6265
)
6366
assert mock_save.call_count == 2, "Expected two calls to save_file"
67+

0 commit comments

Comments
 (0)