Skip to content

Commit c05b0b1

Browse files
committed
Improve scan_for_virus robustness, avoid per-file DB lookups, fix typo, and add missing-resource test
Signed-off-by: dikshaa2909 <dikshadeware@gmail.com>
1 parent 807b070 commit c05b0b1

2 files changed

Lines changed: 58 additions & 7 deletions

File tree

scanpipe/pipes/clamav.py

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,23 @@
2121
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222

2323
from pathlib import Path
24+
import logging
2425

2526
from django.conf import settings
26-
2727
import clamd
2828

29+
logger = logging.getLogger(__name__)
30+
2931

3032
def scan_for_virus(project):
3133
"""
3234
Run a ClamAV scan to detect virus infection.
33-
Create one Project error message per found virus and store the detection data
34-
on the related codebase resource ``extra_data`` field.
35+
36+
- Avoid crashes when ClamAV reports files not indexed in CodebaseResource.
37+
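- Check reported paths against a preloaded set of indexed resource paths, so non-indexed files are skipped without a DB query (an indexed resource is still fetched once per detected virus).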
38+
- Record a project-level error for any detected virus.
3539
"""
40+
3641
if settings.CLAMD_USE_TCP:
3742
clamd_socket = clamd.ClamdNetworkSocket(settings.CLAMD_TCP_ADDR)
3843
else:
@@ -43,17 +48,40 @@ def scan_for_virus(project):
4348
except clamd.ClamdError as e:
4449
raise Exception(f"Error with the ClamAV service: {e}")
4550

51+
# Preload all valid indexed resource paths
52+
valid_paths = set(
53+
project.codebaseresources.values_list("path", flat=True)
54+
)
55+
56+
missing_resources = []
57+
4658
for resource_location, results in scan_response.items():
4759
status, reason = results
60+
61+
if status != "FOUND":
62+
continue
63+
4864
resource_path = Path(resource_location).relative_to(project.codebase_path)
65+
resource_path_str = str(resource_path)
66+
67+
if resource_path_str not in valid_paths:
68+
missing_resources.append(resource_path_str)
69+
logger.warning(
70+
f"ClamAV detected virus in non-indexed file: {resource_path_str}"
71+
)
72+
continue
73+
74+
resource = project.codebaseresources.filter(
75+
path=resource_path_str
76+
).first()
4977

50-
resource = project.codebaseresources.get(path=resource_path)
5178
virus_report = {
52-
"calmav": {
79+
"clamav": {
5380
"status": status,
5481
"reason": reason,
5582
}
5683
}
84+
5785
resource.update_extra_data({"virus_report": virus_report})
5886

5987
project.add_error(
@@ -62,6 +90,14 @@ def scan_for_virus(project):
6290
details={
6391
"status": status,
6492
"reason": reason,
65-
"resource_path": str(resource_path),
93+
"resource_path": resource_path_str,
6694
},
6795
)
96+
97+
if missing_resources:
98+
project.add_error(
99+
description="ClamAV detected virus in files not indexed in DB",
100+
model="ScanForVirus",
101+
details={"missing_resources": missing_resources},
102+
)
103+

scanpipe/tests/pipes/test_clamav.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,25 @@ def test_scanpipe_pipes_clamav_scan_for_virus(self, mock_multiscan):
6060
resource1 = project.codebaseresources.first()
6161
expected_virus_report_extra_data = {
6262
"virus_report": {
63-
"calmav": {
63+
"clamav": {
6464
"status": "FOUND",
6565
"reason": "Win.Test.EICAR_HDB-1",
6666
}
6767
}
6868
}
6969
self.assertEqual(expected_virus_report_extra_data, resource1.extra_data)
70+
71+
@mock.patch("clamd.ClamdNetworkSocket.multiscan")
def test_scanpipe_pipes_clamav_missing_resource_does_not_crash(
    self, mock_multiscan
):
    """
    Check that ``scan_for_virus`` copes with a ClamAV hit on a file that has
    no matching codebase resource.

    The mocked scan reports one indexed and one non-indexed infected file.
    The indexed resource must still receive its ``virus_report`` and the
    non-indexed path must be reported in a project message instead of
    raising.
    """
    project = Project.objects.create(name="project")
    r1 = make_resource_file(project=project, path="indexed.txt")

    mock_multiscan.return_value = {
        r1.location: ("FOUND", "Test.Virus"),
        str(project.codebase_path / "non_indexed.txt"): ("FOUND", "Test.Virus"),
    }

    clamav.scan_for_virus(project)

    # One message for the indexed infected file, one aggregate message for
    # the files ClamAV reported that are not indexed.
    self.assertEqual(2, project.projectmessages.count())

    # ``scan_for_virus`` updates its own instance of the resource, so reload
    # ``r1`` before inspecting the stored data.
    r1.refresh_from_db()
    expected_virus_report_extra_data = {
        "virus_report": {
            "clamav": {
                "status": "FOUND",
                "reason": "Test.Virus",
            }
        }
    }
    self.assertEqual(expected_virus_report_extra_data, r1.extra_data)

    # The non-indexed file must be listed relative to the codebase root.
    all_details = [message.details for message in project.projectmessages.all()]
    self.assertIn({"missing_resources": ["non_indexed.txt"]}, all_details)
84+

0 commit comments

Comments
 (0)