Skip to content

Commit fba6bbb

Browse files
committed
Improve scan_for_virus robustness, avoid per-file DB lookups, fix typo, and add missing-resource test
Signed-off-by: dikshaa2909 <dikshadeware@gmail.com>
1 parent 807b070 commit fba6bbb

File tree

2 files changed

+103
-16
lines changed

2 files changed

+103
-16
lines changed

scanpipe/pipes/clamav.py

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,48 +20,92 @@
2020
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
2121
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222

23+
import logging
2324
from pathlib import Path
2425

26+
import clamd
2527
from django.conf import settings
2628

27-
import clamd
29+
logger = logging.getLogger(__name__)
2830

2931

3032
def scan_for_virus(project):
    """
    Run a ClamAV scan on the ``project`` codebase to detect virus infection.

    For each infected file that is indexed as a codebase resource, store the
    detection data on the resource ``extra_data`` field under ``virus_report``
    and create one Project error message.
    Infected files without a matching codebase resource are logged and reported
    together in a single additional Project error message.

    Raise an ``Exception`` when the ClamAV service cannot complete the scan.
    """
    if settings.CLAMD_USE_TCP:
        clamd_socket = clamd.ClamdNetworkSocket(settings.CLAMD_TCP_ADDR)
    else:
        clamd_socket = clamd.ClamdUnixSocket()

    try:
        scan_response = clamd_socket.multiscan(file=str(project.codebase_path))
    except clamd.ClamdError as e:
        # Chain the original error so the ClamAV traceback is not lost.
        raise Exception(f"Error with the ClamAV service: {e}") from e

    # First pass: keep only the actual detections, keyed by the path relative to
    # the codebase root, which is how codebase resources are looked up below.
    detections = {}
    for resource_location, (status, reason) in scan_response.items():
        if status != "FOUND":
            if status == "ERROR":
                # Do not silently drop files ClamAV was unable to scan.
                logger.warning(
                    "ClamAV could not scan %s: %s", resource_location, reason
                )
            continue

        resource_path = Path(resource_location).relative_to(project.codebase_path)
        detections[str(resource_path)] = (status, reason)

    if not detections:
        return

    # Single query restricted to the infected paths: avoids both one lookup per
    # infected file and loading every resource path of the project in memory.
    infected_resources = project.codebaseresources.filter(
        path__in=list(detections)
    )
    resources_by_path = {resource.path: resource for resource in infected_resources}

    missing_resources = []

    for resource_path, (status, reason) in detections.items():
        resource = resources_by_path.get(resource_path)

        if resource is None:
            # ClamAV scans the disk; the file may not be indexed in the database.
            missing_resources.append(resource_path)
            logger.warning(
                "ClamAV detected virus in non-indexed file: %s",
                resource_path,
            )
            continue

        virus_report = {
            "clamav": {
                "status": status,
                "reason": reason,
            }
        }
        resource.update_extra_data({"virus_report": virus_report})

        project.add_error(
            description="Virus detected",
            model="ScanForVirus",
            details={
                "status": status,
                "reason": reason,
                "resource_path": resource_path,
            },
        )

    if missing_resources:
        project.add_error(
            description="ClamAV detected virus in files not indexed in DB",
            model="ScanForVirus",
            details={"missing_resources": missing_resources},
        )

scanpipe/tests/pipes/test_clamav.py

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,22 +34,36 @@ class ScanPipeClamAVPipesTest(TestCase):
3434
data = Path(__file__).parent.parent / "data"
3535

3636
@mock.patch("clamd.ClamdNetworkSocket.multiscan")
37-
def test_scanpipe_pipes_clamav_scan_for_virus(self, mock_multiscan):
37+
def test_scanpipe_pipes_clamav_scan_for_virus(
38+
self, mock_multiscan
39+
):
3840
project = Project.objects.create(name="project")
3941
r1 = make_resource_file(project=project, path="eicar.zip")
40-
r2 = make_resource_file(project=project, path="eicar.zip-extract/eicar.com")
42+
r2 = make_resource_file(
43+
project=project,
44+
path="eicar.zip-extract/eicar.com",
45+
)
4146

4247
mock_multiscan.return_value = {
4348
r1.location: ("FOUND", "Win.Test.EICAR_HDB-1"),
4449
r2.location: ("FOUND", "Win.Test.EICAR_HDB-1"),
4550
}
4651

4752
clamav.scan_for_virus(project)
53+
4854
self.assertEqual(2, len(project.projectmessages.all()))
55+
4956
error_message = project.projectmessages.all()[0]
5057
self.assertEqual("error", error_message.severity)
51-
self.assertEqual("Virus detected", error_message.description)
52-
self.assertEqual("ScanForVirus", error_message.model)
58+
self.assertEqual(
59+
"Virus detected",
60+
error_message.description,
61+
)
62+
self.assertEqual(
63+
"ScanForVirus",
64+
error_message.model,
65+
)
66+
5367
expected_details = {
5468
"reason": "Win.Test.EICAR_HDB-1",
5569
"status": "FOUND",
@@ -60,10 +74,39 @@ def test_scanpipe_pipes_clamav_scan_for_virus(self, mock_multiscan):
6074
resource1 = project.codebaseresources.first()
6175
expected_virus_report_extra_data = {
6276
"virus_report": {
63-
"calmav": {
77+
"clamav": {
6478
"status": "FOUND",
6579
"reason": "Win.Test.EICAR_HDB-1",
6680
}
6781
}
6882
}
69-
self.assertEqual(expected_virus_report_extra_data, resource1.extra_data)
83+
self.assertEqual(
84+
expected_virus_report_extra_data,
85+
resource1.extra_data,
86+
)
87+
88+
@mock.patch("clamd.ClamdNetworkSocket.multiscan")
def test_scanpipe_pipes_clamav_missing_resource_does_not_crash(
    self, mock_multiscan
):
    """
    A virus reported on a file without a codebase resource must not crash the
    scan: the indexed file is still tagged and both findings are reported.
    """
    project = Project.objects.create(name="project")

    r1 = make_resource_file(
        project=project,
        path="indexed.txt",
    )

    mock_multiscan.return_value = {
        r1.location: ("FOUND", "Test.Virus"),
        str(project.codebase_path / "non_indexed.txt"): (
            "FOUND",
            "Test.Virus",
        ),
    }

    clamav.scan_for_virus(project)

    # One message for the indexed resource, one for the non-indexed file.
    # Sorted so the assertion does not depend on the default message ordering.
    descriptions = sorted(
        project.projectmessages.values_list("description", flat=True)
    )
    self.assertEqual(
        [
            "ClamAV detected virus in files not indexed in DB",
            "Virus detected",
        ],
        descriptions,
    )

    # Re-fetch: the pipe updates its own instance of the resource, not ``r1``.
    indexed_resource = project.codebaseresources.get(path="indexed.txt")
    expected_extra_data = {
        "virus_report": {
            "clamav": {
                "status": "FOUND",
                "reason": "Test.Virus",
            }
        }
    }
    self.assertEqual(expected_extra_data, indexed_resource.extra_data)

0 commit comments

Comments
 (0)