2121# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222
2323from pathlib import Path
24+ import logging
2425
2526from django .conf import settings
26-
2727import clamd
2828
29+ logger = logging .getLogger (__name__ )
30+
2931
3032def scan_for_virus (project ):
3133 """
3234 Run a ClamAV scan to detect virus infection.
33- Create one Project error message per found virus and store the detection data
34- on the related codebase resource ``extra_data`` field.
35+
36+ - Avoid crashes when ClamAV reports files not indexed in CodebaseResource.
37+ - Avoid a per-file DB existence check by preloading valid resource paths (each infected, indexed resource is still fetched individually).
38+ - Record a project-level error for any detected virus.
3539 """
40+
3641 if settings .CLAMD_USE_TCP :
3742 clamd_socket = clamd .ClamdNetworkSocket (settings .CLAMD_TCP_ADDR )
3843 else :
@@ -43,17 +48,40 @@ def scan_for_virus(project):
4348 except clamd .ClamdError as e :
4449 raise Exception (f"Error with the ClamAV service: { e } " )
4550
51+ # Preload all valid indexed resource paths
52+ valid_paths = set (
53+ project .codebaseresources .values_list ("path" , flat = True )
54+ )
55+
56+ missing_resources = []
57+
4658 for resource_location , results in scan_response .items ():
4759 status , reason = results
60+
61+ if status != "FOUND" :
62+ continue
63+
4864 resource_path = Path (resource_location ).relative_to (project .codebase_path )
65+ resource_path_str = str (resource_path )
66+
67+ if resource_path_str not in valid_paths :
68+ missing_resources .append (resource_path_str )
69+ logger .warning (
70+ f"ClamAV detected virus in non-indexed file: { resource_path_str } "
71+ )
72+ continue
73+
74+ resource = project .codebaseresources .filter (
75+ path = resource_path_str
76+ ).first ()
4977
50- resource = project .codebaseresources .get (path = resource_path )
5178 virus_report = {
52- "calmav " : {
79+ "clamav " : {
5380 "status" : status ,
5481 "reason" : reason ,
5582 }
5683 }
84+
5785 resource .update_extra_data ({"virus_report" : virus_report })
5886
5987 project .add_error (
@@ -62,6 +90,14 @@ def scan_for_virus(project):
6290 details = {
6391 "status" : status ,
6492 "reason" : reason ,
65- "resource_path" : str ( resource_path ) ,
93+ "resource_path" : resource_path_str ,
6694 },
6795 )
96+
97+ if missing_resources :
98+ project .add_error (
99+ description = "ClamAV detected virus in files not indexed in DB" ,
100+ model = "ScanForVirus" ,
101+ details = {"missing_resources" : missing_resources },
102+ )
103+
0 commit comments