Skip to content

Commit 41feb96

Browse files
vulnerability_id processing: separate import/reimport
1 parent 53c84d1 commit 41feb96

5 files changed

Lines changed: 69 additions & 62 deletions

File tree

dojo/importers/base_importer.py

Lines changed: 5 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -786,51 +786,23 @@ def process_cve(
786786

787787
return finding
788788

789-
def process_vulnerability_ids(
789+
def store_vulnerability_ids(
790790
self,
791791
finding: Finding,
792792
) -> Finding:
793793
"""
794-
Parse the `unsaved_vulnerability_ids` field from findings after they are parsed
795-
to create `Vulnerability_Id` objects with the finding associated correctly.
796-
Only updates if vulnerability_ids have changed to avoid unnecessary database operations.
797-
798-
Note: The finding parameter can be:
799-
- A new finding (from import) with unsaved_vulnerability_ids set from the parsed report
800-
- An existing finding (from reimport) with unsaved_vulnerability_ids copied from the
801-
finding_from_report before calling this method
794+
Store vulnerability IDs for a finding.
795+
Reads from finding.unsaved_vulnerability_ids and saves them overwriting existing ones.
802796
803797
Args:
804-
finding: The finding to process vulnerability IDs for.
805-
For reimports, this is the existing finding with unsaved_vulnerability_ids
806-
set from the import/reimport report.
798+
finding: The finding to store vulnerability IDs for
807799
808800
Returns:
809801
The finding object
810802
811803
"""
812-
# Normalize to empty list if None - always process vulnerability IDs
813804
vulnerability_ids_to_process = finding.unsaved_vulnerability_ids or []
814-
815-
# For existing findings, check if there are changes before updating
816-
if finding.pk and len(finding.vulnerability_id_set.all()) > 0:
817-
# Get existing vulnerability IDs from the database
818-
existing_vuln_ids = set(finding.vulnerability_ids) if finding.vulnerability_ids else set()
819-
# Normalize the new vulnerability IDs (remove duplicates for comparison)
820-
new_vuln_ids = set(vulnerability_ids_to_process)
821-
822-
# Only update if vulnerability IDs have changed
823-
if existing_vuln_ids == new_vuln_ids:
824-
logger.debug(
825-
f"Skipping vulnerability_ids update for finding {finding.id} - "
826-
f"vulnerability_ids unchanged: {sorted(existing_vuln_ids)}",
827-
)
828-
return finding
829-
830-
# Use the helper function which handles deletion and creation
831-
# Always use delete_existing=True - it's a no-op if there are no existing IDs
832-
finding_helper.save_vulnerability_ids(finding, vulnerability_ids_to_process, delete_existing=True)
833-
805+
finding_helper.save_vulnerability_ids(finding, vulnerability_ids_to_process, delete_existing=False)
834806
return finding
835807

836808
def process_files(

dojo/importers/default_importer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ def process_findings(
234234
# Process any files
235235
self.process_files(finding)
236236
# Process vulnerability IDs
237-
finding = self.process_vulnerability_ids(finding)
237+
finding = self.store_vulnerability_ids(finding)
238238
# Categorize this finding as a new one
239239
new_findings.append(finding)
240240
# all data is already saved on the finding, we only need to trigger post processing in batches

dojo/importers/default_reimporter.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,9 @@ def process_findings(
175175
else:
176176
original_findings = self.test.finding_set.all().filter(Q(service__isnull=True) | Q(service__exact=""))
177177

178+
# Prefetch vulnerability_id_set for reconcile_vulnerability_ids
179+
original_findings = original_findings.prefetch_related("vulnerability_id_set")
180+
178181
logger.debug(f"original_findings_query: {original_findings.query}")
179182
self.original_items = list(original_findings)
180183
logger.debug(f"original_items: {[(item.id, item.hash_code) for item in self.original_items]}")
@@ -691,6 +694,41 @@ def process_finding_that_was_not_matched(
691694
self.process_request_response_pairs(unsaved_finding)
692695
return unsaved_finding
693696

697+
def reconcile_vulnerability_ids(
698+
self,
699+
finding: Finding,
700+
) -> Finding:
701+
"""
702+
Reconcile vulnerability IDs for an existing finding.
703+
Checks if IDs have changed before updating to avoid unnecessary database operations.
704+
Uses prefetched data if available, otherwise fetches efficiently.
705+
706+
Args:
707+
finding: The existing finding to reconcile vulnerability IDs for.
708+
Must have unsaved_vulnerability_ids set.
709+
710+
Returns:
711+
The finding object
712+
713+
"""
714+
vulnerability_ids_to_process = finding.unsaved_vulnerability_ids or []
715+
716+
# Use prefetched data directly without triggering queries
717+
existing_vuln_ids = {v.vulnerability_id for v in finding.vulnerability_id_set.all()}
718+
new_vuln_ids = set(vulnerability_ids_to_process)
719+
720+
# Early exit if unchanged
721+
if existing_vuln_ids == new_vuln_ids:
722+
logger.debug(
723+
f"Skipping vulnerability_ids update for finding {finding.id} - "
724+
f"vulnerability_ids unchanged: {sorted(existing_vuln_ids)}",
725+
)
726+
return finding
727+
728+
# Update if changed
729+
finding_helper.save_vulnerability_ids(finding, vulnerability_ids_to_process, delete_existing=True)
730+
return finding
731+
694732
def finding_post_processing(
695733
self,
696734
finding: Finding,
@@ -716,13 +754,13 @@ def finding_post_processing(
716754
self.process_files(finding)
717755
# Process vulnerability IDs
718756
# Copy unsaved_vulnerability_ids from the report finding to the existing finding
719-
# so process_vulnerability_ids can process them (see its docstring for details)
757+
# so reconcile_vulnerability_ids can process them
720758
# Always set it (even if empty list) so we can clear existing IDs when report has none
721759
finding.unsaved_vulnerability_ids = finding_from_report.unsaved_vulnerability_ids or []
722760
# Store the current cve value to check if it changes
723761
old_cve = finding.cve
724762
# legacy cve field has already been processed/set earlier
725-
finding = self.process_vulnerability_ids(finding)
763+
finding = self.reconcile_vulnerability_ids(finding)
726764
# Save the finding only if the cve field was changed by save_vulnerability_ids
727765
# This is temporary as the cve field will be phased out
728766
if finding.cve != old_cve:

unittests/test_importers_importer.py

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from rest_framework.test import APIClient
88

99
from dojo.importers.default_importer import DefaultImporter
10+
from dojo.importers.default_reimporter import DefaultReImporter
1011
from dojo.models import (
1112
Development_Environment,
1213
Engagement,
@@ -562,16 +563,15 @@ def create_default_data(self):
562563
"scan_type": NPM_AUDIT_SCAN_TYPE,
563564
}
564565

565-
@patch("dojo.importers.base_importer.Vulnerability_Id", autospec=True)
566-
def test_handle_vulnerability_ids_references_and_cve(self, mock):
566+
def test_handle_vulnerability_ids_references_and_cve(self):
567567
# Why doesn't this test use the test db and query for one?
568568
vulnerability_ids = ["CVE", "REF-1", "REF-2"]
569569
finding = Finding()
570570
finding.unsaved_vulnerability_ids = vulnerability_ids
571571
finding.test = self.test
572572
finding.reporter = self.testuser
573573
finding.save()
574-
DefaultImporter(**self.importer_data).process_vulnerability_ids(finding)
574+
DefaultImporter(**self.importer_data).store_vulnerability_ids(finding)
575575

576576
self.assertEqual("CVE", finding.vulnerability_ids[0])
577577
self.assertEqual("CVE", finding.cve)
@@ -580,45 +580,42 @@ def test_handle_vulnerability_ids_references_and_cve(self, mock):
580580
self.assertEqual("REF-2", finding.vulnerability_ids[2])
581581
finding.delete()
582582

583-
@patch("dojo.importers.base_importer.Vulnerability_Id", autospec=True)
584-
def test_handle_no_vulnerability_ids_references_and_cve(self, mock):
583+
def test_handle_no_vulnerability_ids_references_and_cve(self):
585584
vulnerability_ids = ["CVE"]
586585
finding = Finding()
587586
finding.test = self.test
588587
finding.reporter = self.testuser
589588
finding.save()
590589
finding.unsaved_vulnerability_ids = vulnerability_ids
591590

592-
DefaultImporter(**self.importer_data).process_vulnerability_ids(finding)
591+
DefaultImporter(**self.importer_data).store_vulnerability_ids(finding)
593592

594593
self.assertEqual("CVE", finding.vulnerability_ids[0])
595594
self.assertEqual("CVE", finding.cve)
596595
self.assertEqual(vulnerability_ids, finding.unsaved_vulnerability_ids)
597596
finding.delete()
598597

599-
@patch("dojo.importers.base_importer.Vulnerability_Id", autospec=True)
600-
def test_handle_vulnerability_ids_references_and_no_cve(self, mock):
598+
def test_handle_vulnerability_ids_references_and_no_cve(self):
601599
vulnerability_ids = ["REF-1", "REF-2"]
602600
finding = Finding()
603601
finding.test = self.test
604602
finding.reporter = self.testuser
605603
finding.save()
606604
finding.unsaved_vulnerability_ids = vulnerability_ids
607-
DefaultImporter(**self.importer_data).process_vulnerability_ids(finding)
605+
DefaultImporter(**self.importer_data).store_vulnerability_ids(finding)
608606

609607
self.assertEqual("REF-1", finding.vulnerability_ids[0])
610608
self.assertEqual("REF-1", finding.cve)
611609
self.assertEqual(vulnerability_ids, finding.unsaved_vulnerability_ids)
612610
self.assertEqual("REF-2", finding.vulnerability_ids[1])
613611
finding.delete()
614612

615-
@patch("dojo.importers.base_importer.Vulnerability_Id", autospec=True)
616-
def test_no_handle_vulnerability_ids_references_and_no_cve(self, mock):
613+
def test_no_handle_vulnerability_ids_references_and_no_cve(self):
617614
finding = Finding()
618615
finding.test = self.test
619616
finding.reporter = self.testuser
620617
finding.save()
621-
DefaultImporter(**self.importer_data).process_vulnerability_ids(finding)
618+
DefaultImporter(**self.importer_data).store_vulnerability_ids(finding)
622619
self.assertEqual(finding.cve, None)
623620
self.assertEqual(finding.unsaved_vulnerability_ids, None)
624621
self.assertEqual(finding.vulnerability_ids, [])
@@ -644,7 +641,7 @@ def test_clear_vulnerability_ids_on_empty_list(self):
644641

645642
# Process with empty list - should clear all IDs
646643
finding.unsaved_vulnerability_ids = []
647-
DefaultImporter(**self.importer_data).process_vulnerability_ids(finding)
644+
DefaultReImporter(test=self.test, environment=self.importer_data["environment"], scan_type=self.importer_data["scan_type"]).reconcile_vulnerability_ids(finding)
648645
# Save the finding to persist the cve=None change
649646
finding.save()
650647

@@ -681,7 +678,7 @@ def test_change_vulnerability_ids_on_reimport(self):
681678
# Process with different IDs - should replace old IDs
682679
new_vulnerability_ids = ["CVE-2021-9999", "GHSA-xxxx-yyyy"]
683680
finding.unsaved_vulnerability_ids = new_vulnerability_ids
684-
DefaultImporter(**self.importer_data).process_vulnerability_ids(finding)
681+
DefaultReImporter(test=self.test, environment=self.importer_data["environment"], scan_type=self.importer_data["scan_type"]).reconcile_vulnerability_ids(finding)
685682
# Save the finding to persist the cve change
686683
finding.save()
687684

unittests/test_importers_performance.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -177,9 +177,9 @@ def test_import_reimport_reimport_performance_async(self):
177177
self._import_reimport_performance(
178178
expected_num_queries1=340,
179179
expected_num_async_tasks1=7,
180-
expected_num_queries2=288,
180+
expected_num_queries2=294,
181181
expected_num_async_tasks2=18,
182-
expected_num_queries3=175,
182+
expected_num_queries3=180,
183183
expected_num_async_tasks3=17,
184184
)
185185

@@ -195,9 +195,9 @@ def test_import_reimport_reimport_performance_pghistory_async(self):
195195
self._import_reimport_performance(
196196
expected_num_queries1=306,
197197
expected_num_async_tasks1=7,
198-
expected_num_queries2=281,
198+
expected_num_queries2=287,
199199
expected_num_async_tasks2=18,
200-
expected_num_queries3=170,
200+
expected_num_queries3=175,
201201
expected_num_async_tasks3=17,
202202
)
203203

@@ -219,9 +219,9 @@ def test_import_reimport_reimport_performance_no_async(self):
219219
self._import_reimport_performance(
220220
expected_num_queries1=346,
221221
expected_num_async_tasks1=6,
222-
expected_num_queries2=294,
222+
expected_num_queries2=300,
223223
expected_num_async_tasks2=17,
224-
expected_num_queries3=181,
224+
expected_num_queries3=186,
225225
expected_num_async_tasks3=16,
226226
)
227227

@@ -241,9 +241,9 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self):
241241
self._import_reimport_performance(
242242
expected_num_queries1=312,
243243
expected_num_async_tasks1=6,
244-
expected_num_queries2=287,
244+
expected_num_queries2=293,
245245
expected_num_async_tasks2=17,
246-
expected_num_queries3=176,
246+
expected_num_queries3=181,
247247
expected_num_async_tasks3=16,
248248
)
249249

@@ -267,9 +267,9 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self
267267
self._import_reimport_performance(
268268
expected_num_queries1=348,
269269
expected_num_async_tasks1=8,
270-
expected_num_queries2=296,
270+
expected_num_queries2=302,
271271
expected_num_async_tasks2=19,
272-
expected_num_queries3=183,
272+
expected_num_queries3=188,
273273
expected_num_async_tasks3=18,
274274
)
275275

@@ -290,9 +290,9 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr
290290
self._import_reimport_performance(
291291
expected_num_queries1=314,
292292
expected_num_async_tasks1=8,
293-
expected_num_queries2=289,
293+
expected_num_queries2=295,
294294
expected_num_async_tasks2=19,
295-
expected_num_queries3=178,
295+
expected_num_queries3=183,
296296
expected_num_async_tasks3=18,
297297
)
298298

0 commit comments

Comments
 (0)