Skip to content

Commit 5a05a60

Browse files
reimport: optimize vulnerability_id processing
1 parent ee9ee74 commit 5a05a60

3 files changed

Lines changed: 43 additions & 18 deletions

File tree

dojo/finding/deduplication.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -239,7 +239,7 @@ def build_dedupe_scope_queryset(test):
239239
return (
240240
Finding.objects.filter(scope_q)
241241
.select_related("test", "test__engagement", "test__test_type")
242-
.prefetch_related("endpoints")
242+
.prefetch_related("endpoints", "vulnerability_id_set")
243243
)
244244

245245

dojo/importers/base_importer.py

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -793,9 +793,34 @@ def process_vulnerability_ids(
793793
) -> Finding:
794794
"""
795795
Parse the `unsaved_vulnerability_ids` field from findings after they are parsed
796-
to create `Vulnerability_Id` objects with the finding associated correctly
796+
to create `Vulnerability_Id` objects with the finding associated correctly.
797+
Only updates if vulnerability_ids have changed to avoid unnecessary database operations.
798+
799+
Args:
800+
finding: The finding to process vulnerability IDs for
801+
802+
Returns:
803+
The finding object
804+
797805
"""
798806
if finding.unsaved_vulnerability_ids:
807+
# Only check for changes if the finding has been saved and might have existing vulnerability_ids
808+
# For new findings, we always need to save vulnerability_ids
809+
if finding.pk and finding.vulnerability_id_set.exists():
810+
# Check if vulnerability_ids have changed before updating
811+
# Get existing vulnerability IDs from the database
812+
existing_vuln_ids = set(finding.vulnerability_ids) if finding.vulnerability_ids else set()
813+
# Normalize the new vulnerability IDs (remove duplicates for comparison)
814+
new_vuln_ids = set(finding.unsaved_vulnerability_ids)
815+
816+
# Only update if vulnerability IDs have changed
817+
if existing_vuln_ids == new_vuln_ids:
818+
logger.debug(
819+
f"Skipping vulnerability_ids update for finding {finding.id} - "
820+
f"vulnerability_ids unchanged: {sorted(existing_vuln_ids)}",
821+
)
822+
return finding
823+
799824
# Remove old vulnerability ids - keeping this call only because of flake8
800825
Vulnerability_Id.objects.filter(finding=finding).delete()
801826

unittests/test_importers_performance.py

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -217,11 +217,11 @@ def test_import_reimport_reimport_performance_no_async(self):
217217
testuser.usercontactinfo.block_execution = True
218218
testuser.usercontactinfo.save()
219219
self._import_reimport_performance(
220-
expected_num_queries1=345,
220+
expected_num_queries1=346,
221221
expected_num_async_tasks1=6,
222-
expected_num_queries2=293,
222+
expected_num_queries2=294,
223223
expected_num_async_tasks2=17,
224-
expected_num_queries3=180,
224+
expected_num_queries3=181,
225225
expected_num_async_tasks3=16,
226226
)
227227

@@ -239,11 +239,11 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self):
239239
testuser.usercontactinfo.save()
240240

241241
self._import_reimport_performance(
242-
expected_num_queries1=311,
242+
expected_num_queries1=312,
243243
expected_num_async_tasks1=6,
244-
expected_num_queries2=286,
244+
expected_num_queries2=287,
245245
expected_num_async_tasks2=17,
246-
expected_num_queries3=175,
246+
expected_num_queries3=176,
247247
expected_num_async_tasks3=16,
248248
)
249249

@@ -265,11 +265,11 @@ def test_import_reimport_reimport_performance_no_async_with_product_grading(self
265265
self.system_settings(enable_product_grade=True)
266266

267267
self._import_reimport_performance(
268-
expected_num_queries1=347,
268+
expected_num_queries1=348,
269269
expected_num_async_tasks1=8,
270-
expected_num_queries2=295,
270+
expected_num_queries2=296,
271271
expected_num_async_tasks2=19,
272-
expected_num_queries3=182,
272+
expected_num_queries3=183,
273273
expected_num_async_tasks3=18,
274274
)
275275

@@ -288,11 +288,11 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr
288288
self.system_settings(enable_product_grade=True)
289289

290290
self._import_reimport_performance(
291-
expected_num_queries1=313,
291+
expected_num_queries1=314,
292292
expected_num_async_tasks1=8,
293-
expected_num_queries2=288,
293+
expected_num_queries2=289,
294294
expected_num_async_tasks2=19,
295-
expected_num_queries3=177,
295+
expected_num_queries3=178,
296296
expected_num_async_tasks3=18,
297297
)
298298

@@ -449,9 +449,9 @@ def test_deduplication_performance_no_async(self):
449449
testuser.usercontactinfo.save()
450450

451451
self._deduplication_performance(
452-
expected_num_queries1=316,
452+
expected_num_queries1=317,
453453
expected_num_async_tasks1=7,
454-
expected_num_queries2=287,
454+
expected_num_queries2=288,
455455
expected_num_async_tasks2=7,
456456
)
457457

@@ -469,8 +469,8 @@ def test_deduplication_performance_pghistory_no_async(self):
469469
testuser.usercontactinfo.save()
470470

471471
self._deduplication_performance(
472-
expected_num_queries1=280,
472+
expected_num_queries1=281,
473473
expected_num_async_tasks1=7,
474-
expected_num_queries2=250,
474+
expected_num_queries2=251,
475475
expected_num_async_tasks2=7,
476476
)

0 commit comments

Comments
 (0)