Skip to content

Commit 3950894

Browse files
(perf) Batch duplicate marking part 2 (#14516)
* deduplication: return modified findings
* fix(lint): remove unnecessary elif after return (RET505)
* update comments
1 parent 1c2d84b commit 3950894

1 file changed

Lines changed: 28 additions & 29 deletions

File tree

dojo/finding/deduplication.py

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -701,24 +701,26 @@ def _flush_duplicate_changes(modified_new_findings):
701701
Persist duplicate field changes collected during a batch deduplication run.
702702
703703
Bulk-updates all modified new findings in one round-trip instead of one
704-
save() call per finding. Uses bulk_update (no signals) which is consistent
705-
with the original code that called super(Finding, ...).save(skip_validation=True),
706-
bypassing Finding.save() in both cases.
704+
save() call per finding. Uses bulk_update to bypass Django signals.
705+
706+
Returns the list of modified findings so callers can perform any follow-up
707+
processing (e.g. triggering prioritization) on the affected findings.
707708
"""
708709
if modified_new_findings:
709710
Finding.objects.bulk_update(
710711
modified_new_findings,
711712
["duplicate", "active", "verified", "duplicate_finding"],
712713
)
714+
return modified_new_findings
713715

714716

715717
def _dedupe_batch_hash_code(findings):
716718
if not findings:
717-
return
719+
return []
718720
test = findings[0].test
719721
candidates_by_hash = find_candidates_for_deduplication_hash(test, findings)
720722
if not candidates_by_hash:
721-
return
723+
return []
722724
modified_new_findings = []
723725
for new_finding in findings:
724726
deduplicationLogger.debug(f"deduplication start for finding {new_finding.id} with DEDUPE_ALGO_HASH_CODE")
@@ -728,16 +730,16 @@ def _dedupe_batch_hash_code(findings):
728730
break
729731
except Exception as e:
730732
deduplicationLogger.debug(str(e))
731-
_flush_duplicate_changes(modified_new_findings)
733+
return _flush_duplicate_changes(modified_new_findings)
732734

733735

734736
def _dedupe_batch_unique_id(findings):
735737
if not findings:
736-
return
738+
return []
737739
test = findings[0].test
738740
candidates_by_uid = find_candidates_for_deduplication_unique_id(test, findings)
739741
if not candidates_by_uid:
740-
return
742+
return []
741743
modified_new_findings = []
742744
for new_finding in findings:
743745
deduplicationLogger.debug(f"deduplication start for finding {new_finding.id} with DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL")
@@ -749,17 +751,17 @@ def _dedupe_batch_unique_id(findings):
749751
break
750752
except Exception as e:
751753
deduplicationLogger.debug(f"Exception when deduplicating finding {new_finding.id} against candidate {match.id}: {e!s}")
752-
_flush_duplicate_changes(modified_new_findings)
754+
return _flush_duplicate_changes(modified_new_findings)
753755

754756

755757
def _dedupe_batch_uid_or_hash(findings):
756758
if not findings:
757-
return
759+
return []
758760

759761
test = findings[0].test
760762
candidates_by_uid, existing_by_hash = find_candidates_for_deduplication_uid_or_hash(test, findings)
761763
if not (candidates_by_uid or existing_by_hash):
762-
return
764+
return []
763765
modified_new_findings = []
764766
for new_finding in findings:
765767
deduplicationLogger.debug(f"deduplication start for finding {new_finding.id} with DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE")
@@ -772,16 +774,16 @@ def _dedupe_batch_uid_or_hash(findings):
772774
break
773775
except Exception as e:
774776
deduplicationLogger.debug(str(e))
775-
_flush_duplicate_changes(modified_new_findings)
777+
return _flush_duplicate_changes(modified_new_findings)
776778

777779

778780
def _dedupe_batch_legacy(findings):
779781
if not findings:
780-
return
782+
return []
781783
test = findings[0].test
782784
candidates_by_title, candidates_by_cwe = find_candidates_for_deduplication_legacy(test, findings)
783785
if not (candidates_by_title or candidates_by_cwe):
784-
return
786+
return []
785787
modified_new_findings = []
786788
for new_finding in findings:
787789
deduplicationLogger.debug(f"deduplication start for finding {new_finding.id} with DEDUPE_ALGO_LEGACY")
@@ -791,7 +793,7 @@ def _dedupe_batch_legacy(findings):
791793
break
792794
except Exception as e:
793795
deduplicationLogger.debug(str(e))
794-
_flush_duplicate_changes(modified_new_findings)
796+
return _flush_duplicate_changes(modified_new_findings)
795797

796798

797799
def dedupe_batch_of_findings(findings, *args, **kwargs):
@@ -804,7 +806,7 @@ def dedupe_batch_of_findings(findings, *args, **kwargs):
804806

805807
if not findings:
806808
logger.debug("dedupe_batch_of_findings called with no findings")
807-
return None
809+
return []
808810

809811
enabled = System_Settings.objects.get().enable_deduplication
810812

@@ -817,19 +819,17 @@ def dedupe_batch_of_findings(findings, *args, **kwargs):
817819

818820
if dedup_alg == settings.DEDUPE_ALGO_HASH_CODE:
819821
logger.debug(f"deduplicating finding batch with DEDUPE_ALGO_HASH_CODE - {len(findings)} findings")
820-
_dedupe_batch_hash_code(findings)
821-
elif dedup_alg == settings.DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL:
822+
return _dedupe_batch_hash_code(findings)
823+
if dedup_alg == settings.DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL:
822824
logger.debug(f"deduplicating finding batch with DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL - {len(findings)} findings")
823-
_dedupe_batch_unique_id(findings)
824-
elif dedup_alg == settings.DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE:
825+
return _dedupe_batch_unique_id(findings)
826+
if dedup_alg == settings.DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE:
825827
logger.debug(f"deduplicating finding batch with DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE - {len(findings)} findings")
826-
_dedupe_batch_uid_or_hash(findings)
827-
else:
828-
logger.debug(f"deduplicating finding batch with LEGACY - {len(findings)} findings")
829-
_dedupe_batch_legacy(findings)
830-
else:
831-
deduplicationLogger.debug("dedupe: skipping dedupe because it's disabled in system settings get()")
832-
return None
828+
return _dedupe_batch_uid_or_hash(findings)
829+
logger.debug(f"deduplicating finding batch with LEGACY - {len(findings)} findings")
830+
return _dedupe_batch_legacy(findings)
831+
deduplicationLogger.debug("dedupe: skipping dedupe because it's disabled in system settings get()")
832+
return []
833833

834834

835835
# ---------------------------------------------------------------------------
@@ -1016,8 +1016,7 @@ def do_false_positive_history_batch(findings):
10161016
len(to_mark_as_fp_ids),
10171017
sorted(to_mark_as_fp_ids),
10181018
)
1019-
# QuerySet.update() bypasses Django signals — intentional, mimicking the previous
1020-
# super(Finding, find).save(skip_validation=True) calls that also skipped all post-save processing.
1019+
# QuerySet.update() bypasses Django signals — intentional as this code is called during (post) save processing.
10211020
# Note: .only() does not constrain update() — Django generates the UPDATE SQL independently.
10221021
Finding.objects.filter(id__in=to_mark_as_fp_ids).update(false_p=True, active=False, verified=False)
10231022

0 commit comments

Comments (0)