36 changes: 34 additions & 2 deletions dojo/finding/helper.py
@@ -667,6 +667,33 @@ def prepare_duplicates_for_delete(obj):
        # so original_finding.all() now only contains outside-scope duplicates.
        reconfigure_duplicate_cluster(original, original.original_finding.all())

+    # When DUPLICATE_CLUSTER_CASCADE_DELETE=True, reconfigure_duplicate_cluster is a no-op.
+    # Match legacy finding_delete(): delete outside-scope cluster members that point at
+    # in-scope originals (duplicate_cluster.order_by("-id").delete()). Transitive duplicate
+    # chains do not need a separate expansion pass — fix_loop_duplicates above normalizes them.
+    if settings.DUPLICATE_CLUSTER_CASCADE_DELETE:
+        outside_cascade_qs = Finding.objects.filter(
+            duplicate_finding_id__in=scope_ids_subquery,
+        ).exclude(id__in=scope_ids_subquery)
+        outside_count = outside_cascade_qs.count()
+        if outside_count:
+            logger.debug(
+                "cascade-delete %d outside-scope duplicate findings (DUPLICATE_CLUSTER_CASCADE_DELETE)",
+                outside_count,
+            )
+        bulk_delete_findings(outside_cascade_qs, order_desc=True)
+    else:
+        outside_orphan_count = Finding.objects.filter(
+            duplicate_finding_id__in=scope_ids_subquery,
+        ).exclude(
+            id__in=scope_ids_subquery,
+        ).update(duplicate_finding=None, duplicate=False)
+        if outside_orphan_count:
+            logger.debug(
+                "nulled %d outside-scope duplicate_finding references to prevent FK violation",
+                outside_orphan_count,
+            )


@receiver(pre_delete, sender=Test)
def test_pre_delete(sender, instance, **kwargs):
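Reviewer note on `scope_ids_subquery`, which the added block uses but which is defined earlier in `prepare_duplicates_for_delete`, outside this hunk. A minimal sketch of the shape it plausibly takes; the helper name and the single-`Test` scope below are illustrative assumptions, not the PR's actual code:

```python
# Sketch only: scope_ids_subquery is assumed to be a values() queryset so the
# database resolves membership without materializing ids in Python.
from dojo.models import Finding  # model path as used elsewhere in DefectDojo


def scope_ids_subquery_for(test):
    """All finding ids inside the deletion scope (assumed here: one Test)."""
    return Finding.objects.filter(test=test).values("id")


# With that shape, outside_cascade_qs reads as: findings whose duplicate_finding
# points into the scope but which are not themselves in scope, i.e. exactly the
# rows that would violate the self-FK once the in-scope originals are deleted.
```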
@@ -763,14 +790,18 @@ def bulk_clear_finding_m2m(finding_qs):
    Notes.objects.filter(id__in=note_ids).delete()


-def bulk_delete_findings(finding_qs, chunk_size=1000):
+def bulk_delete_findings(finding_qs, chunk_size=1000, *, order_desc=False):
    """
    Delete findings and all related objects efficiently, including any related objects in Dojo-Pro.

    Sends the pre_bulk_delete signal, clears M2M through tables (not
    discovered by _meta.related_objects), then uses cascade_delete for
    all FK relations via raw SQL.
    Chunked with per-chunk transaction.atomic() for crash safety.
+
+    When order_desc is True, findings are processed highest id first (matches
+    finding_delete: duplicate_cluster.order_by("-id").delete()) so self-FK
+    duplicate chains delete children before parents.
    """
    from dojo.signals import pre_bulk_delete_findings  # noqa: PLC0415 circular import
    from dojo.utils_cascade_delete import (  # noqa: PLC0415 circular import
@@ -780,9 +811,10 @@ def bulk_delete_findings(finding_qs, chunk_size=1000):

    pre_bulk_delete_findings.send(sender=Finding, finding_qs=finding_qs)
    bulk_clear_finding_m2m(finding_qs)
+    ordered_qs = finding_qs.order_by("-id") if order_desc else finding_qs.order_by("id")
    for chunk_num, chunk_ids in enumerate(
        batched(
-            finding_qs.values_list("id", flat=True).order_by("id").iterator(chunk_size=chunk_size),
+            ordered_qs.values_list("id", flat=True).iterator(chunk_size=chunk_size),
            chunk_size,
            strict=False,
        ),
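To make the docstring's ordering rationale concrete: duplicate_finding is a self-FK, and the legacy duplicate_cluster.order_by("-id").delete() relied on duplicates carrying higher ids than their originals. A toy sketch in plain Python, not DefectDojo code:

```python
# Model a duplicate chain as child -> parent and show that highest-id-first
# deletion never leaves a surviving row referencing a deleted one.
parent = {2: 1, 3: 2}  # finding 3 duplicates 2, which duplicates 1
alive = {1, 2, 3}

for pk in sorted(alive, reverse=True):  # the order order_desc=True produces
    alive.discard(pk)
    for child in alive:
        p = parent.get(child)
        assert p is None or p in alive, f"{child} -> {p} would violate the FK"

# With reverse=False the assertion fires immediately: deleting finding 1 first
# leaves finding 2 pointing at a row that no longer exists.
```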
5 changes: 1 addition & 4 deletions dojo/settings/settings.dist.py
@@ -318,10 +318,7 @@
    DD_FEATURE_FINDING_GROUPS=(bool, True),
    DD_JIRA_TEMPLATE_ROOT=(str, "dojo/templates/issue-trackers"),
    DD_TEMPLATE_DIR_PREFIX=(str, "dojo/templates/"),
-    # Initial behaviour in Defect Dojo was to delete all duplicates when an original was deleted
-    # New behaviour is to leave the duplicates in place, but set the oldest of duplicates as new original
-    # Set to True to revert to the old behaviour where all duplicates are deleted
-    DD_DUPLICATE_CLUSTER_CASCADE_DELETE=(bool, True),
+    DD_DUPLICATE_CLUSTER_CASCADE_DELETE=(bool, False),
    # Enable Rate Limiting for the login page
    DD_RATE_LIMITER_ENABLED=(bool, False),
    # Examples include 5/m 100/h and more https://django-ratelimit.readthedocs.io/en/stable/rates.html#simple-rates
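For context on how this default is consumed: settings.dist.py feeds these DD_* entries to django-environ, where each tuple is (cast, default) and a matching environment variable overrides the default. A minimal sketch; the bare environ.Env construction is illustrative rather than the file's exact setup:

```python
import environ

# (cast, default): the default is now False, so duplicates of a deleted
# original are kept and re-parented (oldest duplicate promoted to original).
env = environ.Env(DD_DUPLICATE_CLUSTER_CASCADE_DELETE=(bool, False))

# Unset                                     -> False (keep duplicates)
# DD_DUPLICATE_CLUSTER_CASCADE_DELETE=True  -> True  (legacy: delete cluster)
DUPLICATE_CLUSTER_CASCADE_DELETE = env("DD_DUPLICATE_CLUSTER_CASCADE_DELETE")
```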
36 changes: 27 additions & 9 deletions unittests/test_prepare_duplicates_for_delete.py
@@ -236,12 +236,11 @@ def test_mixed_inside_and_outside_duplicates(self):
        self.assertIsNone(outside_dupe.duplicate_finding)

    @override_settings(DUPLICATE_CLUSTER_CASCADE_DELETE=True)
-    def test_cascade_delete_skips_outside_reconfigure(self):
+    def test_cascade_delete_removes_outside_scope_duplicate_findings(self):
        """
-        When DUPLICATE_CLUSTER_CASCADE_DELETE=True, outside duplicates are left untouched.
-
-        The caller (async_delete_crawl_task) handles deletion of outside-scope
-        duplicates separately via bulk_delete_findings.
+        When DUPLICATE_CLUSTER_CASCADE_DELETE=True, outside-scope findings in the duplicate
+        cluster are deleted (same as legacy duplicate_cluster.order_by("-id").delete() in
+        finding_delete), not merely unlinked.
        """
        original = self._create_finding(self.test1, "Original")
        outside_dupe = self._create_finding(self.test2, "Outside Dupe")
@@ -250,10 +249,29 @@ def test_cascade_delete_skips_outside_reconfigure(self):
        with impersonate(self.testuser):
            prepare_duplicates_for_delete(self.test1)

-        outside_dupe.refresh_from_db()
-        # Outside dupe is still a duplicate — not reconfigured or deleted
-        self.assertTrue(outside_dupe.duplicate)
-        self.assertEqual(outside_dupe.duplicate_finding_id, original.id)
+        self.assertFalse(
+            Finding.objects.filter(id=outside_dupe.id).exists(),
+            msg="outside-scope duplicate should be cascade-deleted with DUPLICATE_CLUSTER_CASCADE_DELETE=True",
+        )
+
+    @override_settings(DUPLICATE_CLUSTER_CASCADE_DELETE=True)
+    def test_engagement_delete_with_outside_scope_duplicate_no_fk_violation(self):
+        # Regression: engagement.delete() raises IntegrityError when DUPLICATE_CLUSTER_CASCADE_DELETE=True
+        # and an outside-scope finding still holds a duplicate_finding FK to an in-scope finding.
+        original = self._create_finding(self.test1, "Original in Eng1")
+        outside_dupe = self._create_finding(self.test3, "Outside Dupe in Eng2")
+        self._make_duplicate(outside_dupe, original)
+
+        # This must not raise django.db.utils.IntegrityError (FK violation)
+        with impersonate(self.testuser):
+            self.engagement1.delete()
+
+        # Engagement and its findings are gone
+        self.assertFalse(Engagement.objects.filter(id=self.engagement1.id).exists())
+        self.assertFalse(Finding.objects.filter(id=original.id).exists())
+
+        # Outside-scope duplicate was cascade-deleted with the cluster (legacy behaviour)
+        self.assertFalse(Finding.objects.filter(id=outside_dupe.id).exists())

    def test_multiple_originals(self):
        """Multiple originals in the same test each get their clusters handled."""