|
1 | 1 | import logging |
2 | 2 | from contextlib import suppress |
3 | 3 | from datetime import datetime |
| 4 | +from itertools import batched |
4 | 5 | from time import strftime |
5 | 6 |
|
6 | 7 | from django.conf import settings |
@@ -563,8 +564,7 @@ def finding_delete(instance, **kwargs): |
563 | 564 | duplicate_cluster = instance.original_finding.all() |
564 | 565 | if duplicate_cluster: |
565 | 566 | if settings.DUPLICATE_CLUSTER_CASCADE_DELETE: |
566 | | - # Delete the entire duplicate cluster efficiently via bulk_delete_findings |
567 | | - bulk_delete_findings(duplicate_cluster) |
| 567 | + duplicate_cluster.order_by("-id").delete() |
568 | 568 | else: |
569 | 569 | reconfigure_duplicate_cluster(instance, duplicate_cluster) |
570 | 570 | else: |
@@ -615,53 +615,65 @@ def reconfigure_duplicate_cluster(original, cluster_outside): |
615 | 615 | cluster_outside.exclude(id=new_original.id).update(duplicate_finding=new_original) |
616 | 616 |
|
617 | 617 |
|
def prepare_duplicates_for_delete(test=None, engagement=None, product=None, product_type=None):
    """Unlink or re-home duplicate clusters before findings in a scope are deleted.

    Exactly one scope object is expected; when several are supplied the widest
    one wins (product_type > product > engagement > test). Duplicates whose
    original also lives inside the scope are bulk-unlinked with a single
    UPDATE; clusters with duplicates outside the scope get a new original
    elected per cluster via reconfigure_duplicate_cluster().
    """
    logger.debug(
        "prepare duplicates for delete, test: %s, engagement: %s, product: %s, product_type: %s",
        *[scope.id if scope else None for scope in (test, engagement, product, product_type)],
    )
    if all(scope is None for scope in (test, engagement, product, product_type)):
        logger.warning("nothing to prepare as no scope object provided")
        return

    # should not be needed in normal healthy instances.
    # but in that case it's a cheap count query and we might as well run it to be safe
    fix_loop_duplicates()

    # Widest scope wins. The filter stays a lazy queryset so the database
    # evaluates it as a subquery — ids are never materialized in Python.
    candidate_lookups = (
        ("test__engagement__product__prod_type", product_type),
        ("test__engagement__product", product),
        ("test__engagement", engagement),
        ("test", test),
    )
    field, scope_obj = next((f, o) for f, o in candidate_lookups if o)
    scope_lookup = {field: scope_obj}

    in_scope_ids = Finding.objects.filter(**scope_lookup).values_list("id", flat=True)

    if not in_scope_ids.exists():
        logger.debug("no findings in scope, nothing to prepare")
        return

    # Bulk-reset inside-scope duplicates: single UPDATE instead of per-original mass_model_updater.
    # Clears the duplicate_finding FK so cascade_delete won't trip over dangling self-references.
    reset_count = Finding.objects.filter(
        duplicate=True,
        duplicate_finding_id__in=in_scope_ids,
        id__in=in_scope_ids,
    ).update(duplicate_finding=None, duplicate=False)
    logger.debug("bulk-reset %d inside-scope duplicates", reset_count)

    # Reconfigure outside-scope duplicates: still per-original because each cluster
    # needs a new original chosen, status copied, and found_by updated.
    # Ids stream through a server-side cursor; objects are re-fetched per batch
    # with prefetch_related so memory stays bounded without N+1 queries.
    # NOTE(review): itertools.batched requires Python 3.12+ — confirm the
    # project's minimum supported interpreter version.
    pending_original_ids = (
        Finding.objects.filter(
            id__in=in_scope_ids,
            original_finding__in=Finding.objects.exclude(id__in=in_scope_ids),
        )
        .distinct()
        .values_list("id", flat=True)
        .iterator(chunk_size=500)
    )

    for id_batch in batched(pending_original_ids, 500):
        batch = Finding.objects.filter(id__in=id_batch).prefetch_related("original_finding")
        for original in batch:
            # Inside-scope duplicates were already unlinked by the bulk UPDATE above,
            # so original_finding.all() now only contains outside-scope duplicates.
            reconfigure_duplicate_cluster(original, original.original_finding.all())
665 | 677 |
|
666 | 678 |
|
667 | 679 | @receiver(pre_delete, sender=Test) |
|
0 commit comments