From b8d86e7b1c0cf70852bace4263114d02b730a515 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 5 Apr 2026 09:27:31 +0200 Subject: [PATCH 1/4] Batch-refresh close_old_findings status fields to avoid N refresh_from_db queries. Replace per-finding refresh_from_db(false_p, risk_accepted, out_of_scope) with one values() query for all PKs and assign onto instances, falling back to refresh_from_db when a row is missing. --- dojo/importers/default_reimporter.py | 48 +++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 2828b8cec30..7f3953e64b7 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -464,6 +464,38 @@ def process_findings( return self.new_items, self.reactivated_items, self.to_mitigate, self.untouched + def _sync_close_old_finding_status_fields(self, findings: list[Finding]) -> None: + """ + Refresh false_p, risk_accepted, and out_of_scope from the DB for each finding. + + These can change during reimport (e.g. false positive) while the in-memory instances + are stale. A naive refresh_from_db per finding issues one SELECT each; we batch one + query for all primary keys and fall back to refresh_from_db only when needed. 
+ """ + ids = [f.pk for f in findings if f.pk is not None] + if not ids: + for finding in findings: + finding.refresh_from_db(fields=["false_p", "risk_accepted", "out_of_scope"]) + return + + fresh_by_id = { + row["id"]: row + for row in Finding.objects.filter(pk__in=ids).values( + "id", + "false_p", + "risk_accepted", + "out_of_scope", + ) + } + for finding in findings: + row = fresh_by_id.get(finding.pk) + if row is not None: + finding.false_p = row["false_p"] + finding.risk_accepted = row["risk_accepted"] + finding.out_of_scope = row["out_of_scope"] + else: + finding.refresh_from_db(fields=["false_p", "risk_accepted", "out_of_scope"]) + def close_old_findings( self, findings: list[Finding], @@ -477,17 +509,17 @@ def close_old_findings( if self.close_old_findings_toggle is False: return [] logger.debug("REIMPORT_SCAN: Closing findings no longer present in scan report") + # Get any status changes that could have occurred earlier in the process + # for special statuses only. + # An example of such is a finding being reported as false positive, and + # reimport makes this change in the database. However, the findings here + # are calculated based from the original values before the reimport, so + # any updates made during reimport are discarded without first getting the + # state of the finding as it stands at this moment + self._sync_close_old_finding_status_fields(findings) # Determine if pushing to jira or if the finding groups are enabled mitigated_findings = [] for finding in findings: - # Get any status changes that could have occurred earlier in the process - # for special statuses only. - # An example of such is a finding being reported as false positive, and - # reimport makes this change in the database. 
However, the findings here - # are calculated based from the original values before the reimport, so - # any updates made during reimport are discarded without first getting the - # state of the finding as it stands at this moment - finding.refresh_from_db(fields=["false_p", "risk_accepted", "out_of_scope"]) # Ensure the finding is not already closed if not finding.mitigated or not finding.is_mitigated: logger.debug("mitigating finding: %i:%s", finding.id, finding) From 2aaca496e920b1c85eb7f9806ee3685c772c5897 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 5 Apr 2026 09:36:05 +0200 Subject: [PATCH 2/4] docs: cite #12291 for close_old_findings status refresh origin --- dojo/importers/default_reimporter.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 7f3953e64b7..07e4a6dcbe4 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -469,8 +469,10 @@ def _sync_close_old_finding_status_fields(self, findings: list[Finding]) -> None Refresh false_p, risk_accepted, and out_of_scope from the DB for each finding. These can change during reimport (e.g. false positive) while the in-memory instances - are stale. A naive refresh_from_db per finding issues one SELECT each; we batch one - query for all primary keys and fall back to refresh_from_db only when needed. + are stale. Per-finding refresh_from_db in close_old_findings was added in + https://github.com/DefectDojo/django-DefectDojo/pull/12291. A naive refresh per + finding issues one SELECT each; we batch one query for all primary keys and fall + back to refresh_from_db only when needed. """ ids = [f.pk for f in findings if f.pk is not None] if not ids: @@ -515,7 +517,7 @@ def close_old_findings( # reimport makes this change in the database. 
However, the findings here # are calculated based from the original values before the reimport, so # any updates made during reimport are discarded without first getting the - # state of the finding as it stands at this moment + # state of the finding as it stands at this moment (django-DefectDojo #12291). self._sync_close_old_finding_status_fields(findings) # Determine if pushing to jira or if the finding groups are enabled mitigated_findings = [] From 226e284a747e89a5ed394c37fb7fb85e3bad3a7e Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 5 Apr 2026 09:39:42 +0200 Subject: [PATCH 3/4] perf: chunk close_old_findings status sync queries (1000 PKs per SELECT) --- dojo/importers/default_reimporter.py | 56 ++++++++++++++++------------ 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index 07e4a6dcbe4..f0a4c05028c 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -1,4 +1,5 @@ import logging +from itertools import batched from django.conf import settings from django.core.files.uploadedfile import TemporaryUploadedFile @@ -30,6 +31,9 @@ logger = logging.getLogger(__name__) deduplicationLogger = logging.getLogger("dojo.specific-loggers.deduplication") +# Bound IN-list size when bulk-loading status fields for close_old_findings. +_CLOSE_OLD_FINDINGS_STATUS_FIELDS_CHUNK = 1000 + class DefaultReImporterOptions(ImporterOptions): def validate_test( @@ -471,32 +475,36 @@ def _sync_close_old_finding_status_fields(self, findings: list[Finding]) -> None These can change during reimport (e.g. false positive) while the in-memory instances are stale. Per-finding refresh_from_db in close_old_findings was added in https://github.com/DefectDojo/django-DefectDojo/pull/12291. 
A naive refresh per - finding issues one SELECT each; we batch one query for all primary keys and fall + finding issues one SELECT each; we batch one query per chunk of primary keys and fall back to refresh_from_db only when needed. - """ - ids = [f.pk for f in findings if f.pk is not None] - if not ids: - for finding in findings: - finding.refresh_from_db(fields=["false_p", "risk_accepted", "out_of_scope"]) - return - fresh_by_id = { - row["id"]: row - for row in Finding.objects.filter(pk__in=ids).values( - "id", - "false_p", - "risk_accepted", - "out_of_scope", - ) - } - for finding in findings: - row = fresh_by_id.get(finding.pk) - if row is not None: - finding.false_p = row["false_p"] - finding.risk_accepted = row["risk_accepted"] - finding.out_of_scope = row["out_of_scope"] - else: - finding.refresh_from_db(fields=["false_p", "risk_accepted", "out_of_scope"]) + This really should be fixed differently, but for now we at least optimize it to be done in bulk. + """ + findings_without_pk = [f for f in findings if f.pk is None] + findings_with_pk = [f for f in findings if f.pk is not None] + + for finding in findings_without_pk: + finding.refresh_from_db(fields=["false_p", "risk_accepted", "out_of_scope"]) + + for chunk in batched(findings_with_pk, _CLOSE_OLD_FINDINGS_STATUS_FIELDS_CHUNK, strict=False): + ids = [f.pk for f in chunk] + fresh_by_id = { + row["id"]: row + for row in Finding.objects.filter(pk__in=ids).values( + "id", + "false_p", + "risk_accepted", + "out_of_scope", + ) + } + for finding in chunk: + row = fresh_by_id.get(finding.pk) + if row is not None: + finding.false_p = row["false_p"] + finding.risk_accepted = row["risk_accepted"] + finding.out_of_scope = row["out_of_scope"] + else: + finding.refresh_from_db(fields=["false_p", "risk_accepted", "out_of_scope"]) def close_old_findings( self, From a06d5599056daa275d5a34380c01a3a3043e2bcb Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 5 Apr 2026 14:43:43 +0200 Subject: [PATCH 4/4] tests: 
add 4th reimport step (empty scan, close_old_findings) to performance tests - Add step 4 to _import_reimport_performance: reimport with an empty StackHawk scan and close_old_findings=True, verifying findings are actually closed (assertGreater on len_closed_findings). - Fix all reimport steps to pass service="Secured Application" so the reimporter's service filter matches findings produced by the StackHawk parser (which sets service from the scan's application field). Without this, original_items was always empty and no matching/closing occurred. - Add stackhawk_empty.json scan fixture. - Fix update_performance_test_counts.py to handle reimport3 (step 4) by adding reimport3_queries/reimport3_async_tasks to param_map. - Update all expected query/task counts for both Small and Locations test classes to reflect the new step and the batch status-sync fix. --- scripts/update_performance_test_counts.py | 2 + .../scans/stackhawk/stackhawk_empty.json | 32 +++++++++ unittests/test_importers_performance.py | 65 ++++++++++++++++++- 3 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 unittests/scans/stackhawk/stackhawk_empty.json diff --git a/scripts/update_performance_test_counts.py b/scripts/update_performance_test_counts.py index f9fd077e0e8..77a2e629e11 100644 --- a/scripts/update_performance_test_counts.py +++ b/scripts/update_performance_test_counts.py @@ -422,6 +422,8 @@ def _extract_call_span(method_content: str, call_name: str) -> tuple[int, int] | "reimport1_async_tasks": "expected_num_async_tasks2", "reimport2_queries": "expected_num_queries3", "reimport2_async_tasks": "expected_num_async_tasks3", + "reimport3_queries": "expected_num_queries4", + "reimport3_async_tasks": "expected_num_async_tasks4", } param_map_deduplication = { "first_import_queries": "expected_num_queries1", diff --git a/unittests/scans/stackhawk/stackhawk_empty.json b/unittests/scans/stackhawk/stackhawk_empty.json new file mode 100644 index 00000000000..056193baafe --- /dev/null 
+++ b/unittests/scans/stackhawk/stackhawk_empty.json @@ -0,0 +1,32 @@ +{ + "service": "StackHawk", + "scanCompleted": { + "scan": { + "comment defect dojo team": "This is an empty StackHawk scan result", + "id": "e2ff5651-7eef-47e9-b743-0c2f7d861e27", + "hawkscanVersion": "2.1.1", + "env": "Development", + "status": "COMPLETED", + "application": "Secured Application", + "startedTimestamp": "2022-02-16T23:07:19.575Z", + "scanURL": "https://app.stackhawk.com/scans/e2ff5651-7eef-47e9-b743-0c2f7d861e27" + }, + "scanDuration": "21", + "spiderDuration": "45", + "completedScanStats": { + "urlsCount": "31", + "duration": "66", + "scanResultsStats": { + "totalCount": "0", + "lowCount": "0", + "mediumCount": "0", + "highCount": "0", + "lowTriagedCount": "0", + "mediumTriagedCount": "0", + "highTriagedCount": "0" + } + }, + "findings": [ + ] + } +} diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index ea3b3b79b40..ef7b4763460 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -51,6 +51,7 @@ STACK_HAWK_FILENAME = get_unit_tests_scans_path("stackhawk") / "stackhawk_many_vul_without_duplicated_findings.json" STACK_HAWK_SUBSET_FILENAME = get_unit_tests_scans_path("stackhawk") / "stackhawk_many_vul_without_duplicated_findings_subset.json" +STACK_HAWK_EMPTY = get_unit_tests_scans_path("stackhawk") / "stackhawk_empty.json" STACK_HAWK_SCAN_TYPE = "StackHawk HawkScan" @@ -126,12 +127,17 @@ def _import_reimport_performance( expected_num_async_tasks2, expected_num_queries3, expected_num_async_tasks3, + expected_num_queries4, + expected_num_async_tasks4, scan_file1, scan_file2, scan_file3, + scan_file4, scan_type, product_name, engagement_name, + *, + close_old_findings4=False, ): """ Test import/reimport/reimport performance with specified scan files and scan type. 
@@ -195,6 +201,7 @@ def _import_reimport_performance( "verified": True, "sync": True, "scan_type": scan_type, + "service": "Secured Application", "tags": ["performance-test-reimport", "reimport-tag-in-param", "reimport-go-faster"], "apply_tags_to_findings": True, } @@ -224,10 +231,44 @@ def _import_reimport_performance( "verified": True, "sync": True, "scan_type": scan_type, + "service": "Secured Application", } reimporter = DefaultReImporter(**reimport_options) test, _, _len_new_findings, _len_closed_findings, _, _, _ = reimporter.process_scan(scan) + # Fourth import (reimport again, empty report) + # Each assertion context manager is wrapped in its own subTest so that if one fails, the others still run. + # This allows us to see all count mismatches in a single test run, making it easier to fix + # all incorrect expected values at once rather than fixing them one at a time. + # Nested with statements are intentional - each assertion needs its own subTest wrapper. + with ( # noqa: SIM117 + self.subTest("reimport3"), impersonate(Dojo_User.objects.get(username="admin")), + scan_file4.open(encoding="utf-8") as scan, + ): + with self.subTest(step="reimport3", metric="queries"): + with self.assertNumQueries(expected_num_queries4): + with self.subTest(step="reimport3", metric="async_tasks"): + with self._assertNumAsyncTask(expected_num_async_tasks4): + reimport_options = { + "test": test, + "user": lead, + "lead": lead, + "scan_date": None, + "minimum_severity": "Info", + "active": True, + "verified": True, + "sync": True, + "scan_type": scan_type, + # StackHawk parser sets the service field causing close old findings to fail if we do not specify the service field + # This is a big problem that needs fixing. Parsers should not set the service field. 
+ "service": "Secured Application", + "close_old_findings": close_old_findings4, + } + reimporter = DefaultReImporter(**reimport_options) + test, _, len_new_findings4, len_closed_findings4, _, _, _ = reimporter.process_scan(scan) + logger.info("Step 4: new=%s closed=%s", len_new_findings4, len_closed_findings4) + self.assertGreater(len_closed_findings4, 0, "Step 4 (empty reimport with close_old_findings=True) should close findings") + @tag("performance") @skip_unless_v2 @@ -235,7 +276,7 @@ class TestDojoImporterPerformanceSmall(TestDojoImporterPerformanceBase): """Performance tests using small sample files (StackHawk, ~6 findings).""" - def _import_reimport_performance(self, expected_num_queries1, expected_num_async_tasks1, expected_num_queries2, expected_num_async_tasks2, expected_num_queries3, expected_num_async_tasks3): + def _import_reimport_performance(self, expected_num_queries1, expected_num_async_tasks1, expected_num_queries2, expected_num_async_tasks2, expected_num_queries3, expected_num_async_tasks3, expected_num_queries4, expected_num_async_tasks4): """ Log output can be quite large as when the assertNumQueries fails, all queries are printed. 
It could be usefule to capture the output in `less`: @@ -251,12 +292,16 @@ def _import_reimport_performance(self, expected_num_queries1, expected_num_async expected_num_async_tasks2, expected_num_queries3, expected_num_async_tasks3, + expected_num_queries4, + expected_num_async_tasks4, scan_file1=STACK_HAWK_SUBSET_FILENAME, scan_file2=STACK_HAWK_FILENAME, scan_file3=STACK_HAWK_SUBSET_FILENAME, + scan_file4=STACK_HAWK_EMPTY, scan_type=STACK_HAWK_SCAN_TYPE, product_name="TestDojoDefaultImporter", engagement_name="Test Create Engagement", + close_old_findings4=True, ) @override_settings(ENABLE_AUDITLOG=True) @@ -275,6 +320,8 @@ def test_import_reimport_reimport_performance_pghistory_async(self): expected_num_async_tasks2=17, expected_num_queries3=108, expected_num_async_tasks3=16, + expected_num_queries4=155, + expected_num_async_tasks4=6, ) @override_settings(ENABLE_AUDITLOG=True) @@ -297,6 +344,8 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self): expected_num_async_tasks2=17, expected_num_queries3=115, expected_num_async_tasks3=16, + expected_num_queries4=155, + expected_num_async_tasks4=6, ) @override_settings(ENABLE_AUDITLOG=True) @@ -320,6 +369,8 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr expected_num_async_tasks2=19, expected_num_queries3=119, expected_num_async_tasks3=18, + expected_num_queries4=162, + expected_num_async_tasks4=8, ) # Deduplication is enabled in the tests above, but to properly test it we must run the same import twice and capture the results. 
@@ -486,7 +537,7 @@ def setUp(self): for model in [Location, LocationFindingReference]: ContentType.objects.get_for_model(model) - def _import_reimport_performance(self, expected_num_queries1, expected_num_async_tasks1, expected_num_queries2, expected_num_async_tasks2, expected_num_queries3, expected_num_async_tasks3): + def _import_reimport_performance(self, expected_num_queries1, expected_num_async_tasks1, expected_num_queries2, expected_num_async_tasks2, expected_num_queries3, expected_num_async_tasks3, expected_num_queries4, expected_num_async_tasks4): r""" Log output can be quite large as when the assertNumQueries fails, all queries are printed. It could be useful to capture the output in `less`: @@ -502,12 +553,16 @@ def _import_reimport_performance(self, expected_num_queries1, expected_num_async expected_num_async_tasks2, expected_num_queries3, expected_num_async_tasks3, + expected_num_queries4, + expected_num_async_tasks4, scan_file1=STACK_HAWK_SUBSET_FILENAME, scan_file2=STACK_HAWK_FILENAME, scan_file3=STACK_HAWK_SUBSET_FILENAME, + scan_file4=STACK_HAWK_EMPTY, scan_type=STACK_HAWK_SCAN_TYPE, product_name="TestDojoDefaultImporterLocations", engagement_name="Test Create Engagement Locations", + close_old_findings4=True, ) @override_settings(ENABLE_AUDITLOG=True) @@ -526,6 +581,8 @@ def test_import_reimport_reimport_performance_pghistory_async(self): expected_num_async_tasks2=17, expected_num_queries3=346, expected_num_async_tasks3=16, + expected_num_queries4=212, + expected_num_async_tasks4=6, ) @override_settings(ENABLE_AUDITLOG=True) @@ -548,6 +605,8 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self): expected_num_async_tasks2=17, expected_num_queries3=355, expected_num_async_tasks3=16, + expected_num_queries4=212, + expected_num_async_tasks4=6, ) @override_settings(ENABLE_AUDITLOG=True) @@ -571,6 +630,8 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr expected_num_async_tasks2=19, 
expected_num_queries3=359, expected_num_async_tasks3=18, + expected_num_queries4=222, + expected_num_async_tasks4=8, ) def _deduplication_performance(self, expected_num_queries1, expected_num_async_tasks1, expected_num_queries2, expected_num_async_tasks2, *, check_duplicates=True):