From b87199b7c1d8e0c4a42de5d17a7c287fe14df1aa Mon Sep 17 00:00:00 2001
From: Valentijn Scholten <valentijnscholten@gmail.com>
Date: Wed, 15 Apr 2026 20:12:06 +0200
Subject: [PATCH 1/7] perf: bulk-apply parser-supplied per-finding tags during
 import
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

finding.tags.add() per finding calls tagulous's add() which does:
  - reload() → SELECT current tags (1 query)
  - _ensure_tags_in_db() → get_or_create per tag (T queries)
  - super().add() → INSERT through-table rows (1 query)
  - tag.increment() → UPDATE count per tag (T queries)

For N findings with T parser-supplied tags each: O(N·T) queries.

Replace with bulk_apply_parser_tags() in tag_utils, which groups
findings by tag name and calls bulk_add_tags_to_instances() once per
unique tag: O(unique_tags) queries regardless of N.

Tags are accumulated per batch and applied just before the
post_process_findings_batch task is dispatched, so deduplication and
rules tasks see the tags already written to the DB.

Both default_importer and default_reimporter use the same approach.
For the reimporter, finding_post_processing accepts an optional
tag_accumulator list; when supplied, tags are accumulated rather than
applied inline (backward-compatible for any direct callers).
---
 dojo/importers/default_importer.py   | 15 ++++++++++++---
 dojo/importers/default_reimporter.py | 24 ++++++++++++++++++++----
 dojo/tag_utils.py                    | 23 ++++++++++++++++++++++-
 3 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py
index a57b6884152..0f0c4b7c0cb 100644
--- a/dojo/importers/default_importer.py
+++ b/dojo/importers/default_importer.py
@@ -20,6 +20,7 @@
 )
 from dojo.notifications.helper import create_notification
 from dojo.utils import get_full_url, perform_product_grading
+from dojo.tag_utils import bulk_apply_parser_tags
 from dojo.validators import clean_tags
 
 logger = logging.getLogger(__name__)
@@ -179,6 +180,7 @@ def process_findings(
         at import time
         """
         new_findings = []
+        findings_with_parser_tags: list[tuple] = []
         logger.debug("starting import of %i parsed findings.", len(parsed_findings) if parsed_findings else 0)
         group_names_to_findings_dict = {}
 
@@ -245,12 +247,13 @@ def process_findings(
                 # TODO: Delete this after the move to Locations
                 # Process any endpoints on the finding, or added on the form
                 self.process_endpoints(finding, self.endpoints_to_add)
-            # Parsers must use unsaved_tags to store tags, so we can clean them
+            # Parsers must use unsaved_tags to store tags, so we can clean them.
+            # Accumulate for bulk application at each batch boundary (O(unique_tags) instead of O(N·T)).
             cleaned_tags = clean_tags(finding.unsaved_tags)
             if isinstance(cleaned_tags, list):
-                finding.tags.add(*cleaned_tags)
+                findings_with_parser_tags.append((finding, cleaned_tags))
             elif isinstance(cleaned_tags, str):
-                finding.tags.add(cleaned_tags)
+                findings_with_parser_tags.append((finding, [cleaned_tags]))
             # Process any files
             self.process_files(finding)
             # Process vulnerability IDs
@@ -268,6 +271,12 @@ def process_findings(
             if len(batch_finding_ids) >= batch_max_size or is_final_finding:
                 if not settings.V3_FEATURE_LOCATIONS:
                     self.endpoint_manager.persist(user=self.user)
+
+                # Apply parser-supplied tags for this batch before post-processing starts,
+                # so rules/deduplication tasks see the tags already on the findings.
+                bulk_apply_parser_tags(findings_with_parser_tags)
+                findings_with_parser_tags.clear()
+
                 finding_ids_batch = list(batch_finding_ids)
                 batch_finding_ids.clear()
                 logger.debug("process_findings: dispatching batch with push_to_jira=%s (batch_size=%d, is_final=%s)",
diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py
index efeaa252eb8..dafd6f4a00c 100644
--- a/dojo/importers/default_reimporter.py
+++ b/dojo/importers/default_reimporter.py
@@ -26,6 +26,7 @@
     Test,
     Test_Import,
 )
+from dojo.tag_utils import bulk_apply_parser_tags
 from dojo.utils import perform_product_grading
 from dojo.validators import clean_tags
 
@@ -310,6 +311,7 @@ def process_findings(
             cleaned_findings.append(sanitized)
 
         batch_finding_ids: list[int] = []
+        findings_with_parser_tags: list[tuple] = []
         # Batch size for deduplication/post-processing (only new findings)
         dedupe_batch_max_size = getattr(settings, "IMPORT_REIMPORT_DEDUPE_BATCH_SIZE", 1000)
         # Batch size for candidate matching (all findings, before matching)
@@ -417,6 +419,7 @@ def process_findings(
                         finding,
                         unsaved_finding,
                         is_matched_finding=bool(matched_findings),
+                        tag_accumulator=findings_with_parser_tags,
                     )
                     # all data is already saved on the finding, we only need to trigger post processing in batches
                     push_to_jira = self.push_to_jira and ((not self.findings_groups_enabled or not self.group_by) or not finding_will_be_grouped)
@@ -440,6 +443,12 @@ def process_findings(
                     if len(batch_finding_ids) >= dedupe_batch_max_size or is_final:
                         if not settings.V3_FEATURE_LOCATIONS:
                             self.endpoint_manager.persist(user=self.user)
+
+                        # Apply parser-supplied tags for this batch before post-processing starts,
+                        # so rules/deduplication tasks see the tags already on the findings.
+                        bulk_apply_parser_tags(findings_with_parser_tags)
+                        findings_with_parser_tags.clear()
+
                         finding_ids_batch = list(batch_finding_ids)
                         batch_finding_ids.clear()
                         dojo_dispatch_task(
@@ -976,6 +985,7 @@ def finding_post_processing(
         finding_from_report: Finding,
         *,
         is_matched_finding: bool = False,
+        tag_accumulator: list | None = None,
     ) -> Finding:
         """
         Save all associated objects to the finding after it has been saved
@@ -1006,10 +1016,16 @@ def finding_post_processing(
                 finding_from_report.unsaved_tags = merged_tags
             if finding_from_report.unsaved_tags:
                 cleaned_tags = clean_tags(finding_from_report.unsaved_tags)
-                if isinstance(cleaned_tags, list):
-                    finding.tags.add(*cleaned_tags)
-                elif isinstance(cleaned_tags, str):
-                    finding.tags.add(cleaned_tags)
+                if tag_accumulator is not None:
+                    if isinstance(cleaned_tags, list):
+                        tag_accumulator.append((finding, cleaned_tags))
+                    elif isinstance(cleaned_tags, str):
+                        tag_accumulator.append((finding, [cleaned_tags]))
+                else:
+                    if isinstance(cleaned_tags, list):
+                        finding.tags.add(*cleaned_tags)
+                    elif isinstance(cleaned_tags, str):
+                        finding.tags.add(cleaned_tags)
         # Process any files
         if finding_from_report.unsaved_files:
             finding.unsaved_files = finding_from_report.unsaved_files
diff --git a/dojo/tag_utils.py b/dojo/tag_utils.py
index cf405034be4..e9800965c51 100644
--- a/dojo/tag_utils.py
+++ b/dojo/tag_utils.py
@@ -164,6 +164,27 @@ def bulk_add_tags_to_instances(tag_or_tags, instances, tag_field_name: str = "ta
     return total_created
 
 
+def bulk_apply_parser_tags(findings_with_tags: list) -> None:
+    """Bulk-apply per-finding parser tags collected during an import loop.
+
+    Reduces O(N·T) per-finding ``finding.tags.add()`` calls to O(unique_tags) queries
+    by grouping findings by tag name and calling ``bulk_add_tags_to_instances`` once per tag.
+
+    Args:
+        findings_with_tags: list of ``(finding, [tag_str, ...])`` pairs accumulated
+            during the import loop (only for findings whose parser supplied tags).
+    """
+    from collections import defaultdict  # noqa: PLC0415
+
+    tag_to_findings: dict = defaultdict(list)
+    for finding, tag_list in findings_with_tags:
+        for tag in tag_list:
+            if tag:
+                tag_to_findings[tag].append(finding)
+    for tag_name, findings_for_tag in tag_to_findings.items():
+        bulk_add_tags_to_instances(tag_or_tags=tag_name, instances=findings_for_tag)
+
+
 def bulk_remove_all_tags(model_class, instance_ids_qs):
     """
     Remove all tags from instances identified by the given ID subquery.
@@ -226,4 +247,4 @@ def bulk_remove_all_tags(model_class, instance_ids_qs):
             )
 
 
-__all__ = ["bulk_add_tags_to_instances", "bulk_remove_all_tags"]
+__all__ = ["bulk_add_tags_to_instances", "bulk_apply_parser_tags", "bulk_remove_all_tags"]

From cb7e56623681b7de10619043e670251afa9ae464 Mon Sep 17 00:00:00 2001
From: Valentijn Scholten <valentijnscholten@gmail.com>
Date: Wed, 15 Apr 2026 20:30:45 +0200
Subject: [PATCH 2/7] chore: fix ruff linting errors in bulk-tag import code

---
 dojo/importers/default_importer.py   | 2 +-
 dojo/importers/default_reimporter.py | 9 ++++-----
 dojo/tag_utils.py                    | 4 +++-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py
index 0f0c4b7c0cb..6ffddc669b8 100644
--- a/dojo/importers/default_importer.py
+++ b/dojo/importers/default_importer.py
@@ -19,8 +19,8 @@
     Test_Import,
 )
 from dojo.notifications.helper import create_notification
-from dojo.utils import get_full_url, perform_product_grading
 from dojo.tag_utils import bulk_apply_parser_tags
+from dojo.utils import get_full_url, perform_product_grading
 from dojo.validators import clean_tags
 
 logger = logging.getLogger(__name__)
diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py
index dafd6f4a00c..2a22da10a35 100644
--- a/dojo/importers/default_reimporter.py
+++ b/dojo/importers/default_reimporter.py
@@ -1021,11 +1021,10 @@ def finding_post_processing(
                         tag_accumulator.append((finding, cleaned_tags))
                     elif isinstance(cleaned_tags, str):
                         tag_accumulator.append((finding, [cleaned_tags]))
-                else:
-                    if isinstance(cleaned_tags, list):
-                        finding.tags.add(*cleaned_tags)
-                    elif isinstance(cleaned_tags, str):
-                        finding.tags.add(cleaned_tags)
+                elif isinstance(cleaned_tags, list):
+                    finding.tags.add(*cleaned_tags)
+                elif isinstance(cleaned_tags, str):
+                    finding.tags.add(cleaned_tags)
         # Process any files
         if finding_from_report.unsaved_files:
             finding.unsaved_files = finding_from_report.unsaved_files
diff --git a/dojo/tag_utils.py b/dojo/tag_utils.py
index e9800965c51..cf92665bbf7 100644
--- a/dojo/tag_utils.py
+++ b/dojo/tag_utils.py
@@ -165,7 +165,8 @@ def bulk_add_tags_to_instances(tag_or_tags, instances, tag_field_name: str = "ta
 
 
 def bulk_apply_parser_tags(findings_with_tags: list) -> None:
-    """Bulk-apply per-finding parser tags collected during an import loop.
+    """
+    Bulk-apply per-finding parser tags collected during an import loop.
 
     Reduces O(N·T) per-finding ``finding.tags.add()`` calls to O(unique_tags) queries
     by grouping findings by tag name and calling ``bulk_add_tags_to_instances`` once per tag.
@@ -173,6 +174,7 @@ def bulk_apply_parser_tags(findings_with_tags: list) -> None:
     Args:
         findings_with_tags: list of ``(finding, [tag_str, ...])`` pairs accumulated
             during the import loop (only for findings whose parser supplied tags).
+
     """
     from collections import defaultdict  # noqa: PLC0415
 

From acd12322ae92ff96ee8dd7753b65d07a2de7fe5e Mon Sep 17 00:00:00 2001
From: Valentijn Scholten <valentijnscholten@gmail.com>
Date: Thu, 16 Apr 2026 22:03:10 +0200
Subject: [PATCH 3/7] perf: add bulk_add_tag_mapping for parser tags

---
 dojo/tag_utils.py                | 180 ++++++++++++++++++++++++++++++-
 unittests/test_tag_utils_bulk.py | 176 +++++++++++++++++++++++++++++-
 2 files changed, 350 insertions(+), 6 deletions(-)

diff --git a/dojo/tag_utils.py b/dojo/tag_utils.py
index cf92665bbf7..340f8fa2804 100644
--- a/dojo/tag_utils.py
+++ b/dojo/tag_utils.py
@@ -164,12 +164,182 @@ def bulk_add_tags_to_instances(tag_or_tags, instances, tag_field_name: str = "ta
     return total_created
 
 
+def bulk_add_tag_mapping(
+    tag_to_instances: dict[str, list],
+    tag_field_name: str = "tags",
+    batch_size: int | None = None,
+) -> int:
+    """
+    Add different tags to different sets of instances in ~5 queries regardless of tag count.
+
+    Unlike calling ``bulk_add_tags_to_instances`` once per unique tag — which issues
+    O(unique_tags) queries — this function batches all work:
+
+    1. Fetch all existing tag objects in one query.
+    2. Bulk-create any missing tag objects (one INSERT + one re-fetch if needed).
+    3. Fetch all pre-existing through-model rows for these (instance, tag) pairs in one query.
+    4. Bulk-create all new relationships in one query (batched by ``batch_size``).
+    5. Update all tag counts in one ``UPDATE … CASE WHEN …`` query.
+
+    Args:
+        tag_to_instances: mapping of tag_name -> list of instances that should receive
+            that tag.  All instances must be of the same model type.
+        tag_field_name: name of the TagField on the model (default: ``"tags"``).
+        batch_size: ``bulk_create`` batch size; defaults to ``TAG_BULK_ADD_BATCH_SIZE``
+            setting (1000).
+
+    Returns:
+        Total number of new tag relationships created.
+    """
+    from collections import defaultdict  # noqa: PLC0415
+
+    from django.db.models import Case, IntegerField, When  # noqa: PLC0415
+    from django.db.models.functions import Lower  # noqa: PLC0415
+
+    if not tag_to_instances:
+        return 0
+
+    if batch_size is None:
+        batch_size = getattr(settings, "TAG_BULK_ADD_BATCH_SIZE", 1000)
+
+    all_instances = [inst for insts in tag_to_instances.values() for inst in insts]
+    if not all_instances:
+        return 0
+
+    model_class = all_instances[0].__class__
+
+    if model_class is Product:
+        msg = "bulk_add_tag_mapping: Product instances are not supported; use Product.tags.add() or a propagation-aware helper"
+        raise ValueError(msg)
+
+    try:
+        tag_field = model_class._meta.get_field(tag_field_name)
+    except Exception:
+        msg = f"Model {model_class.__name__} does not have field '{tag_field_name}'"
+        raise ValueError(msg)
+
+    if not hasattr(tag_field, "tag_options"):
+        msg = f"Field '{tag_field_name}' is not a TagField"
+        raise ValueError(msg)
+
+    tag_model = tag_field.related_model
+    through_model = tag_field.remote_field.through
+    case_sensitive = tag_field.tag_options.case_sensitive
+
+    source_field_name = None
+    target_field_name = None
+    for field in through_model._meta.fields:
+        if hasattr(field, "remote_field") and field.remote_field:
+            if field.remote_field.model == model_class:
+                source_field_name = field.name
+            elif field.remote_field.model == tag_model:
+                target_field_name = field.name
+
+    all_tag_names = list(tag_to_instances.keys())
+
+    # --- Query 1: fetch existing tag objects ---
+    if case_sensitive:
+        existing_tags: dict[str, object] = {
+            t.name: t
+            for t in tag_model.objects.filter(name__in=all_tag_names)
+        }
+        missing_names = [n for n in all_tag_names if n not in existing_tags]
+    else:
+        # Annotate with lowercased name for a case-insensitive IN lookup
+        existing_tags = {
+            t.name_lower: t
+            for t in tag_model.objects.annotate(name_lower=Lower("name")).filter(
+                name_lower__in=[n.lower() for n in all_tag_names],
+            )
+        }
+        missing_names = [n for n in all_tag_names if n.lower() not in existing_tags]
+
+    # --- Query 2: create missing tag objects then re-fetch to get their PKs ---
+    if missing_names:
+        tag_model.objects.bulk_create(
+            [tag_model(name=n, protected=False) for n in missing_names],
+            ignore_conflicts=True,
+        )
+        if case_sensitive:
+            existing_tags.update(
+                {t.name: t for t in tag_model.objects.filter(name__in=missing_names)},
+            )
+        else:
+            existing_tags.update(
+                {
+                    t.name_lower: t
+                    for t in tag_model.objects.annotate(name_lower=Lower("name")).filter(
+                        name_lower__in=[n.lower() for n in missing_names],
+                    )
+                },
+            )
+
+    def _key(name: str) -> str:
+        return name if case_sensitive else name.lower()
+
+    # --- Query 3: fetch all pre-existing (instance, tag) through-model rows ---
+    all_instance_ids = {inst.pk for inst in all_instances}
+    all_tag_pks = {tag.pk for tag in existing_tags.values()}
+
+    existing_pairs: set[tuple] = set(
+        through_model.objects.filter(
+            **{f"{source_field_name}__in": all_instance_ids},
+            **{f"{target_field_name}__in": all_tag_pks},
+        ).values_list(source_field_name, target_field_name),
+    )
+
+    new_relationships = []
+    created_per_tag: dict[int, int] = defaultdict(int)
+
+    for tag_name, instances in tag_to_instances.items():
+        tag = existing_tags.get(_key(tag_name))
+        if tag is None:
+            continue
+        for instance in instances:
+            if (instance.pk, tag.pk) not in existing_pairs:
+                new_relationships.append(
+                    through_model(**{source_field_name: instance, target_field_name: tag}),
+                )
+                created_per_tag[tag.pk] += 1
+
+    if not new_relationships:
+        return 0
+
+    # --- Query 4: bulk-create all new relationships (batched for memory) ---
+    total_created = 0
+    with transaction.atomic():
+        for i in range(0, len(new_relationships), batch_size):
+            batch = new_relationships[i : i + batch_size]
+            actually_created = through_model.objects.bulk_create(batch, ignore_conflicts=True)
+            total_created += (
+                len(actually_created) if hasattr(actually_created, "__len__") else len(batch)
+            )
+
+    # --- Query 5: update all tag counts in one UPDATE … CASE WHEN … ---
+    tag_model.objects.filter(pk__in=list(created_per_tag.keys())).update(
+        count=Case(
+            *[
+                When(pk=pk, then=models.F("count") + delta)
+                for pk, delta in created_per_tag.items()
+            ],
+            output_field=IntegerField(),
+        ),
+    )
+
+    for instance in all_instances:
+        prefetch_cache = getattr(instance, "_prefetched_objects_cache", None)
+        if prefetch_cache is not None:
+            prefetch_cache.pop(tag_field_name, None)
+
+    return total_created
+
+
 def bulk_apply_parser_tags(findings_with_tags: list) -> None:
     """
     Bulk-apply per-finding parser tags collected during an import loop.
 
-    Reduces O(N·T) per-finding ``finding.tags.add()`` calls to O(unique_tags) queries
-    by grouping findings by tag name and calling ``bulk_add_tags_to_instances`` once per tag.
+    Delegates to ``bulk_add_tag_mapping`` to process all tags in ~5 queries total,
+    regardless of how many unique tag values the parser produced.
 
     Args:
         findings_with_tags: list of ``(finding, [tag_str, ...])`` pairs accumulated
@@ -183,8 +353,8 @@ def bulk_apply_parser_tags(findings_with_tags: list) -> None:
         for tag in tag_list:
             if tag:
                 tag_to_findings[tag].append(finding)
-    for tag_name, findings_for_tag in tag_to_findings.items():
-        bulk_add_tags_to_instances(tag_or_tags=tag_name, instances=findings_for_tag)
+
+    bulk_add_tag_mapping(tag_to_findings)
 
 
 def bulk_remove_all_tags(model_class, instance_ids_qs):
@@ -249,4 +419,4 @@ def bulk_remove_all_tags(model_class, instance_ids_qs):
             )
 
 
-__all__ = ["bulk_add_tags_to_instances", "bulk_apply_parser_tags", "bulk_remove_all_tags"]
+__all__ = ["bulk_add_tag_mapping", "bulk_add_tags_to_instances", "bulk_apply_parser_tags", "bulk_remove_all_tags"]
diff --git a/unittests/test_tag_utils_bulk.py b/unittests/test_tag_utils_bulk.py
index 63fc86ba95f..1c0f4f831a8 100644
--- a/unittests/test_tag_utils_bulk.py
+++ b/unittests/test_tag_utils_bulk.py
@@ -4,7 +4,7 @@
 
 from dojo.location.models import Location
 from dojo.models import Endpoint, Engagement, Finding, Product, Product_Type, Test, Test_Type
-from dojo.tag_utils import bulk_add_tags_to_instances
+from dojo.tag_utils import bulk_add_tag_mapping, bulk_add_tags_to_instances, bulk_apply_parser_tags
 from dojo.url.models import URL
 from unittests.dojo_test_case import DojoAPITestCase, versioned_fixtures
 
@@ -260,6 +260,180 @@ def test_bulk_add_non_tag_field(self):
         self.assertIn("is not a TagField", str(cm.exception))
 
 
+class BulkTagMappingTest(TestCase):
+    """Tests for bulk_add_tag_mapping — the multi-tag, ~5-query variant."""
+
+    LOCATION_CLASS = Location if settings.V3_FEATURE_LOCATIONS else Endpoint
+
+    def setUp(self):
+        self.tag_model = self.LOCATION_CLASS.tags.tag_model
+        self.product_type = Product_Type.objects.create(name="PT-Mapping")
+        self.product = Product.objects.create(name="Mapping Product", description="test", prod_type=self.product_type)
+
+    def _make_location(self, hostname):
+        if not settings.V3_FEATURE_LOCATIONS:
+            return Endpoint.objects.create(product=self.product, host=hostname)
+        url = URL.get_or_create_from_values(host=hostname)
+        url.location.associate_with_product(self.product)
+        return url.location
+
+    def _make_locations(self, n):
+        return [self._make_location(f"map-host-{i}.example.com") for i in range(n)]
+
+    def test_basic_different_tags_different_instances(self):
+        a, b, c = self._make_locations(3)
+        created = bulk_add_tag_mapping({"alpha": [a, b], "beta": [b, c], "gamma": [c]})
+
+        self.assertEqual(created, 5)
+        a.refresh_from_db()
+        b.refresh_from_db()
+        c.refresh_from_db()
+        self.assertEqual([t.name for t in a.tags.all()], ["alpha"])
+        self.assertCountEqual([t.name for t in b.tags.all()], ["alpha", "beta"])
+        self.assertCountEqual([t.name for t in c.tags.all()], ["beta", "gamma"])
+
+        self.assertEqual(self.tag_model.objects.get(name="alpha").count, 2)
+        self.assertEqual(self.tag_model.objects.get(name="beta").count, 2)
+        self.assertEqual(self.tag_model.objects.get(name="gamma").count, 1)
+
+    def test_same_tag_across_all_instances(self):
+        instances = self._make_locations(4)
+        created = bulk_add_tag_mapping({"shared": instances})
+
+        self.assertEqual(created, 4)
+        self.assertEqual(self.tag_model.objects.get(name="shared").count, 4)
+
+    def test_skips_existing_relationships(self):
+        a, b, c = self._make_locations(3)
+        a.tags.add("existing")
+        b.tags.add("existing")
+
+        created = bulk_add_tag_mapping({"existing": [a, b, c]})
+
+        self.assertEqual(created, 1)
+        self.assertEqual(self.tag_model.objects.get(name="existing").count, 3)
+
+    def test_empty_dict_returns_zero(self):
+        created = bulk_add_tag_mapping({})
+        self.assertEqual(created, 0)
+
+    def test_empty_instance_lists_returns_zero(self):
+        created = bulk_add_tag_mapping({"tag-a": [], "tag-b": []})
+        self.assertEqual(created, 0)
+        self.assertEqual(self.tag_model.objects.filter(name__in=["tag-a", "tag-b"]).count(), 0)
+
+    def test_case_insensitive_finds_existing_tag(self):
+        # Pre-create tag in lowercase (simulating force_lowercase storage)
+        instances = self._make_locations(2)
+        instances[0].tags.add("mytag")
+
+        # Requesting "MYTAG" should match the existing "mytag" object
+        created = bulk_add_tag_mapping({"MYTAG": [instances[0], instances[1]]})
+
+        self.assertEqual(created, 1)
+        self.assertEqual(self.tag_model.objects.count(), 1)
+
+    def test_creates_new_tags_that_dont_exist(self):
+        instances = self._make_locations(2)
+        created = bulk_add_tag_mapping({"brand-new-a": [instances[0]], "brand-new-b": [instances[1]]})
+
+        self.assertEqual(created, 2)
+        self.assertTrue(self.tag_model.objects.filter(name="brand-new-a").exists())
+        self.assertTrue(self.tag_model.objects.filter(name="brand-new-b").exists())
+
+    def test_clears_prefetch_cache(self):
+        instances = list(self.LOCATION_CLASS.objects.filter(
+            pk__in=[loc.pk for loc in self._make_locations(2)],
+        ).prefetch_related("tags"))
+
+        for inst in instances:
+            self.assertEqual(list(inst.tags.all()), [])
+
+        bulk_add_tag_mapping({"cache-map": instances})
+
+        for inst in instances:
+            self.assertIn("cache-map", [t.name for t in inst.tags.all()])
+
+    def test_product_rejected(self):
+        pt = Product_Type.objects.create(name="PT-Reject")
+        product = Product.objects.create(name="P-Reject", description="x", prod_type=pt)
+        with self.assertRaisesRegex(ValueError, "Product instances are not supported"):  # check the error text, not only the type
+            bulk_add_tag_mapping({"tag": [product]})
+
+    def test_batching_creates_all_relationships(self):
+        instances = self._make_locations(15)
+        created = bulk_add_tag_mapping({"batch-tag": instances}, batch_size=4)
+
+        self.assertEqual(created, 15)
+        self.assertEqual(self.tag_model.objects.get(name="batch-tag").count, 15)
+
+
+class BulkApplyParserTagsTest(TestCase):
+    """Tests for bulk_apply_parser_tags — the import-loop accumulator path."""
+
+    def setUp(self):
+        self.tag_model = Finding.tags.tag_model
+        pt = Product_Type.objects.create(name="PT-Parser")
+        product = Product.objects.create(name="Parser Product", description="x", prod_type=pt)
+        engagement = Engagement.objects.create(
+            name="E-Parser", product=product,
+            target_start=timezone.now(), target_end=timezone.now(),
+        )
+        tt = Test_Type.objects.create(name="Parser Test Type")
+        test = Test.objects.create(
+            title="T-Parser", engagement=engagement, test_type=tt,
+            target_start=timezone.now(), target_end=timezone.now(),
+        )
+        self.test = test
+
+    def _make_finding(self, title):
+        return Finding.objects.create(title=title, severity="Low", test=self.test)
+
+    def test_applies_tags_correctly(self):
+        f1 = self._make_finding("F1")
+        f2 = self._make_finding("F2")
+        f3 = self._make_finding("F3")
+
+        bulk_apply_parser_tags([
+            (f1, ["network", "web"]),
+            (f2, ["network"]),
+            (f3, ["pci"]),
+        ])
+
+        f1.refresh_from_db()
+        f2.refresh_from_db()
+        f3.refresh_from_db()
+        self.assertCountEqual([t.name for t in f1.tags.all()], ["network", "web"])
+        self.assertCountEqual([t.name for t in f2.tags.all()], ["network"])
+        self.assertCountEqual([t.name for t in f3.tags.all()], ["pci"])
+
+        self.assertEqual(self.tag_model.objects.get(name="network").count, 2)
+        self.assertEqual(self.tag_model.objects.get(name="web").count, 1)
+        self.assertEqual(self.tag_model.objects.get(name="pci").count, 1)
+
+    def test_empty_list_is_noop(self):
+        bulk_apply_parser_tags([])
+        self.assertEqual(self.tag_model.objects.count(), 0)
+
+    def test_filters_empty_tag_strings(self):
+        f = self._make_finding("F-empty")
+        bulk_apply_parser_tags([(f, ["", "valid", ""])])
+        f.refresh_from_db()
+        self.assertEqual([t.name for t in f.tags.all()], ["valid"])
+
+    def test_dynamic_tags_many_unique_values(self):
+        # Simulate a parser that emits one unique tag per finding (e.g. resource name)
+        findings = [self._make_finding(f"F-dyn-{i}") for i in range(20)]
+        pairs = [(f, [f"resource-{i}"]) for i, f in enumerate(findings)]
+        bulk_apply_parser_tags(pairs)
+
+        for i, f in enumerate(findings):
+            f.refresh_from_db()
+            self.assertEqual([t.name for t in f.tags.all()], [f"resource-{i}"])
+
+        self.assertEqual(self.tag_model.objects.count(), 20)
+
+
 @versioned_fixtures
 class BulkTagUtilsInheritanceTest(DojoAPITestCase):
     fixtures = ["dojo_testdata.json"]

From 837e2ad314a572ad8091d19f014f130024ff5681 Mon Sep 17 00:00:00 2001
From: Valentijn Scholten <valentijnscholten@gmail.com>
Date: Thu, 16 Apr 2026 22:08:44 +0200
Subject: [PATCH 4/7] chore: fix ruff docstring lint errors

---
 dojo/tag_utils.py                | 1 +
 unittests/test_tag_utils_bulk.py | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/dojo/tag_utils.py b/dojo/tag_utils.py
index 340f8fa2804..62bb1190fff 100644
--- a/dojo/tag_utils.py
+++ b/dojo/tag_utils.py
@@ -190,6 +190,7 @@ def bulk_add_tag_mapping(
 
     Returns:
         Total number of new tag relationships created.
+
     """
     from collections import defaultdict  # noqa: PLC0415
 
diff --git a/unittests/test_tag_utils_bulk.py b/unittests/test_tag_utils_bulk.py
index 1c0f4f831a8..c975c5dac3c 100644
--- a/unittests/test_tag_utils_bulk.py
+++ b/unittests/test_tag_utils_bulk.py
@@ -261,6 +261,7 @@ def test_bulk_add_non_tag_field(self):
 
 
 class BulkTagMappingTest(TestCase):
+
     """Tests for bulk_add_tag_mapping — the multi-tag, ~5-query variant."""
 
     LOCATION_CLASS = Location if settings.V3_FEATURE_LOCATIONS else Endpoint
@@ -369,6 +370,7 @@ def test_batching_creates_all_relationships(self):
 
 
 class BulkApplyParserTagsTest(TestCase):
+
     """Tests for bulk_apply_parser_tags — the import-loop accumulator path."""
 
     def setUp(self):

From 84ee6a430c475462b630d3a6daab048cb6126e51 Mon Sep 17 00:00:00 2001
From: Valentijn Scholten <valentijnscholten@gmail.com>
Date: Fri, 17 Apr 2026 09:22:02 +0200
Subject: [PATCH 5/7] fix: create missing tags via get_or_create so slug is set

---
 dojo/tag_utils.py | 42 ++++++++++++++++--------------------------
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/dojo/tag_utils.py b/dojo/tag_utils.py
index 62bb1190fff..87ed6845961 100644
--- a/dojo/tag_utils.py
+++ b/dojo/tag_utils.py
@@ -238,6 +238,9 @@ def bulk_add_tag_mapping(
 
     all_tag_names = list(tag_to_instances.keys())
 
+    def _key(name: str) -> str:
+        return name if case_sensitive else name.lower()
+
     # --- Query 1: fetch existing tag objects ---
     if case_sensitive:
         existing_tags: dict[str, object] = {
@@ -255,28 +258,16 @@ def bulk_add_tag_mapping(
         }
         missing_names = [n for n in all_tag_names if n.lower() not in existing_tags]
 
-    # --- Query 2: create missing tag objects then re-fetch to get their PKs ---
+    # --- Query 2: create missing tag objects (one get_or_create per missing name) ---
+    # Use get_or_create to call model.save(), which lets tagulous generate the slug field.
+    # bulk_create bypasses save() so slug is never set, causing unique constraint failures.
     if missing_names:
-        tag_model.objects.bulk_create(
-            [tag_model(name=n, protected=False) for n in missing_names],
-            ignore_conflicts=True,
-        )
-        if case_sensitive:
-            existing_tags.update(
-                {t.name: t for t in tag_model.objects.filter(name__in=missing_names)},
-            )
-        else:
-            existing_tags.update(
-                {
-                    t.name_lower: t
-                    for t in tag_model.objects.annotate(name_lower=Lower("name")).filter(
-                        name_lower__in=[n.lower() for n in missing_names],
-                    )
-                },
-            )
-
-    def _key(name: str) -> str:
-        return name if case_sensitive else name.lower()
+        for n in missing_names:
+            if case_sensitive:
+                tag, _ = tag_model.objects.get_or_create(name=n, defaults={"protected": False})
+            else:
+                tag, _ = tag_model.objects.get_or_create(name__iexact=n, defaults={"name": n, "protected": False})
+            existing_tags[_key(n)] = tag
 
     # --- Query 3: fetch all pre-existing (instance, tag) through-model rows ---
     all_instance_ids = {inst.pk for inst in all_instances}
@@ -307,14 +298,13 @@ def _key(name: str) -> str:
         return 0
 
     # --- Query 4: bulk-create all new relationships (batched for memory) ---
-    total_created = 0
+    # Use len(new_relationships) for the count: existing pairs were already filtered out above,
+    # so every entry here is new. bulk_create return value is unreliable with ignore_conflicts.
+    total_created = len(new_relationships)
     with transaction.atomic():
         for i in range(0, len(new_relationships), batch_size):
             batch = new_relationships[i : i + batch_size]
-            actually_created = through_model.objects.bulk_create(batch, ignore_conflicts=True)
-            total_created += (
-                len(actually_created) if hasattr(actually_created, "__len__") else len(batch)
-            )
+            through_model.objects.bulk_create(batch, ignore_conflicts=True)
 
     # --- Query 5: update all tag counts in one UPDATE … CASE WHEN … ---
     tag_model.objects.filter(pk__in=list(created_per_tag.keys())).update(

From 65d52ae7b45fee9a55e2180529879e9fc794b1d0 Mon Sep 17 00:00:00 2001
From: Valentijn Scholten <valentijnscholten@gmail.com>
Date: Fri, 17 Apr 2026 09:54:04 +0200
Subject: [PATCH 6/7] fix tests: set finding reporter, disable SSL redirect

---
 unittests/test_tag_utils_bulk.py | 4 +++-
 unittests/test_tags.py           | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/unittests/test_tag_utils_bulk.py b/unittests/test_tag_utils_bulk.py
index c975c5dac3c..c9e369c0a2e 100644
--- a/unittests/test_tag_utils_bulk.py
+++ b/unittests/test_tag_utils_bulk.py
@@ -1,4 +1,5 @@
 from django.conf import settings
+from django.contrib.auth.models import User
 from django.test import TestCase
 from django.utils import timezone
 
@@ -375,6 +376,7 @@ class BulkApplyParserTagsTest(TestCase):
 
     def setUp(self):
         self.tag_model = Finding.tags.tag_model
+        self.reporter = User.objects.create_user(username="parser-test-user", password="x")
         pt = Product_Type.objects.create(name="PT-Parser")
         product = Product.objects.create(name="Parser Product", description="x", prod_type=pt)
         engagement = Engagement.objects.create(
@@ -389,7 +391,7 @@ def setUp(self):
         self.test = test
 
     def _make_finding(self, title):
-        return Finding.objects.create(title=title, severity="Low", test=self.test)
+        return Finding.objects.create(title=title, severity="Low", test=self.test, reporter=self.reporter)
 
     def test_applies_tags_correctly(self):
         f1 = self._make_finding("F1")
diff --git a/unittests/test_tags.py b/unittests/test_tags.py
index b9077e1daab..b6661ab12d4 100644
--- a/unittests/test_tags.py
+++ b/unittests/test_tags.py
@@ -386,6 +386,7 @@ class TagImportTestAPI(DojoAPITestCase, TagImportMixin):
 
     def setUp(self):
         super().setUp()
+        settings.SECURE_SSL_REDIRECT = False
         testuser = User.objects.get(username="admin")
         testuser.usercontactinfo.block_execution = True
         testuser.usercontactinfo.save()
@@ -402,6 +403,7 @@ class TagImportTestUI(DojoAPITestCase, TagImportMixin):
 
     def setUp(self):
         super().setUp()
+        settings.SECURE_SSL_REDIRECT = False
         testuser = User.objects.get(username="admin")
         testuser.usercontactinfo.block_execution = True
         testuser.usercontactinfo.save()

From 3e8b19168a99b89f615c898a8cf8ebb61e3a6f46 Mon Sep 17 00:00:00 2001
From: Valentijn Scholten <valentijnscholten@gmail.com>
Date: Fri, 17 Apr 2026 10:43:09 +0200
Subject: [PATCH 7/7] fix tests: create test user without a password

---
 unittests/test_tag_utils_bulk.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unittests/test_tag_utils_bulk.py b/unittests/test_tag_utils_bulk.py
index c9e369c0a2e..3a815041fb4 100644
--- a/unittests/test_tag_utils_bulk.py
+++ b/unittests/test_tag_utils_bulk.py
@@ -376,7 +376,7 @@ class BulkApplyParserTagsTest(TestCase):
 
     def setUp(self):
         self.tag_model = Finding.tags.tag_model
-        self.reporter = User.objects.create_user(username="parser-test-user", password="x")
+        self.reporter = User.objects.create_user(username="parser-test-user")
         pt = Product_Type.objects.create(name="PT-Parser")
         product = Product.objects.create(name="Parser Product", description="x", prod_type=pt)
         engagement = Engagement.objects.create(