Skip to content

Commit 6788368

Browse files
perf(tags): bulk-propagate inherited tags + gate child post_save on create (#14812)
Replaces the per-row `.save()` loop in `propagate_tags_on_product_sync` with bulk SQL through the existing tag-utils helpers. For every child model (Engagement/Test/Finding/Endpoint/Location), reads current inherited_tags in one query, computes the per-child diff against the Product's tags, and applies adds/removes via `bulk_add_tag_mapping` and the new `bulk_remove_tags_from_instances` helper. Both `tags` and `inherited_tags` fields are kept in sync. Also gates the per-child `inherit_tags_on_instance` post_save handler on `created=True`. The previous behavior fired on every save (create OR update), repeatedly re-applying inherited tags to children whose tag state had not changed. Sticky enforcement on user-driven tag edits is unchanged (still handled by `make_inherited_tags_sticky` on m2m_changed). Pinned query-count baselines from PR #14811 drop accordingly: product_tag_add -> 100 findings : 4758 -> 91 (~52x fewer queries) product_tag_remove -> 100 findings : 4540 -> 53 (~85x fewer queries) Sticky and child-creation paths are unchanged in this PR. Phase B targets those (centralized inheritance module + drop the duplicate `inherited_tags` TagField).
1 parent 79f58ec commit 6788368

4 files changed

Lines changed: 242 additions & 37 deletions

File tree

dojo/product/helpers.py

Lines changed: 104 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
import contextlib
22
import logging
3+
from collections import defaultdict
34

45
from django.conf import settings
56
from django.db.models import Q
67

78
from dojo.celery import app
89
from dojo.location.models import Location
910
from dojo.models import Endpoint, Engagement, Finding, Product, Test
11+
from dojo.tag_utils import bulk_add_tag_mapping, bulk_remove_tags_from_instances
1012

1113
logger = logging.getLogger(__name__)
1214

@@ -19,35 +21,115 @@ def propagate_tags_on_product(product_id, *args, **kwargs):
1921

2022

2123
def propagate_tags_on_product_sync(product):
22-
# enagagements
24+
"""
25+
Bulk-apply Product tag changes to all children using through-table SQL.
26+
27+
Replaces the previous per-row `.save()` loop. For every child model owned
28+
by the product (Engagement, Test, Finding, plus Endpoint or Location
29+
depending on the V3_FEATURE_LOCATIONS flag), reads the existing
30+
`inherited_tags` per child in one query, computes the diff against the
31+
Product's current tags, and applies adds/removes via the bulk tag
32+
helpers. Both `tags` and `inherited_tags` fields are kept in sync.
33+
"""
34+
target_names = {tag.name for tag in product.tags.all()}
35+
2336
logger.debug("Propagating tags from %s to all engagements", product)
24-
propagate_tags_on_object_list(Engagement.objects.filter(product=product))
25-
# tests
37+
_sync_inheritance_for_qs(
38+
Engagement.objects.filter(product=product),
39+
target_names_per_child=lambda _child: target_names,
40+
)
2641
logger.debug("Propagating tags from %s to all tests", product)
27-
propagate_tags_on_object_list(Test.objects.filter(engagement__product=product))
28-
# findings
42+
_sync_inheritance_for_qs(
43+
Test.objects.filter(engagement__product=product),
44+
target_names_per_child=lambda _child: target_names,
45+
)
2946
logger.debug("Propagating tags from %s to all findings", product)
30-
propagate_tags_on_object_list(Finding.objects.filter(test__engagement__product=product))
47+
_sync_inheritance_for_qs(
48+
Finding.objects.filter(test__engagement__product=product),
49+
target_names_per_child=lambda _child: target_names,
50+
)
3151
if settings.V3_FEATURE_LOCATIONS:
32-
# Locations
3352
logger.debug("Propagating tags from %s to all locations", product)
34-
propagate_tags_on_object_list(
35-
Location.objects.filter(
36-
# Locations linked directly to a product via LocationProductReference
37-
Q(products__product=product)
38-
# Locations linked indirectly to a product via LocationFindingReference
39-
| Q(findings__finding__test__engagement__product=product),
40-
).distinct(),
53+
location_qs = Location.objects.filter(
54+
Q(products__product=product)
55+
| Q(findings__finding__test__engagement__product=product),
56+
).distinct()
57+
# Locations can be linked to multiple products, so the inherited target
58+
# is the union of every related product's tags. Compute per-location.
59+
_sync_inheritance_for_qs(
60+
location_qs,
61+
target_names_per_child=_location_target_names,
4162
)
4263
else:
43-
# TODO: Delete this after the move to Locations
44-
# endpoints
4564
logger.debug("Propagating tags from %s to all endpoints", product)
46-
propagate_tags_on_object_list(Endpoint.objects.filter(product=product))
65+
_sync_inheritance_for_qs(
66+
Endpoint.objects.filter(product=product),
67+
target_names_per_child=lambda _child: target_names,
68+
)
69+
70+
71+
def _location_target_names(location):
72+
names: set[str] = set()
73+
for related_product in location.all_related_products():
74+
if related_product is None:
75+
continue
76+
names.update(tag.name for tag in related_product.tags.all())
77+
return names
78+
79+
80+
def _sync_inheritance_for_qs(queryset, *, target_names_per_child):
81+
"""
82+
Sync inherited_tags + tags for every child in `queryset` to its target tag set.
83+
84+
target_names_per_child: callable(child) -> set[str].
85+
86+
Issues bulk SQL: one through-table read for current inherited_tags, then
87+
bulk add/remove on `tags` and `inherited_tags` fields.
88+
"""
89+
children = list(queryset)
90+
if not children:
91+
return
92+
93+
model_class = type(children[0])
94+
inherited_field = model_class._meta.get_field("inherited_tags")
95+
inherited_through = inherited_field.remote_field.through
96+
inherited_tag_model = inherited_field.related_model
97+
98+
# Resolve through-table FK column for the source side.
99+
source_field_name = None
100+
for field in inherited_through._meta.fields:
101+
if hasattr(field, "remote_field") and field.remote_field and field.remote_field.model == model_class:
102+
source_field_name = field.name
103+
break
104+
105+
child_ids = [c.pk for c in children]
106+
# One query: pull every (child_id, tag_name) pair from the inherited_tags through table.
107+
existing_pairs = inherited_through.objects.filter(
108+
**{f"{source_field_name}__in": child_ids},
109+
).values_list(source_field_name, f"{inherited_tag_model._meta.model_name}__name")
110+
111+
old_inherited_by_child: dict[int, set[str]] = defaultdict(set)
112+
for child_id, tag_name in existing_pairs:
113+
old_inherited_by_child[child_id].add(tag_name)
114+
115+
# Compute per-child diff and bucket by tag name.
116+
add_map: dict[str, list] = defaultdict(list)
117+
remove_map: dict[str, list] = defaultdict(list)
118+
for child in children:
119+
target = target_names_per_child(child)
120+
old = old_inherited_by_child.get(child.pk, set())
121+
for name in target - old:
122+
add_map[name].append(child)
123+
for name in old - target:
124+
remove_map[name].append(child)
47125

126+
# Apply adds. Both `tags` and `inherited_tags` get the same set of new
127+
# inherited names — `_manage_inherited_tags` did the same.
128+
if add_map:
129+
bulk_add_tag_mapping(add_map, tag_field_name="inherited_tags")
130+
bulk_add_tag_mapping(add_map, tag_field_name="tags")
48131

49-
def propagate_tags_on_object_list(object_list):
50-
for obj in object_list:
51-
if obj and obj.id is not None:
52-
logger.debug(f"\tPropagating tags to {type(obj)} - {obj}")
53-
obj.save()
132+
# Apply removes.
133+
for name, instances in remove_map.items():
134+
bulk_remove_tags_from_instances(name, instances, tag_field_name="inherited_tags")
135+
bulk_remove_tags_from_instances(name, instances, tag_field_name="tags")

dojo/tag_utils.py

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,118 @@ def bulk_add_tags_to_instances(tag_or_tags, instances, tag_field_name: str = "ta
164164
return total_created
165165

166166

167+
def bulk_remove_tags_from_instances(tag_or_tags, instances, tag_field_name: str = "tags", batch_size: int | None = None) -> int:
168+
"""
169+
Efficiently remove tag(s) from many model instances.
170+
171+
Symmetric to ``bulk_add_tags_to_instances``:
172+
173+
- tag_or_tags: a single string, an iterable of strings or tag objects, or a Tagulous edit string.
174+
- instances: QuerySet or list of model instances of the same class.
175+
- tag_field_name: name of the TagField on the model (default: ``"tags"``).
176+
- Decrements ``tag.count`` for every removed (instance, tag) pair.
177+
- Deletes through-model rows in one DELETE per tag (or batched).
178+
- Clears Django prefetch caches on the input instances so subsequent access reloads from DB.
179+
180+
Returns the total number of relationships removed across all provided tags.
181+
Tags that do not exist or are not currently associated with any instance are silently skipped.
182+
"""
183+
if batch_size is None:
184+
batch_size = getattr(settings, "TAG_BULK_ADD_BATCH_SIZE", 1000)
185+
186+
if hasattr(instances, "model"):
187+
instances = list(instances)
188+
189+
if not instances:
190+
return 0
191+
192+
model_class = instances[0].__class__
193+
194+
# Mirror the Product safety check from bulk_add_tags_to_instances. Removing
195+
# tags from a Product would normally trigger inheritance propagation via
196+
# m2m_changed signals; this helper bypasses signals, so disallow it.
197+
if model_class is Product:
198+
msg = "bulk_remove_tags_from_instances: Product instances are not supported; use Product.tags.remove() or a propagation-aware helper"
199+
raise ValueError(msg)
200+
201+
try:
202+
tag_field = model_class._meta.get_field(tag_field_name)
203+
except Exception:
204+
msg = f"Model {model_class.__name__} does not have field '{tag_field_name}'"
205+
raise ValueError(msg)
206+
207+
if not hasattr(tag_field, "tag_options"):
208+
msg = f"Field '{tag_field_name}' is not a TagField"
209+
raise ValueError(msg)
210+
211+
tag_model = tag_field.related_model
212+
through_model = tag_field.remote_field.through
213+
214+
# Normalize input into a list of tag names (mirrors bulk_add_tags_to_instances).
215+
tag_names: list[str] = []
216+
try:
217+
if isinstance(tag_or_tags, str):
218+
space_delimiter = getattr(tag_field, "tag_options", None).space_delimiter if hasattr(tag_field, "tag_options") else False
219+
tag_names = parse_tags(tag_or_tags, space_delimiter=space_delimiter)
220+
elif isinstance(tag_or_tags, Iterable):
221+
tag_names = [getattr(t, "name", str(t)) for t in tag_or_tags]
222+
else:
223+
tag_names = [str(tag_or_tags)]
224+
except Exception:
225+
tag_names = [str(tag_or_tags)]
226+
227+
# Resolve through-model FK names dynamically (no hard-coding).
228+
through_fields = {f.name: f for f in through_model._meta.fields}
229+
source_field_name = None
230+
target_field_name = None
231+
for field_name, field in through_fields.items():
232+
if hasattr(field, "remote_field") and field.remote_field:
233+
if field.remote_field.model == model_class:
234+
source_field_name = field_name
235+
elif field.remote_field.model == tag_model:
236+
target_field_name = field_name
237+
238+
total_removed = 0
239+
240+
for single_tag_name in tag_names:
241+
if not single_tag_name:
242+
continue
243+
244+
# Resolve the tag — skip silently if it doesn't exist (nothing to remove).
245+
if tag_field.tag_options.case_sensitive:
246+
tag = tag_model.objects.filter(name=single_tag_name).first()
247+
else:
248+
tag = tag_model.objects.filter(name__iexact=single_tag_name).first()
249+
if tag is None:
250+
continue
251+
252+
for i in range(0, len(instances), batch_size):
253+
batch_instances = instances[i:i + batch_size]
254+
batch_ids = [instance.pk for instance in batch_instances]
255+
256+
with transaction.atomic():
257+
# One DELETE per tag-batch. Returns the deleted-row count.
258+
deleted_count, _ = through_model.objects.filter(
259+
**{target_field_name: tag.pk},
260+
**{f"{source_field_name}__in": batch_ids},
261+
).delete()
262+
263+
if deleted_count:
264+
total_removed += deleted_count
265+
# Decrement the Tagulous-maintained count to avoid drift.
266+
tag_model.objects.filter(pk=tag.pk).update(
267+
count=models.F("count") - deleted_count,
268+
)
269+
270+
# Invalidate prefetch caches so callers see the new state.
271+
for instance in batch_instances:
272+
prefetch_cache = getattr(instance, "_prefetched_objects_cache", None)
273+
if prefetch_cache is not None:
274+
prefetch_cache.pop(tag_field_name, None)
275+
276+
return total_removed
277+
278+
167279
def bulk_add_tag_mapping(
168280
tag_to_instances: dict[str, list],
169281
tag_field_name: str = "tags",
@@ -410,4 +522,4 @@ def bulk_remove_all_tags(model_class, instance_ids_qs):
410522
)
411523

412524

413-
__all__ = ["bulk_add_tag_mapping", "bulk_add_tags_to_instances", "bulk_apply_parser_tags", "bulk_remove_all_tags"]
525+
__all__ = ["bulk_add_tag_mapping", "bulk_add_tags_to_instances", "bulk_apply_parser_tags", "bulk_remove_all_tags", "bulk_remove_tags_from_instances"]

dojo/tags_signals.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ def inherit_linked_instance_tags(instance: LocationFindingReference | LocationPr
5959
@receiver(signals.post_save, sender=Finding)
6060
@receiver(signals.post_save, sender=Location)
6161
def inherit_tags_on_instance(sender, instance, created, **kwargs):
62+
# Only inherit on creation. The previous behavior fired on every save
63+
# (create OR update), repeatedly re-applying inherited tags to children
64+
# whose tag state had not changed. Sticky enforcement on user-driven
65+
# tag edits is handled by `make_inherited_tags_sticky` (m2m_changed).
66+
if not created:
67+
return
6268
inherit_instance_tags(instance)
6369

6470

unittests/test_tag_inheritance_perf.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -357,10 +357,12 @@ def test_baseline_product_tag_remove_propagates_to_100_locations_v3(self):
357357
# the appropriate mode so all variants execute in a single suite run.
358358

359359
# Findings-only scenarios.
360-
EXPECTED_PRODUCT_TAG_ADD_100_V2 = 4758
361-
EXPECTED_PRODUCT_TAG_ADD_100_V3 = 4759
362-
EXPECTED_PRODUCT_TAG_REMOVE_100_V2 = 4540
363-
EXPECTED_PRODUCT_TAG_REMOVE_100_V3 = 4541
360+
# Pre-Phase-A V2: 4758 add, 4540 remove. V3: 4759/4541.
361+
# Phase A bulk-propagate drops these dramatically.
362+
EXPECTED_PRODUCT_TAG_ADD_100_V2 = 91
363+
EXPECTED_PRODUCT_TAG_ADD_100_V3 = 91
364+
EXPECTED_PRODUCT_TAG_REMOVE_100_V2 = 53
365+
EXPECTED_PRODUCT_TAG_REMOVE_100_V3 = 53
364366

365367
EXPECTED_CREATE_ONE_FINDING_V2 = 64
366368
EXPECTED_CREATE_ONE_FINDING_V3 = 64
@@ -372,13 +374,13 @@ def test_baseline_product_tag_remove_propagates_to_100_locations_v3(self):
372374
EXPECTED_FINDING_REMOVE_INHERITED_V2 = 44
373375
EXPECTED_FINDING_REMOVE_INHERITED_V3 = 44
374376

375-
# V2 endpoint paths (Endpoints have no V3 counterpart in this class).
376-
EXPECTED_PRODUCT_TAG_ADD_100_ENDPOINTS = 3958
377-
EXPECTED_PRODUCT_TAG_REMOVE_100_ENDPOINTS = 3740
377+
# V2 endpoint paths. Pre-Phase-A: 3958 add, 3740 remove.
378+
EXPECTED_PRODUCT_TAG_ADD_100_ENDPOINTS = 91
379+
EXPECTED_PRODUCT_TAG_REMOVE_100_ENDPOINTS = 53
378380

379-
# V3 location paths (LocationManager has no V2 counterpart in this class).
380-
EXPECTED_PRODUCT_TAG_ADD_100_LOCATIONS = 4531
381-
EXPECTED_PRODUCT_TAG_REMOVE_100_LOCATIONS = 4307
381+
# V3 location paths. Pre-Phase-A: 4532 add, 4307 remove.
382+
EXPECTED_PRODUCT_TAG_ADD_100_LOCATIONS = 316
383+
EXPECTED_PRODUCT_TAG_REMOVE_100_LOCATIONS = 266
382384

383385

384386
@override_settings(
@@ -488,7 +490,10 @@ def test_baseline_zap_scan_reimport_no_change_v3(self):
488490
# Pinned baselines per mode. Each test forces its own V3_FEATURE_LOCATIONS
489491
# via @override_settings so all four import paths run in a single suite
490492
# invocation regardless of the ambient `DD_V3_FEATURE_LOCATIONS` env var.
491-
EXPECTED_ZAP_IMPORT_V2 = 1461
492-
EXPECTED_ZAP_IMPORT_V3 = 1319
493-
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 77
494-
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 95
493+
# Phase A nudges these slightly downward (post_save gated on created=True
494+
# avoids re-running inheritance on no-op finding updates during reimport).
495+
# Pre-Phase-A: 1461/1319 import, 77/95 reimport.
496+
EXPECTED_ZAP_IMPORT_V2 = 1385
497+
EXPECTED_ZAP_IMPORT_V3 = 1243
498+
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 69
499+
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 87

0 commit comments

Comments
 (0)