Skip to content

Commit bbd571c

Browse files
perf(tags): bulk-precompute Location target tag names per flush
`_location_target_names(location)` issued one `Product.objects.filter(...).distinct()` query per location, plus one `product.tags.all()` query per related product, producing an N+1 pattern across the location queryset on every product tag toggle.

Replace the per-location callable with a precomputed {location_id: set[tag_name]} map built in 3 bulk queries: the LocationProductReference and LocationFindingReference paths are unioned into {location_id: {product_id}}, then a single read of the Product_tags through table fans out to {product_id: {tag_name}}.

Query counts before -> after:
- product_tag_add (100 locations, V3): 320 -> 123 queries
- product_tag_remove (100 locations, V3): 270 -> 73 queries
- ZAP scan import (V3): 984 -> 947 queries
- ZAP scan reimport, no change (V3): 140 -> 103 queries
1 parent a46bf5a commit bbd571c

2 files changed

Lines changed: 59 additions & 18 deletions

File tree

dojo/product/helpers.py

Lines changed: 54 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from django.db.models import Q
77

88
from dojo.celery import app
9-
from dojo.location.models import Location
9+
from dojo.location.models import Location, LocationFindingReference, LocationProductReference
1010
from dojo.models import Endpoint, Engagement, Finding, Product, Test
1111
from dojo.tag_utils import bulk_add_tag_mapping, bulk_remove_tags_from_instances
1212

@@ -50,15 +50,19 @@ def propagate_tags_on_product_sync(product):
5050
)
5151
if settings.V3_FEATURE_LOCATIONS:
5252
logger.debug("Propagating tags from %s to all locations", product)
53-
location_qs = Location.objects.filter(
53+
# Materialize once so we can build a precomputed
54+
# {location_id: set[tag_name]} map without re-evaluating the queryset
55+
# or paying N+1 in `_location_target_names`.
56+
locations = list(Location.objects.filter(
5457
Q(products__product=product)
5558
| Q(findings__finding__test__engagement__product=product),
56-
).distinct()
57-
# Locations can be linked to multiple products, so the inherited target
58-
# is the union of every related product's tags. Compute per-location.
59+
).distinct())
60+
location_target_names = _build_location_target_names_map(
61+
[loc.pk for loc in locations],
62+
)
5963
_sync_inheritance_for_qs(
60-
location_qs,
61-
target_names_per_child=_location_target_names,
64+
locations,
65+
target_names_per_child=lambda loc: location_target_names.get(loc.pk, set()),
6266
)
6367
else:
6468
logger.debug("Propagating tags from %s to all endpoints", product)
@@ -68,13 +72,49 @@ def propagate_tags_on_product_sync(product):
6872
)
6973

7074

71-
def _location_target_names(location):
72-
names: set[str] = set()
73-
for related_product in location.all_related_products():
74-
if related_product is None:
75-
continue
76-
names.update(tag.name for tag in related_product.tags.all())
77-
return names
75+
def _build_location_target_names_map(location_ids):
76+
"""
77+
Bulk-compute {location_id: set[tag_name]} for the given locations.
78+
79+
Replaces the per-location `_location_target_names` callable, which issued
80+
one `Product.objects.filter(...).distinct()` query plus N `.tags.all()`
81+
queries per location. Now: 3 queries total regardless of fan-out.
82+
"""
83+
if not location_ids:
84+
return {}
85+
86+
location_to_products: dict[int, set[int]] = defaultdict(set)
87+
for loc_id, prod_id in LocationProductReference.objects.filter(
88+
location_id__in=location_ids,
89+
).values_list("location_id", "product_id"):
90+
location_to_products[loc_id].add(prod_id)
91+
for loc_id, prod_id in LocationFindingReference.objects.filter(
92+
location_id__in=location_ids,
93+
).values_list("location_id", "finding__test__engagement__product_id"):
94+
if prod_id is not None:
95+
location_to_products[loc_id].add(prod_id)
96+
97+
all_product_ids = {pid for pids in location_to_products.values() for pid in pids}
98+
if not all_product_ids:
99+
return {loc_id: set() for loc_id in location_ids}
100+
101+
product_tags_through = Product.tags.through
102+
tag_model = Product.tags.tag_model
103+
tag_field_name = tag_model._meta.model_name
104+
product_to_tag_names: dict[int, set[str]] = defaultdict(set)
105+
for prod_id, tag_name in product_tags_through.objects.filter(
106+
product_id__in=all_product_ids,
107+
).values_list("product_id", f"{tag_field_name}__name"):
108+
product_to_tag_names[prod_id].add(tag_name)
109+
110+
return {
111+
loc_id: {
112+
name
113+
for pid in pids
114+
for name in product_to_tag_names.get(pid, set())
115+
}
116+
for loc_id, pids in location_to_products.items()
117+
}
78118

79119

80120
def _sync_inheritance_for_qs(queryset, *, target_names_per_child):

unittests/test_tag_inheritance_perf.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -387,8 +387,9 @@ def test_baseline_product_tag_remove_propagates_to_100_locations_v3(self):
387387
EXPECTED_PRODUCT_TAG_REMOVE_100_ENDPOINTS = 56
388388

389389
# V3 location paths. Pre-Phase-A: 4532 add, 4307 remove.
390-
EXPECTED_PRODUCT_TAG_ADD_100_LOCATIONS = 320
391-
EXPECTED_PRODUCT_TAG_REMOVE_100_LOCATIONS = 270
390+
# Phase B Stage 2 + location precompute: bulk-built target-name map.
391+
EXPECTED_PRODUCT_TAG_ADD_100_LOCATIONS = 123
392+
EXPECTED_PRODUCT_TAG_REMOVE_100_LOCATIONS = 73
392393

393394

394395
@override_settings(
@@ -510,6 +511,6 @@ def test_baseline_zap_scan_reimport_no_change_v3(self):
510511
# when there's no work. Stages 3+4+5 (drop duplicate inherited_tags M2M)
511512
# will collapse the reimport cost.
512513
EXPECTED_ZAP_IMPORT_V2 = 1006
513-
EXPECTED_ZAP_IMPORT_V3 = 984
514+
EXPECTED_ZAP_IMPORT_V3 = 947
514515
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 82
515-
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 140
516+
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 103

0 commit comments

Comments
 (0)