Skip to content

Commit 01a3aaf

Browse files
perf(tags): watson-style auto-flushing tag inheritance context
Replaces the dumb suppression flag with a watson-inspired registrar context manager. Signal handlers register touched instances into the active context; the context auto-flushes on exit (and on explicit mid-batch flush). Removes the manual `flush_for_product` orchestration the importers used to call after the suppression block. `TagInheritanceContext`: - `add(instance)`: register an instance for bulk-sync (no-op when inheritance is disabled for that instance's product, so the bulk path stays cheap on inheritance-off products). - `flush()`: drain registered instances, group by (model, product), run one bulk diff per group via the existing `_sync_inheritance_for_qs` helper. Locations route through the `_build_location_target_names_map` precompute. - System-wide inheritance flag is read from the DB once per context and cached; per-product flags read off the in-memory product. Signal hookup (`tags_signals.py`): - `make_inherited_tags_sticky` (m2m_changed): when active context exists, `ctx.add(instance)` and return — no per-row work. - `inherit_tags_on_instance` (post_save, created=True): same pattern. - `inherit_tags_on_linked_instance` (post_save on LocationFinding/LocationProductReference): registers the underlying Location. Importer / reimporter: - `with tag_inheritance.batch() as tag_ctx:` opens the context. - Mid-loop: `ctx.flush()` runs before each per-batch `post_process_findings_batch` dispatch so JIRA labels reflect inherited tags on first push. - On context exit: auto-flush handles end-of-import sync (closed findings, test/engagement saves, apply_import_tags). Drops the explicit `tag_inheritance.flush_for_product(...)` calls. Query-count impact: - ZAP scan import V2: 1006 -> 1057 (+51) — extra context bookkeeping - ZAP scan import V3: 947 -> 997 (+50) - ZAP reimport (no change V2): 82 -> 69 (-13) — registrar skips no-op - ZAP reimport (no change V3): 103 -> 87 (-16) - test_importers_performance: unchanged (inheritance off → no register) Net: import path slightly heavier in exchange for correct JIRA labelling on first push and a much cleaner orchestration model. The import-path overhead is constant per import (not per finding); follow- up commits can trim it.
1 parent 6ca5d7c commit 01a3aaf

5 files changed

Lines changed: 201 additions & 97 deletions

File tree

dojo/importers/default_importer.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -115,12 +115,12 @@ def process_scan(
115115
# Note: for fresh imports, parse_findings() calls create_test() internally,
116116
# so self.test is guaranteed to be set after this call.
117117
parsed_findings = self.parse_findings(scan, parser) or []
118-
# Suppress per-row tag-inheritance signal work during the import hot
119-
# loop. Inheritance is applied in bulk after the batch via
120-
# `tag_inheritance.flush_for_product` (see below). This replaces the
121-
# per-finding `_manage_inherited_tags` signal cascade with a single
122-
# `propagate_tags_on_product_sync` pass.
123-
with tag_inheritance.batch():
118+
# Open a tag-inheritance context. Signal handlers register touched
119+
# instances into the context; the context auto-flushes (bulk-applies
120+
# inherited tags) on exit. Mid-context `ctx.flush()` calls drain the
121+
# accumulated set early — used before per-batch post-process dispatch
122+
# so JIRA labels reflect the full tag state on first push.
123+
with tag_inheritance.batch() as tag_ctx:
124124
new_findings = self.process_findings(parsed_findings, **kwargs)
125125
# Close any old findings in the processed list if the the user specified for that
126126
# to occur in the form that is then passed to the kwargs
@@ -144,10 +144,7 @@ def process_scan(
144144
new_findings=new_findings,
145145
closed_findings=closed_findings,
146146
)
147-
# Flush inherited tags in bulk for all children of the product touched
148-
# by this import. Idempotent and cheap when tag inheritance is
149-
# disabled (it short-circuits on the system + per-product flags).
150-
tag_inheritance.flush_for_product(self.test.engagement.product)
147+
# Inheritance auto-flushes on context exit above.
151148
# Send out some notifications to the user
152149
logger.debug("IMPORT_SCAN: Generating notifications")
153150
dojo_dispatch_task(
@@ -280,6 +277,11 @@ def process_findings(
280277
findings_with_parser_tags.clear()
281278
finding_ids_batch = list(batch_finding_ids)
282279
batch_finding_ids.clear()
280+
# Drain the inheritance context BEFORE dispatching post-process
281+
# so the JIRA push inside that task sees inherited tags on the
282+
# findings (otherwise inheritance lands later, on context exit).
283+
if (ctx := tag_inheritance.current()) is not None:
284+
ctx.flush()
283285
logger.debug("process_findings: dispatching batch with push_to_jira=%s (batch_size=%d, is_final=%s)",
284286
push_to_jira, len(finding_ids_batch), is_final_finding)
285287
dojo_dispatch_task(

dojo/importers/default_reimporter.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,10 @@ def process_scan(
108108
# Get the findings from the parser based on what methods the parser supplies
109109
# This could either mean traditional file parsing, or API pull parsing
110110
parsed_findings = self.parse_findings(scan, parser) or []
111-
# Suppress per-row tag-inheritance signal work during the reimport
112-
# hot loop. Inheritance is applied in bulk after the batch via
113-
# `tag_inheritance.flush_for_product`. See default_importer for the
114-
# rationale.
115-
with tag_inheritance.batch():
111+
# Open a tag-inheritance context (auto-flushes on exit). Signals
112+
# register touched instances; `ctx.flush()` mid-loop drains them
113+
# before each post-process dispatch so JIRA labels are correct.
114+
with tag_inheritance.batch() as tag_ctx:
116115
(
117116
new_findings,
118117
reactivated_findings,
@@ -150,9 +149,7 @@ def process_scan(
150149
reactivated_findings=reactivated_findings,
151150
untouched_findings=untouched_findings,
152151
)
153-
# Bulk-apply tag inheritance for all children of the product touched
154-
# by this reimport.
155-
tag_inheritance.flush_for_product(self.test.engagement.product)
152+
# Inheritance auto-flushes on context exit above.
156153
# Send out som notifications to the user
157154
logger.debug("REIMPORT_SCAN: Generating notifications")
158155
updated_count = (
@@ -436,6 +433,11 @@ def process_findings(
436433
findings_with_parser_tags.clear()
437434
finding_ids_batch = list(batch_finding_ids)
438435
batch_finding_ids.clear()
436+
# Drain the inheritance context BEFORE dispatching
437+
# post-process so the JIRA push inside that task sees
438+
# inherited tags on the findings.
439+
if (ctx := tag_inheritance.current()) is not None:
440+
ctx.flush()
439441
dojo_dispatch_task(
440442
finding_helper.post_process_findings_batch,
441443
finding_ids_batch,

dojo/tag_inheritance.py

Lines changed: 154 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,80 +1,174 @@
11
"""
2-
Tag inheritance — central coordination module.
3-
4-
Provides a thread-local ``batch()`` context manager that suppresses
5-
per-instance inheritance work driven by ``m2m_changed`` and ``post_save``
6-
signals. While inside a batch, the signal handlers in
7-
``dojo/tags_signals.py`` early-return; the calling code is responsible for
8-
applying inheritance in bulk (e.g. via the importer's existing
9-
``_bulk_inherit_tags`` path or ``propagate_tags_on_product_sync``).
10-
11-
This replaces the previous pattern of ``signals.m2m_changed.disconnect(...)``
12-
in importer hot loops, which was process-global and unsafe under threaded
13-
gunicorn / Celery thread pools / ASGI threadpools (see PR description for
14-
the full rationale).
2+
Tag inheritance — watson-style context manager.
3+
4+
Pattern mirrors `watson.search.SearchContextManager`: signal handlers
5+
register touched instances into the active context instead of running
6+
per-row inheritance work; the context flushes them in bulk on
7+
``flush()`` (called explicitly mid-batch) and on context exit.
8+
9+
Usage:
10+
with tag_inheritance.batch() as ctx:
11+
# bulk operations create/modify many instances
12+
...
13+
ctx.flush() # optional, mid-batch sync (e.g. before JIRA push)
14+
...
15+
# auto-flushes on outermost exit
16+
17+
The context lives in ``threading.local``, so concurrent threads (and
18+
Celery workers in non-prefork pools) are unaffected by other threads'
19+
batches.
1520
"""
1621
from __future__ import annotations
1722

18-
import contextlib
23+
import logging
1924
import threading
25+
from collections import defaultdict
2026
from contextlib import contextmanager
2127

28+
logger = logging.getLogger(__name__)
29+
2230
_state = threading.local()
2331

2432

33+
class TagInheritanceContext:
34+
35+
"""
36+
Per-thread registrar for instances whose inherited tags need
37+
re-syncing in bulk. Touched instances are grouped by model class;
38+
``flush()`` runs one bulk diff per (model, product) group via the
39+
existing ``_sync_inheritance_for_qs`` helper.
40+
"""
41+
42+
def __init__(self):
43+
self._depth = 0
44+
# model_class -> set of instance pks
45+
self._touched: dict[type, set[int]] = defaultdict(set)
46+
# System-wide inheritance flag is read from the DB and cached for
47+
# the lifetime of the context. Per-product flags are read directly
48+
# off the in-memory product instance.
49+
self._system_inheritance: bool | None = None
50+
51+
def is_active(self) -> bool:
52+
return self._depth > 0
53+
54+
def system_inheritance_enabled(self) -> bool:
55+
if self._system_inheritance is None:
56+
from dojo.utils import get_system_setting # noqa: PLC0415
57+
self._system_inheritance = bool(get_system_setting("enable_product_tag_inheritance"))
58+
return self._system_inheritance
59+
60+
def is_inheritance_enabled_for(self, instance) -> bool:
61+
"""
62+
True when the given instance is under at least one product whose
63+
inheritance is enabled (per-product flag or system-wide).
64+
"""
65+
from dojo.tags_signals import get_products # noqa: PLC0415
66+
products = get_products(instance)
67+
if any(getattr(p, "enable_product_tag_inheritance", False) for p in products if p):
68+
return True
69+
return self.system_inheritance_enabled()
70+
71+
def add(self, instance) -> None:
72+
"""
73+
Register an instance for bulk-sync at next flush. No-op when
74+
inheritance is disabled for this instance, so the bulk path stays
75+
cheap on inheritance-off products.
76+
"""
77+
if instance is None or getattr(instance, "pk", None) is None:
78+
return
79+
if not self.is_inheritance_enabled_for(instance):
80+
return
81+
self._touched[type(instance)].add(instance.pk)
82+
83+
def flush(self) -> None:
84+
"""
85+
Bulk-sync inherited tags for every registered instance, then
86+
clear the registry. Idempotent and cheap when nothing was
87+
touched.
88+
"""
89+
if not self._touched:
90+
return
91+
# Local imports to avoid circulars at module import time.
92+
from dojo.location.models import Location # noqa: PLC0415
93+
from dojo.product.helpers import ( # noqa: PLC0415
94+
_build_location_target_names_map,
95+
_sync_inheritance_for_qs,
96+
)
97+
from dojo.tags_signals import get_products # noqa: PLC0415
98+
99+
touched, self._touched = self._touched, defaultdict(set)
100+
101+
for model_class, pks in touched.items():
102+
if not pks:
103+
continue
104+
queryset = model_class.objects.filter(pk__in=pks)
105+
if model_class is Location:
106+
# Location target = union of related products' tags. Use
107+
# the bulk precompute helper.
108+
target_map = _build_location_target_names_map(list(pks))
109+
_sync_inheritance_for_qs(
110+
queryset,
111+
target_names_per_child=lambda loc, _m=target_map: _m.get(loc.pk, set()),
112+
)
113+
else:
114+
# All other children belong to one product (Finding via
115+
# test, Endpoint via product, etc.). Group by product so
116+
# each group gets one target name set.
117+
instances = list(queryset)
118+
by_product: dict[int, list] = defaultdict(list)
119+
product_by_id: dict[int, object] = {}
120+
for inst in instances:
121+
products = get_products(inst)
122+
for product in products:
123+
if product is None:
124+
continue
125+
by_product[product.id].append(inst)
126+
product_by_id[product.id] = product
127+
for product_id, group in by_product.items():
128+
product = product_by_id[product_id]
129+
target_names = {tag.name for tag in product.tags.all()}
130+
_sync_inheritance_for_qs(
131+
group,
132+
target_names_per_child=lambda _c, _t=target_names: _t,
133+
)
134+
135+
136+
def current() -> TagInheritanceContext | None:
137+
"""Return the active context for this thread, if any."""
138+
return getattr(_state, "ctx", None)
139+
140+
25141
def is_in_batch() -> bool:
26142
"""Return True when the current thread is inside an active ``batch()``."""
27-
return bool(getattr(_state, "depth", 0))
143+
ctx = current()
144+
return ctx is not None and ctx.is_active()
28145

29146

30147
@contextmanager
31148
def batch():
32149
"""
33-
Suppress per-instance inheritance signals for the calling thread.
34-
35-
Usage:
36-
with tag_inheritance.batch():
37-
# Bulk operations that would otherwise fire `make_inherited_tags_sticky`
38-
# or `inherit_tags_on_instance` per row.
39-
...
40-
# After exit, callers should explicitly bulk-apply inheritance via
41-
# `propagate_tags_on_product_sync(product)` (or equivalent) for any
42-
# product whose children were created/modified inside the batch.
43-
44-
The context is reentrant; nested ``with`` blocks share the suppression
45-
until the outermost block exits. State lives in ``threading.local()``,
46-
so concurrent threads (and Celery workers in non-prefork pools) are
47-
unaffected by other threads' batches.
48-
"""
49-
_state.depth = getattr(_state, "depth", 0) + 1
50-
try:
51-
yield
52-
finally:
53-
_state.depth -= 1
54-
if _state.depth <= 0:
55-
# Clean up the attribute so leak-free thread reuse stays simple.
56-
with contextlib.suppress(AttributeError):
57-
del _state.depth
150+
Open a tag-inheritance context for the calling thread.
58151
152+
Inside the context, signal handlers register touched instances
153+
instead of running per-row inheritance. On exit, the context
154+
auto-flushes (bulk-applies inheritance for every touched instance).
59155
60-
def flush_for_product(product) -> None:
156+
Reentrant: nested ``with`` blocks share the context until the
157+
outermost block exits.
61158
"""
62-
Bulk-apply tag inheritance to all children of `product`.
63-
64-
Intended to be called after a ``batch()`` block exits, when the calling
65-
code has created/modified many children of one product and the
66-
per-instance signal handlers were suppressed. Delegates to
67-
``propagate_tags_on_product_sync``, which uses the Phase A bulk-diff
68-
helpers to sync `inherited_tags` and `tags` across all children in O(1)
69-
queries per (model x tag) instead of O(N) rows.
70-
71-
Short-circuits when tag inheritance is disabled (neither the system-wide
72-
flag nor the per-product flag is set) so callers (e.g. importers) can
73-
invoke this unconditionally without paying for it.
74-
"""
75-
# Local imports to avoid circulars at module import time.
76-
from dojo.product.helpers import propagate_tags_on_product_sync # noqa: PLC0415
77-
from dojo.utils import get_system_setting # noqa: PLC0415
78-
if not getattr(product, "enable_product_tag_inheritance", False) and not get_system_setting("enable_product_tag_inheritance"):
79-
return
80-
propagate_tags_on_product_sync(product)
159+
ctx = getattr(_state, "ctx", None)
160+
owner = ctx is None
161+
if owner:
162+
ctx = TagInheritanceContext()
163+
_state.ctx = ctx
164+
ctx._depth += 1
165+
try:
166+
yield ctx
167+
finally:
168+
ctx._depth -= 1
169+
if ctx._depth <= 0:
170+
try:
171+
ctx.flush()
172+
finally:
173+
if owner:
174+
del _state.ctx

dojo/tags_signals.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,18 @@ def product_tags_post_add_remove(sender, instance, action, **kwargs):
3333
@receiver(signals.m2m_changed, sender=Location.tags.through)
3434
def make_inherited_tags_sticky(sender, instance, action, **kwargs):
3535
"""Make sure inherited tags are added back in if they are removed"""
36-
# Inside a `tag_inheritance.batch()` block the caller takes responsibility
37-
# for applying inheritance in bulk; per-row signal work would defeat the
38-
# purpose. This replaces the old `signals.m2m_changed.disconnect(...)`
39-
# pattern, which was process-global and unsafe under threaded workers.
40-
if tag_inheritance.is_in_batch():
36+
if action not in {"post_add", "post_remove"}:
4137
return
42-
if action in {"post_add", "post_remove"}:
43-
if inherit_product_tags(instance):
44-
tag_list = [tag.name for tag in instance.tags.all()]
45-
if propagate_inheritance(instance, tag_list=tag_list):
46-
instance.inherit_tags(tag_list)
38+
# Inside a `tag_inheritance.batch()` context, register the instance
39+
# for bulk-sync at flush/exit instead of running per-row inheritance.
40+
ctx = tag_inheritance.current()
41+
if ctx is not None and ctx.is_active():
42+
ctx.add(instance)
43+
return
44+
if inherit_product_tags(instance):
45+
tag_list = [tag.name for tag in instance.tags.all()]
46+
if propagate_inheritance(instance, tag_list=tag_list):
47+
instance.inherit_tags(tag_list)
4748

4849

4950
def inherit_instance_tags(instance):
@@ -72,18 +73,23 @@ def inherit_tags_on_instance(sender, instance, created, **kwargs):
7273
# tag edits is handled by `make_inherited_tags_sticky` (m2m_changed).
7374
if not created:
7475
return
75-
# Inside a `tag_inheritance.batch()` block the caller takes responsibility
76-
# for applying inheritance in bulk after exit (typically via
77-
# `tag_inheritance.flush_for_product`).
78-
if tag_inheritance.is_in_batch():
76+
# Inside a `tag_inheritance.batch()` context, register the new instance
77+
# for bulk-sync at flush/exit instead of running per-row inheritance.
78+
ctx = tag_inheritance.current()
79+
if ctx is not None and ctx.is_active():
80+
ctx.add(instance)
7981
return
8082
inherit_instance_tags(instance)
8183

8284

8385
@receiver(signals.post_save, sender=LocationFindingReference)
8486
@receiver(signals.post_save, sender=LocationProductReference)
8587
def inherit_tags_on_linked_instance(sender, instance, created, **kwargs):
86-
if tag_inheritance.is_in_batch():
88+
# Linked refs (LocationFinding/LocationProductReference) bind a Location
89+
# to a Finding/Product. Register the underlying Location for bulk-sync.
90+
ctx = tag_inheritance.current()
91+
if ctx is not None and ctx.is_active():
92+
ctx.add(instance.location)
8793
return
8894
inherit_linked_instance_tags(instance)
8995

unittests/test_tag_inheritance_perf.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ def test_baseline_zap_scan_reimport_no_change_v3(self):
510510
# rises because flush always runs; bulk re-merge has a fixed cost even
511511
# when there's no work. Stages 3+4+5 (drop duplicate inherited_tags M2M)
512512
# will collapse the reimport cost.
513-
EXPECTED_ZAP_IMPORT_V2 = 1006
514-
EXPECTED_ZAP_IMPORT_V3 = 947
515-
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 82
516-
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 103
513+
EXPECTED_ZAP_IMPORT_V2 = 1057
514+
EXPECTED_ZAP_IMPORT_V3 = 997
515+
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 69
516+
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 87

0 commit comments

Comments
 (0)