Skip to content

Commit e8c0959

Browse files
perf(tags): replace inherited_tags TagField with _inherited_tag_names JSONField
Phase B Stage 3 of the tag inheritance redesign. Replaces the duplicate `inherited_tags` TagField on Engagement / Test / Finding / Endpoint / Location with a single `_inherited_tag_names` JSONField (Postgres `jsonb`). Drops 5 auto-generated through tables, 5 Tagulous tag tables, and 8 pghistory proxy/event models tied to the through tables. No GIN index is added in this commit: the current code diffs the JSON column per row in Python (`_sync_inheritance_for_qs`) rather than filtering with `_inherited_tag_names__contains`, so an index for "find children that inherited tag X" lookups is deferred to a follow-up.

Code changes
------------
- Models: 5x TagField -> JSONField. Migration 0265 copies existing M2M data into the JSON column, drops the field (Django removes the through table), and drops the orphan pghistory and Tagulous models. It does not create any index.
- `_manage_inherited_tags` / `propagate_inheritance` rewritten to read and write the JSON column. The `tags` M2M is only mutated when its contents actually need to change.
- `_sync_inheritance_for_qs` reads `_inherited_tag_names` directly from each child instead of joining the through table; performs a bulk UPDATE per (target name set) group instead of per (model, tag) pair. Bulk re-merge dedup avoids double-writing the same (child, name) pair.
- `LocationManager._bulk_inherit_tags` reads the JSON column rather than the inherited_tags through table.
- `tag_inheritance.flush_for_product` short-circuits when tag inheritance is disabled so importers don't pay for it on every scan.

Plumbing
--------
- `Meta.exclude` lists in forms / filters / api serializers updated from `inherited_tags` -> `_inherited_tag_names`.
- `DojoFilter.filter_for_field` skips JSONField auto-generation for `_inherited_tag_names` (django-filter raises on JSONField).
- Auditlog `tag_through_models` and `backfill.py` drop the inherited_tags proxies; pghistory now captures changes via the parent table's event.
- `dojo_testdata.json` fixture renames `"inherited_tags": []` -> `"_inherited_tag_names": []`.
- Tests using `.inherited_tags.all()` switched to read `_inherited_tag_names`.
Pinned perf-test impact (this branch vs Stage 2)
-----------------------------------------------
ZAP scan import V2                      : 1006 -> 700  (~30% drop)
ZAP scan import V3                      : 984 -> 698   (~29% drop)
ZAP reimport V2 (no change)             : 82 -> 78     (~5% drop)
ZAP reimport V3 (no change)             : 140 -> 136   (~3% drop)
Product tag add -> 100 findings V2/V3   : 94 -> 58     (~38% drop)
Product tag remove -> 100 findings V2/V3: 56 -> 38     (~32% drop)
Product tag add -> 100 endpoints V2     : 194 -> 58    (~70% drop)
Product tag remove -> 100 endpoints V2  : 56 -> 38     (~32% drop)
Product tag add -> 100 locations V3     : 320 -> 272   (~15% drop)
Product tag remove -> 100 locations V3  : 270 -> 246   (~9% drop)
Create 1 finding under inheritance      : 64 -> 42     (~34% drop)
Create 100 findings under inheritance   : 4024 -> 2814 (~30% drop)
Finding add user tag (sticky)           : 17 -> 16
Finding remove inherited tag (sticky)   : 44 -> 24     (~45% drop)

`test_importers_performance.py` baselines are unaffected because the flush_for_product call short-circuits when inheritance is disabled (the fixture used by those tests has it off).

Migration notes
---------------
- Destructive: drops 5 inherited_tags through tables and the 5 Tagulous tag-name tables that backed them. Forward-only.
- Postgres only. Uses native `jsonb`; no GIN index is created by this migration (one can be added in a follow-up if SQL-side containment lookups become common).
- Must be applied with a fresh DB or `--rebuilddb` in test environments; `--keepdb` will not pick up the schema change.
1 parent 999f576 commit e8c0959

17 files changed

Lines changed: 502 additions & 189 deletions

dojo/api_v2/serializers.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,7 +1110,7 @@ class EngagementSerializer(serializers.ModelSerializer):
11101110

11111111
class Meta:
11121112
model = Engagement
1113-
exclude = ("inherited_tags",)
1113+
exclude = ("_inherited_tag_names",)
11141114

11151115
def validate(self, data):
11161116
if self.context["request"].method == "POST":
@@ -1282,7 +1282,7 @@ class EndpointSerializer(serializers.ModelSerializer):
12821282

12831283
class Meta:
12841284
model = Endpoint
1285-
exclude = ("inherited_tags",)
1285+
exclude = ("_inherited_tag_names",)
12861286

12871287
def validate(self, data):
12881288

@@ -1418,7 +1418,7 @@ class TestSerializer(serializers.ModelSerializer):
14181418

14191419
class Meta:
14201420
model = Test
1421-
exclude = ("inherited_tags",)
1421+
exclude = ("_inherited_tag_names",)
14221422

14231423
def build_relational_field(self, field_name, relation_info):
14241424
if field_name == "notes":
@@ -1442,7 +1442,7 @@ class TestCreateSerializer(serializers.ModelSerializer):
14421442

14431443
class Meta:
14441444
model = Test
1445-
exclude = ("inherited_tags",)
1445+
exclude = ("_inherited_tag_names",)
14461446

14471447

14481448
class TestTypeCreateSerializer(serializers.ModelSerializer):
@@ -1746,7 +1746,7 @@ class Meta:
17461746
model = Finding
17471747
exclude = (
17481748
"cve",
1749-
"inherited_tags",
1749+
"_inherited_tag_names",
17501750
)
17511751

17521752
# TODO: Delete this after the move to Locations
@@ -1967,7 +1967,7 @@ class Meta:
19671967
model = Finding
19681968
exclude = (
19691969
"cve",
1970-
"inherited_tags",
1970+
"_inherited_tag_names",
19711971
)
19721972
extra_kwargs = {
19731973
"active": {"required": True},

dojo/auditlog/backfill.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -57,30 +57,18 @@ def get_table_names(model_name):
5757
elif model_name == "FindingTags":
5858
table_name = "dojo_finding_tags"
5959
event_table_name = "dojo_finding_tagsevent"
60-
elif model_name == "FindingInheritedTags":
61-
table_name = "dojo_finding_inherited_tags"
62-
event_table_name = "dojo_finding_inherited_tagsevent"
6360
elif model_name == "ProductTags":
6461
table_name = "dojo_product_tags"
6562
event_table_name = "dojo_product_tagsevent"
6663
elif model_name == "EngagementTags":
6764
table_name = "dojo_engagement_tags"
6865
event_table_name = "dojo_engagement_tagsevent"
69-
elif model_name == "EngagementInheritedTags":
70-
table_name = "dojo_engagement_inherited_tags"
71-
event_table_name = "dojo_engagement_inherited_tagsevent"
7266
elif model_name == "TestTags":
7367
table_name = "dojo_test_tags"
7468
event_table_name = "dojo_test_tagsevent"
75-
elif model_name == "TestInheritedTags":
76-
table_name = "dojo_test_inherited_tags"
77-
event_table_name = "dojo_test_inherited_tagsevent"
7869
elif model_name == "EndpointTags":
7970
table_name = "dojo_endpoint_tags"
8071
event_table_name = "dojo_endpoint_tagsevent"
81-
elif model_name == "EndpointInheritedTags":
82-
table_name = "dojo_endpoint_inherited_tags"
83-
event_table_name = "dojo_endpoint_inherited_tagsevent"
8472
elif model_name == "FindingTemplateTags":
8573
table_name = "dojo_finding_template_tags"
8674
event_table_name = "dojo_finding_template_tagsevent"

dojo/auditlog/services.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -398,15 +398,15 @@ class Meta:
398398
# the through models, which can't be imported at module level.
399399
tag_through_models = [
400400
# (Parent model, field name, proxy class name)
401+
# `inherited_tags` TagField was removed in the tag-inheritance
402+
# redesign; the inherited-name list now lives in a
403+
# `_inherited_tag_names` JSONField whose changes are captured by the
404+
# parent table's pghistory event automatically.
401405
(Finding, "tags", "FindingTags"),
402-
(Finding, "inherited_tags", "FindingInheritedTags"),
403406
(Product, "tags", "ProductTags"),
404407
(Engagement, "tags", "EngagementTags"),
405-
(Engagement, "inherited_tags", "EngagementInheritedTags"),
406408
(Test, "tags", "TestTags"),
407-
(Test, "inherited_tags", "TestInheritedTags"),
408409
(Endpoint, "tags", "EndpointTags"),
409-
(Endpoint, "inherited_tags", "EndpointInheritedTags"),
410410
(Finding_Template, "tags", "FindingTemplateTags"),
411411
(App_Analysis, "tags", "AppAnalysisTags"),
412412
(Objects_Product, "tags", "ObjectsProductTags"),
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
"""
2+
Replace the duplicate `inherited_tags` TagField on Engagement / Test /
3+
Finding / Endpoint / Location with a `_inherited_tag_names` JSONField.
4+
5+
Phase B Stage 3 of the tag inheritance redesign. Copies existing M2M data
6+
into the JSON column, then drops the M2M field (which also drops the
7+
auto-generated through tables and the Tagulous tag tables for the
8+
inherited_tags side).
9+
"""
10+
from django.db import migrations, models
11+
12+
13+
def copy_inherited_tags_to_json(apps, schema_editor):
    """Copy each child's inherited_tags M2M values into _inherited_tag_names JSON.

    For every Engagement / Test / Finding / Endpoint / Location row, the names
    of its `inherited_tags` are read, sorted, and written to the row's
    `_inherited_tag_names` column. Rows without inherited tags are left
    untouched, so they keep the column default.

    Args:
        apps: Historical app registry supplied by `migrations.RunPython`.
        schema_editor: Supplied by `migrations.RunPython`; not used here.
    """
    import logging

    logger = logging.getLogger(__name__)

    child_models = (
        ("dojo", "Engagement"),
        ("dojo", "Test"),
        ("dojo", "Finding"),
        ("dojo", "Endpoint"),
        ("dojo", "Location"),
    )
    for app_label, model_name in child_models:
        try:
            Model = apps.get_model(app_label, model_name)
        except LookupError:
            # Model is not present in the historical state: nothing to copy.
            continue
        for obj in Model.objects.iterator(chunk_size=1000):
            try:
                names = sorted(obj.inherited_tags.values_list("name", flat=True))
            except Exception:
                # Still best-effort (the row is skipped, exactly as before),
                # but no longer silent: later operations in this migration
                # remove the `inherited_tags` field, so a row skipped here
                # loses its inherited tag names for good and must be visible
                # in the migration log.
                logger.exception(
                    "Could not read inherited_tags for %s pk=%s; "
                    "_inherited_tag_names is left at its default",
                    model_name,
                    obj.pk,
                )
                continue
            if names:
                Model.objects.filter(pk=obj.pk).update(_inherited_tag_names=names)
33+
34+
35+
class Migration(migrations.Migration):
    """Swap the `inherited_tags` TagField for a `_inherited_tag_names` JSONField."""

    dependencies = [
        ("dojo", "0264_alter_url_identity_hash_alter_urlevent_identity_hash"),
    ]

    operations = [
        # 1. Add the JSON column to each child model. Every model gets its
        #    own field instance; only the model name differs.
        *(
            migrations.AddField(
                model_name=child,
                name="_inherited_tag_names",
                field=models.JSONField(
                    blank=True,
                    default=list,
                    help_text="Internal: tag names inherited from the product, used to identify which entries in `tags` came from inheritance vs user input.",
                ),
            )
            for child in ("engagement", "endpoint", "test", "finding", "location")
        ),
        # 2. Copy existing M2M data into the JSON column.
        migrations.RunPython(copy_inherited_tags_to_json, migrations.RunPython.noop),
        # 3. Drop pghistory proxies and event-tracking tables for the
        #    inherited_tags through tables (created by migration 0256).
        #    Must precede the RemoveField below: Django's state-rendering
        #    fails to resolve the proxy bases once their through table
        #    target is gone.
        *(
            migrations.DeleteModel(name=history_model)
            for history_model in (
                "EngagementInheritedTagsEvent",
                "EndpointInheritedTagsEvent",
                "TestInheritedTagsEvent",
                "FindingInheritedTagsEvent",
                "EngagementInheritedTags",
                "EndpointInheritedTags",
                "TestInheritedTags",
                "FindingInheritedTags",
            )
        ),
        # 4. Drop the duplicate inherited_tags TagField on each child. Django
        #    will also drop the auto-generated `dojo_<model>_inherited_tags`
        #    through tables.
        *(
            migrations.RemoveField(model_name=child, name="inherited_tags")
            for child in ("engagement", "endpoint", "test", "finding", "location")
        ),
        # 5. Drop the now-orphaned Tagulous tag models that backed the
        #    `inherited_tags` TagFields. These were created in migration
        #    0188 (and 0259 for Location).
        *(
            migrations.DeleteModel(name=tag_model)
            for tag_model in (
                "Tagulous_Engagement_inherited_tags",
                "Tagulous_Endpoint_inherited_tags",
                "Tagulous_Test_inherited_tags",
                "Tagulous_Finding_inherited_tags",
                "Tagulous_Location_inherited_tags",
            )
        ),
        # No GIN index added: the current code reads the JSON column per
        # row via `_sync_inheritance_for_qs` (Python-side diff) rather than
        # filtering with `_inherited_tag_names__contains`. Add a GIN index
        # in a follow-up if production query patterns shift toward SQL-side
        # containment lookups.
    ]

0 commit comments

Comments
 (0)