Skip to content

Commit 336cd7f

Browse files
feat: Add pghistory tracking for tag fields (#14116)
* pghistory: add tag tracking + ui improvements * simplify ui strings * simplify ui strings * move logic * display reviewers names * add ui elements properly * catch and log errors * catch and log errors
1 parent 80f8110 commit 336cd7f

7 files changed

Lines changed: 1036 additions & 44 deletions

File tree

dojo/auditlog.py

Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import os
99
import sys
1010
import time
11+
from collections import defaultdict
1112

1213
import pghistory
1314
from dateutil.relativedelta import relativedelta
@@ -24,6 +25,153 @@
2425
# Django's app registry to be ready (AppRegistryNotReady error)
2526
# The function is called from DojoAppConfig.ready() which guarantees the registry is ready
2627

28+
# Populated by register_django_pghistory_models() - maps proxy_name -> (parent_model, field_name)
29+
TAG_MODEL_MAPPING = {}
30+
31+
32+
def _reconstruct_object_str(model_name: str, pgh_data: dict, obj_id: int) -> str:
33+
"""Reconstruct object string representation from pgh_data snapshot."""
34+
if not pgh_data:
35+
return f"{model_name} #{obj_id}" if obj_id else "N/A"
36+
37+
model_lower = model_name.lower()
38+
39+
# Model-specific reconstruction based on __str__ implementations
40+
if model_lower in {"finding", "finding_template"}:
41+
if pgh_data.get("title"):
42+
return str(pgh_data["title"])
43+
elif model_lower == "engagement":
44+
name = pgh_data.get("name", "")
45+
if name:
46+
return f"Engagement {obj_id}: {name}"
47+
elif model_lower == "dojo_user":
48+
first = pgh_data.get("first_name", "")
49+
last = pgh_data.get("last_name", "")
50+
if first or last:
51+
return f"{first} {last}".strip()
52+
if pgh_data.get("username"):
53+
return pgh_data["username"]
54+
elif model_lower in {"product", "product_type", "finding_group", "test_type"}:
55+
if pgh_data.get("name"):
56+
return str(pgh_data["name"])
57+
elif model_lower == "test":
58+
if pgh_data.get("title"):
59+
return pgh_data["title"]
60+
elif model_lower == "endpoint":
61+
if pgh_data.get("host"):
62+
return pgh_data["host"]
63+
64+
# Fallback: try common fields
65+
for field in ["title", "name", "username", "label", "host"]:
66+
if pgh_data.get(field):
67+
return str(pgh_data[field])
68+
69+
return f"{model_name} #{obj_id}" if obj_id else "N/A"
70+
71+
72+
def _find_tag_id(pgh_data):
    """
    Return the tag ID stored in a tag through-table snapshot.

    The tag reference is taken to be the first entry whose key starts with
    "tagulous_" and ends with "_id" and whose value is truthy.

    Returns:
        The tag ID as an int, or None when no such entry exists.

    Raises:
        ValueError, TypeError: when the stored value cannot be converted to int.

    """
    for key, value in pgh_data.items():
        if key.startswith("tagulous_") and key.endswith("_id") and value:
            return int(value)
    return None


def process_events_for_display(events):
    """
    Annotate each event with ``object_str`` and ``object_url`` attributes.

    Works in two passes so that related rows are loaded with one query per
    model instead of one query per event: the first pass collects the IDs
    referenced by the events, the second pass sets the attributes from the
    batch-loaded caches.

    Args:
        events: Event objects exposing ``pgh_obj_model``, ``pgh_obj_id`` and
            ``pgh_data``. The collection is iterated twice, so it must be
            re-iterable (a one-shot generator would leave every event
            unannotated).

    Returns:
        The same ``events`` object, with every event annotated in place.
        Events that cannot be processed get a generic
        ``"<pgh_obj_model> #<pgh_obj_id>"`` label and no URL.

    """
    # Import here to avoid circular imports
    from dojo.models import Dojo_User  # noqa: PLC0415

    ids_by_model = defaultdict(set)
    user_ids = set()
    tag_ids_by_model = defaultdict(set)

    # First pass: collect IDs
    for event in events:
        try:
            obj_model = getattr(event, "pgh_obj_model", None)
            if not obj_model:
                continue
            model_name = obj_model.split(".")[-1]
            pgh_data = getattr(event, "pgh_data", None) or {}
            obj_id = getattr(event, "pgh_obj_id", None)

            if model_name == "FindingReviewers":
                if user_id := pgh_data.get("dojo_user_id"):
                    user_ids.add(int(user_id))
            elif model_name in TAG_MODEL_MAPPING:
                tag_id = _find_tag_id(pgh_data)
                if tag_id is not None:
                    tag_ids_by_model[model_name].add(tag_id)
            elif obj_id:
                ids_by_model[model_name].add(int(obj_id))
        except (AttributeError, TypeError, ValueError):
            # A malformed event must not abort the whole batch; the second
            # pass assigns this event its fallback label.
            logger.debug("Error collecting IDs for event: %s", event, exc_info=True)

    # Batch fetch model instances
    instances_cache = {}
    for model_name, obj_ids in ids_by_model.items():
        if not obj_ids:
            continue
        try:
            model_class = apps.get_model("dojo", model_name)
        except LookupError:
            # Unknown model: events fall back to the pgh_data reconstruction
            logger.debug("Model %s not found in app 'dojo', skipping instance lookup", model_name)
            continue
        instances_cache[model_name] = {
            obj.id: obj for obj in model_class.objects.filter(id__in=obj_ids)
        }

    # Batch fetch users for FindingReviewers
    users_cache = {}
    if user_ids:
        users_cache = {u.id: u for u in Dojo_User.objects.filter(id__in=user_ids)}

    # Batch fetch tags per model type
    tags_cache = {}
    for model_name, tag_ids in tag_ids_by_model.items():
        if tag_ids and model_name in TAG_MODEL_MAPPING:
            parent_model, field_name = TAG_MODEL_MAPPING[model_name]
            tag_model = parent_model._meta.get_field(field_name).remote_field.model
            tags_cache[model_name] = {t.id: t.name for t in tag_model.objects.filter(id__in=tag_ids)}

    # Second pass: annotate events
    for event in events:
        try:
            obj_model = getattr(event, "pgh_obj_model", None)
            if not obj_model:
                event.object_str = "N/A"
                event.object_url = None
                continue

            model_name = obj_model.split(".")[-1]
            pgh_data = getattr(event, "pgh_data", None) or {}
            obj_id = getattr(event, "pgh_obj_id", None)
            obj_id_int = int(obj_id) if obj_id else None

            if model_name == "FindingReviewers":
                user_id = pgh_data.get("dojo_user_id")
                user = users_cache.get(int(user_id)) if user_id else None
                if user:
                    event.object_str = f"Reviewer: {user.get_full_name() or user.username}"
                else:
                    event.object_str = f"FindingReviewers #{obj_id}"
                event.object_url = None
            elif model_name in TAG_MODEL_MAPPING:
                # Find tag name from cache
                tag_id = _find_tag_id(pgh_data)
                tag_name = tags_cache.get(model_name, {}).get(tag_id) if tag_id is not None else None
                if tag_name:
                    event.object_str = f"Tag: {tag_name}"
                else:
                    event.object_str = f"{model_name} #{obj_id}"
                event.object_url = None
            else:
                instance = instances_cache.get(model_name, {}).get(obj_id_int)
                if instance:
                    event.object_str = str(instance)
                    event.object_url = instance.get_absolute_url() if hasattr(instance, "get_absolute_url") else None
                else:
                    # No live instance was loaded: rebuild the label from the snapshot
                    event.object_str = _reconstruct_object_str(model_name, pgh_data, obj_id)
                    event.object_url = None
        except Exception:
            # Fallback if anything fails
            logger.debug("Error processing event: %s", event, exc_info=True)
            event.object_str = f"{getattr(event, 'pgh_obj_model', 'Unknown')} #{getattr(event, 'pgh_obj_id', '?')}"
            event.object_url = None

    return events
174+
27175

28176
def _flush_models_in_batches(models_to_flush, timestamp_field: str, retention_period: int, batch_size: int, max_batches: int, *, dry_run: bool = False) -> tuple[int, int, bool]:
29177
"""
@@ -145,6 +293,7 @@ def register_django_pghistory_models():
145293
"""
146294
# Import models inside function to avoid AppRegistryNotReady errors
147295
from dojo.models import ( # noqa: PLC0415
296+
App_Analysis,
148297
Cred_User,
149298
Dojo_User,
150299
Endpoint,
@@ -153,6 +302,7 @@ def register_django_pghistory_models():
153302
Finding_Group,
154303
Finding_Template,
155304
Notification_Webhooks,
305+
Objects_Product,
156306
Product,
157307
Product_Type,
158308
Risk_Acceptance,
@@ -376,6 +526,54 @@ class Meta:
376526
},
377527
)(FindingReviewers)
378528

529+
# Track tag through models for all TagField relationships
530+
# Must use proxy pattern like FindingReviewers because tagulous auto-generates
531+
# through models that cannot be imported at module level
532+
tag_through_models = [
533+
# (Parent model, field name, proxy class name)
534+
(Finding, "tags", "FindingTags"),
535+
(Finding, "inherited_tags", "FindingInheritedTags"),
536+
(Product, "tags", "ProductTags"),
537+
(Engagement, "tags", "EngagementTags"),
538+
(Engagement, "inherited_tags", "EngagementInheritedTags"),
539+
(Test, "tags", "TestTags"),
540+
(Test, "inherited_tags", "TestInheritedTags"),
541+
(Endpoint, "tags", "EndpointTags"),
542+
(Endpoint, "inherited_tags", "EndpointInheritedTags"),
543+
(Finding_Template, "tags", "FindingTemplateTags"),
544+
(App_Analysis, "tags", "AppAnalysisTags"),
545+
(Objects_Product, "tags", "ObjectsProductTags"),
546+
]
547+
548+
for parent_model, field_name, proxy_name in tag_through_models:
549+
# Populate the mapping for use in process_events_for_display
550+
TAG_MODEL_MAPPING[proxy_name] = (parent_model, field_name)
551+
552+
through_model = parent_model._meta.get_field(field_name).remote_field.through
553+
554+
# Create proxy class dynamically
555+
proxy_class = type(proxy_name, (through_model,), {
556+
"__module__": __name__,
557+
"Meta": type("Meta", (), {"proxy": True}),
558+
})
559+
560+
# Derive event table name from through table name
561+
db_table = through_model._meta.db_table + "event"
562+
563+
pghistory.track(
564+
pghistory.InsertEvent(),
565+
pghistory.DeleteEvent(),
566+
pghistory.ManualEvent(label="initial_backfill"),
567+
meta={
568+
"db_table": db_table,
569+
"indexes": [
570+
models.Index(fields=["pgh_created_at"]),
571+
models.Index(fields=["pgh_label"]),
572+
models.Index(fields=["pgh_context_id"]),
573+
],
574+
},
575+
)(proxy_class)
576+
379577
# Only log during actual application startup, not during shell commands
380578
if "shell" not in sys.argv:
381579
logger.info("Successfully registered models with django-pghistory")
@@ -488,6 +686,43 @@ def get_table_names(model_name):
488686
# M2M through table: Django creates dojo_finding_reviewers for Finding.reviewers
489687
table_name = "dojo_finding_reviewers"
490688
event_table_name = "dojo_finding_reviewersevent"
689+
# Tag through tables (tagulous auto-generated)
690+
elif model_name == "FindingTags":
691+
table_name = "dojo_finding_tags"
692+
event_table_name = "dojo_finding_tagsevent"
693+
elif model_name == "FindingInheritedTags":
694+
table_name = "dojo_finding_inherited_tags"
695+
event_table_name = "dojo_finding_inherited_tagsevent"
696+
elif model_name == "ProductTags":
697+
table_name = "dojo_product_tags"
698+
event_table_name = "dojo_product_tagsevent"
699+
elif model_name == "EngagementTags":
700+
table_name = "dojo_engagement_tags"
701+
event_table_name = "dojo_engagement_tagsevent"
702+
elif model_name == "EngagementInheritedTags":
703+
table_name = "dojo_engagement_inherited_tags"
704+
event_table_name = "dojo_engagement_inherited_tagsevent"
705+
elif model_name == "TestTags":
706+
table_name = "dojo_test_tags"
707+
event_table_name = "dojo_test_tagsevent"
708+
elif model_name == "TestInheritedTags":
709+
table_name = "dojo_test_inherited_tags"
710+
event_table_name = "dojo_test_inherited_tagsevent"
711+
elif model_name == "EndpointTags":
712+
table_name = "dojo_endpoint_tags"
713+
event_table_name = "dojo_endpoint_tagsevent"
714+
elif model_name == "EndpointInheritedTags":
715+
table_name = "dojo_endpoint_inherited_tags"
716+
event_table_name = "dojo_endpoint_inherited_tagsevent"
717+
elif model_name == "FindingTemplateTags":
718+
table_name = "dojo_finding_template_tags"
719+
event_table_name = "dojo_finding_template_tagsevent"
720+
elif model_name == "AppAnalysisTags":
721+
table_name = "dojo_app_analysis_tags"
722+
event_table_name = "dojo_app_analysis_tagsevent"
723+
elif model_name == "ObjectsProductTags":
724+
table_name = "dojo_objects_product_tags"
725+
event_table_name = "dojo_objects_product_tagsevent"
491726
else:
492727
table_name = f"dojo_{model_name.lower()}"
493728
event_table_name = f"dojo_{model_name.lower()}event"
@@ -791,4 +1026,17 @@ def get_tracked_models():
7911026
"Product_Type", "Product", "Test", "Risk_Acceptance",
7921027
"Finding_Template", "Cred_User", "Notification_Webhooks",
7931028
"FindingReviewers", # M2M through table for Finding.reviewers
1029+
# Tag through tables (tagulous auto-generated)
1030+
"FindingTags",
1031+
"FindingInheritedTags",
1032+
"ProductTags",
1033+
"EngagementTags",
1034+
"EngagementInheritedTags",
1035+
"TestTags",
1036+
"TestInheritedTags",
1037+
"EndpointTags",
1038+
"EndpointInheritedTags",
1039+
"FindingTemplateTags",
1040+
"AppAnalysisTags",
1041+
"ObjectsProductTags",
7941042
]

0 commit comments

Comments
 (0)