Skip to content

Commit 5f7e0b0

Browse files
Maffooch and claude committed
refactor: consolidate audit-log code into dojo/auditlog/ package
Move dojo/auditlog.py (1,083 lines), dojo/pghistory_models.py, and dojo/pghistory_utils.py into a self-contained dojo/auditlog/ package that mirrors the dojo/url/ canonical layout from CLAUDE.md. Also pulls in the action_history view, its template, and the LogEntryFilter / PgHistoryFilter that previously lived in dojo/views.py, dojo/templates/, and dojo/filters.py. Layout: - models.py DojoEvents proxy model (app_label dropped, auto-inferred) - services.py pghistory registration, retention/cleanup, config - helpers.py display formatting + Celery context wrappers - backfill.py backfill subsystem (used by migrations + commands) - filters.py LogEntryFilter, PgHistoryFilter - settings.py env-var schema + pghistory field defaults (source of truth) - ui/views.py action_history view - ui/urls.py action_history route (name preserved) - templates/dojo/action_history.html Backward-compatible re-exports stay at dojo/pghistory_models.py, dojo/pghistory_utils.py, and dojo/filters.py per the playbook, so the 21 existing call sites keep working without churn. dojo/auditlog.py and the original template are deleted (the package replaces the file). Notable details: - __init__.py uses PEP 562 lazy attribute resolution. settings.dist.py imports dojo.auditlog.settings at Django settings-load time, before django.conf.settings is built; eager re-exports would pull in django-dependent submodules and circularly import settings. - DojoEvents drops the explicit app_label = "dojo" per CLAUDE.md ("DO NOT set app_label in Meta — auto-inferred"). makemigrations --check reports zero new migrations. - Defensive try/except blocks in services and backfill are dropped per the user's directive — let pgtrigger / COPY failures surface loudly rather than silently swallowing them. 
- settings.dist.py becomes a consumer of dojo.auditlog.settings: imports the env-schema dict (splatted into environ.Env via **AUDITLOG_ENV_SCHEMA), the three PGHISTORY_*_FIELD constants, and AUDITLOG_DISABLE_ON_RAW_SAVE; adds dojo/auditlog/templates to TEMPLATES[0]["DIRS"] since auditlog isn't a separate Django app. - unittests/test_auditlog.py: two @patch targets updated from dojo.auditlog.call_command to dojo.auditlog.services.call_command, reflecting where the import now lives. Verified end-to-end against the running stack: - python manage.py check -> no issues - python manage.py makemigrations --check -> No changes detected - unittests.test_auditlog -> 7/7 pass - unittests.test_importers_performance -> 10/10 pass - reverse('action_history', cid=1, oid=2) -> /history/1/2 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 5178368 commit 5f7e0b0

19 files changed

Lines changed: 1492 additions & 1480 deletions

dojo/auditlog.py

Lines changed: 0 additions & 1083 deletions
This file was deleted.

dojo/auditlog/__init__.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
"""
2+
Audit-log package for DefectDojo.
3+
4+
The public API is exposed lazily via PEP 562 so that importing
5+
``dojo.auditlog`` (and especially ``dojo.auditlog.settings``) at Django
6+
settings-load time does not pull in submodules that depend on
7+
``django.conf.settings`` or other not-yet-built parts of the dojo app.
8+
"""
9+
import importlib
10+
11+
_LAZY_EXPORTS = { # noqa: RUF067 -- table backs the PEP 562 __getattr__ re-export
12+
"run_flush_auditlog": "dojo.auditlog.services",
13+
"configure_audit_system": "dojo.auditlog.services",
14+
"configure_pghistory_triggers": "dojo.auditlog.services",
15+
"register_django_pghistory_models": "dojo.auditlog.services",
16+
"process_events_for_display": "dojo.auditlog.helpers",
17+
"get_tracked_models": "dojo.auditlog.backfill",
18+
"process_model_backfill": "dojo.auditlog.backfill",
19+
}
20+
21+
22+
def __getattr__(name):
23+
module_path = _LAZY_EXPORTS.get(name)
24+
if module_path is None:
25+
msg = f"module 'dojo.auditlog' has no attribute {name!r}"
26+
raise AttributeError(msg)
27+
return getattr(importlib.import_module(module_path), name)

dojo/auditlog/backfill.py

Lines changed: 361 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,361 @@
1+
"""
2+
Backfill operations for pghistory event tables.
3+
4+
Used by management commands and one-shot data migrations. Errors propagate
5+
to the caller so misconfigured backfills fail loudly.
6+
"""
7+
import logging
8+
import time
9+
10+
from django.db import connection
11+
from django.utils import timezone
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
def get_excluded_fields(model_name):
    """
    Return the field names excluded from pghistory tracking for *model_name*.

    Mirrors the pghistory configuration: secrets (passwords, webhook headers)
    and noisy auto-updated columns are never copied into event rows.
    Returns an empty list for models with no exclusions.
    """
    exclusions = {
        "Dojo_User": ["password"],
        "Product": ["updated"],
        "Cred_User": ["password"],
        "Notification_Webhooks": ["header_name", "header_value"],
    }
    return exclusions.get(model_name, [])
25+
26+
27+
# Source tables whose names do NOT follow the default ``dojo_<model_name.lower()>``
# pattern. These are CamelCase through-table model names (M2M reviewers table and
# the tagulous auto-generated tag tables) whose db tables use underscores.
# Models with underscores in their Python name (Dojo_User, Product_Type, ...)
# already lowercase to the correct table name and need no entry here.
_IRREGULAR_TABLE_NAMES = {
    "FindingReviewers": "dojo_finding_reviewers",  # M2M through table for Finding.reviewers
    # Tag through tables (tagulous auto-generated)
    "FindingTags": "dojo_finding_tags",
    "FindingInheritedTags": "dojo_finding_inherited_tags",
    "ProductTags": "dojo_product_tags",
    "EngagementTags": "dojo_engagement_tags",
    "EngagementInheritedTags": "dojo_engagement_inherited_tags",
    "TestTags": "dojo_test_tags",
    "TestInheritedTags": "dojo_test_inherited_tags",
    "EndpointTags": "dojo_endpoint_tags",
    "EndpointInheritedTags": "dojo_endpoint_inherited_tags",
    "FindingTemplateTags": "dojo_finding_template_tags",
    "AppAnalysisTags": "dojo_app_analysis_tags",
    "ObjectsProductTags": "dojo_objects_product_tags",
}


def get_table_names(model_name):
    """
    Return ``(source_table_name, event_table_name)`` for *model_name*.

    The source table defaults to ``dojo_<model_name.lower()>`` unless the model
    is listed in ``_IRREGULAR_TABLE_NAMES``; the pghistory event table is always
    the source table name with an ``event`` suffix.
    """
    table_name = _IRREGULAR_TABLE_NAMES.get(model_name, f"dojo_{model_name.lower()}")
    return table_name, f"{table_name}event"
95+
96+
97+
def check_tables_exist(table_name, event_table_name):
    """
    Return ``(table_exists, event_table_exists)`` booleans.

    Probes ``information_schema.tables`` once per table on a single cursor;
    the identical EXISTS query previously appeared twice and is now shared.
    """
    def _exists(cursor, name):
        # information_schema lookup is parameterized; `name` is never interpolated.
        cursor.execute("""
            SELECT EXISTS (
                SELECT FROM information_schema.tables
                WHERE table_name = %s
            )
        """, [name])
        return cursor.fetchone()[0]

    with connection.cursor() as cursor:
        return _exists(cursor, table_name), _exists(cursor, event_table_name)
117+
118+
119+
def process_model_backfill(
    model_name,
    batch_size=10000,
    *,
    dry_run=False,
    progress_callback=None,
):
    """
    Process a single model's backfill using PostgreSQL COPY.

    Synthesizes one event row per source row, labeled ``initial_backfill``,
    for every source record that does not already have such an event. Work is
    idempotent: re-running skips records that already carry the label.

    Args:
        model_name: Name of the model to backfill
        batch_size: Number of records to process in each batch
        dry_run: If True, only show what would be done without creating events
        progress_callback: Optional callable that receives (message, style) tuples
        for progress updates. If None, uses logger.info

    Returns:
        tuple: (processed_count, records_per_second)

    """
    # Default progress sink: route styled messages to the matching logger level.
    if progress_callback is None:
        def progress_callback(msg, style=None):
            if style == "ERROR":
                logger.error(msg)
            elif style == "WARNING":
                logger.warning(msg)
            elif style == "SUCCESS":
                logger.info(msg)
            elif style == "DEBUG":
                logger.debug(msg)
            else:
                logger.info(msg)

    table_name, event_table_name = get_table_names(model_name)

    table_exists, event_table_exists = check_tables_exist(table_name, event_table_name)

    # Missing source table: nothing to do (e.g. model not present in this schema).
    if not table_exists:
        progress_callback(f" Table {table_name} not found")
        return 0, 0.0

    # Missing event table means pghistory has not created triggers for this model.
    if not event_table_exists:
        progress_callback(
            f" Event table {event_table_name} not found. "
            f"Is {model_name} tracked by pghistory?",
            "DEBUG",
        )
        return 0, 0.0

    # NOTE: table/column identifiers below are interpolated with f-strings
    # because SQL identifiers cannot be bound as parameters; the names come
    # from get_table_names()/information_schema, not from user input.
    with connection.cursor() as cursor:
        cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
        total_count = cursor.fetchone()[0]

    if total_count == 0:
        progress_callback(f" No records found for {model_name}")
        return 0, 0.0

    progress_callback(f" Found {total_count:,} records")

    excluded_fields = get_excluded_fields(model_name)

    # Count records already backfilled (reporting only).
    with connection.cursor() as cursor:
        cursor.execute(f"SELECT COUNT(*) FROM {event_table_name} WHERE pgh_label = 'initial_backfill'")
        existing_count = cursor.fetchone()[0]

    # Count records still lacking an initial_backfill event.
    with connection.cursor() as cursor:
        cursor.execute(f"""
            SELECT COUNT(*) FROM {table_name} t
            WHERE NOT EXISTS (
                SELECT 1 FROM {event_table_name} e
                WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_backfill'
            )
        """)
        backfill_count = cursor.fetchone()[0]

    progress_callback(f" Records with initial_backfill events: {existing_count:,}")
    progress_callback(f" Records needing initial_backfill events: {backfill_count:,}")

    if backfill_count == 0:
        progress_callback(f" ✓ All {total_count:,} records already have initial_backfill events", "SUCCESS")
        return total_count, 0.0

    if dry_run:
        progress_callback(f" Would process {backfill_count:,} records using COPY...")
        return backfill_count, 0.0

    # Event-table columns in physical order, minus the auto-generated pgh_id PK,
    # so the COPY column list matches what we synthesize per row.
    with connection.cursor() as cursor:
        cursor.execute("""
            SELECT column_name
            FROM information_schema.columns
            WHERE table_name = %s AND column_name != 'pgh_id'
            ORDER BY ordinal_position
        """, [event_table_name])
        event_columns = [row[0] for row in cursor.fetchall()]

    # Collect all ids still needing backfill up front; batches slice this list.
    with connection.cursor() as cursor:
        cursor.execute(f"""
            SELECT t.id FROM {table_name} t
            WHERE NOT EXISTS (
                SELECT 1 FROM {event_table_name} e
                WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_backfill'
            )
            ORDER BY t.id
        """)
        ids_to_process = [row[0] for row in cursor.fetchall()]

    # Guard against a race between the count above and the id fetch.
    if not ids_to_process:
        progress_callback(" No records need backfill")
        return 0, 0.0

    processed = 0
    batch_start_time = time.time()
    model_start_time = time.time()

    # Source-table columns in physical order; excluded (sensitive/noisy) fields
    # are dropped so they are never copied into event rows.
    with connection.cursor() as cursor:
        cursor.execute("""
            SELECT column_name
            FROM information_schema.columns
            WHERE table_name = %s
            ORDER BY ordinal_position
        """, [table_name])
        source_columns = [row[0] for row in cursor.fetchall()]

    source_columns = [col for col in source_columns if col not in excluded_fields]

    # Position of the primary key in each fetched row, used for pgh_obj_id.
    if "id" in source_columns:
        id_column_index = source_columns.index("id")
    else:
        id_column_index = 0
        progress_callback(" Warning: 'id' column not found in source columns, using first column", "WARNING")

    for i in range(0, len(ids_to_process), batch_size):
        batch_ids = ids_to_process[i:i + batch_size]

        # Periodic heartbeat every 10 batches.
        if i > 0 and i % (batch_size * 10) == 0:
            progress_callback(f" Processing batch starting at index {i:,}...")

        columns_str = ", ".join(source_columns)
        placeholders = ", ".join(["%s"] * len(batch_ids))
        query = f"""
            SELECT {columns_str} FROM {table_name} t
            WHERE t.id IN ({placeholders})
            ORDER BY t.id
        """

        with connection.cursor() as cursor:
            cursor.execute(query, batch_ids)
            batch_rows = cursor.fetchall()

        if not batch_rows:
            progress_callback(f" No records found for batch at index {i}")
            continue

        with connection.cursor() as cursor:
            # Unwrap Django's cursor to the underlying driver cursor for COPY
            # support — presumably psycopg 3's `.copy()` / `write_row` API;
            # TODO(review) confirm driver version.
            raw_cursor = cursor.cursor
            copy_sql = f"COPY {event_table_name} ({', '.join(event_columns)}) FROM STDIN WITH (FORMAT text, DELIMITER E'\\t')"

            records = []
            for row in batch_rows:
                row_data = []

                # Map fetched values back to their column names.
                source_values = {}
                for idx, value in enumerate(row):
                    field_name = source_columns[idx]
                    source_values[field_name] = value

                # Build one event row: pgh_* bookkeeping columns are synthesized,
                # everything else is copied from the source row (NULL if absent,
                # e.g. excluded fields).
                for col in event_columns:
                    if col == "pgh_created_at":
                        row_data.append(timezone.now())
                    elif col == "pgh_label":
                        row_data.append("initial_backfill")
                    elif col == "pgh_obj_id":
                        # Both branches yield the same value; kept as-is (review note).
                        row_data.append(row[id_column_index] if row[id_column_index] is not None else None)
                    elif col == "pgh_context_id":
                        row_data.append(None)
                    elif col in source_values:
                        row_data.append(source_values[col])
                    else:
                        row_data.append(None)

                records.append(tuple(row_data))

            with raw_cursor.copy(copy_sql) as copy:
                for record in records:
                    copy.write_row(record)
            progress_callback(" COPY operation completed using write_row")

            # Commit per batch so completed work survives a later failure.
            raw_cursor.connection.commit()

            raw_cursor.execute(f"SELECT COUNT(*) FROM {event_table_name} WHERE pgh_label = 'initial_backfill'")
            count = raw_cursor.fetchone()[0]
            progress_callback(f" Records in event table after batch: {count}")

        batch_processed = len(batch_rows)
        processed += batch_processed

        batch_end_time = time.time()
        batch_duration = batch_end_time - batch_start_time
        batch_records_per_second = batch_processed / batch_duration if batch_duration > 0 else 0

        progress = (processed / backfill_count) * 100
        progress_callback(
            f" Processed {processed:,}/{backfill_count:,} records ({progress:.1f}%) - "
            f"Last batch: {batch_duration:.2f}s ({batch_records_per_second:.1f} records/sec)",
        )

        batch_start_time = time.time()

    model_end_time = time.time()
    total_duration = model_end_time - model_start_time
    records_per_second = processed / total_duration if total_duration > 0 else 0

    progress_callback(
        f" ✓ Completed {model_name}: {processed:,} records in {total_duration:.2f}s "
        f"({records_per_second:.1f} records/sec)",
        "SUCCESS",
    )
    return processed, records_per_second
338+
339+
340+
def get_tracked_models():
    """Return the names of all models whose history is tracked by pghistory."""
    core_models = [
        "Dojo_User", "Endpoint", "Engagement", "Finding", "Finding_Group",
        "Product_Type", "Product", "Test", "Risk_Acceptance",
        "Finding_Template", "Cred_User", "Notification_Webhooks",
        "FindingReviewers",  # M2M through table for Finding.reviewers
        "Location", "URL",
    ]
    # Tag through tables (tagulous auto-generated)
    tag_through_models = [
        "FindingTags",
        "FindingInheritedTags",
        "ProductTags",
        "EngagementTags",
        "EngagementInheritedTags",
        "TestTags",
        "TestInheritedTags",
        "EndpointTags",
        "EndpointInheritedTags",
        "FindingTemplateTags",
        "AppAnalysisTags",
        "ObjectsProductTags",
    ]
    return core_models + tag_through_models

0 commit comments

Comments
 (0)