Skip to content

Commit e97662d

Browse files
Maffooch and claude committed
✨ migrate_endpoints_to_locations: tag inheritance + per-endpoint error isolation
`bulk_create` (introduced in the prior perf commit) skips the `inherit_tags_on_linked_instance` post_save signal, so deployments with `enable_product_tag_inheritance` enabled (per-product or system-wide) would not pick up inherited product tags on migrated Locations.

Track (product, location) pairs during the main loop — covering both `endpoint.product` and `finding.test.engagement.product` — and run a post-pass that calls `LocationManager(product)._bulk_inherit_tags(locations)` once per contributing product. The helper rediscovers each location's full product set via LocationProductReference/LocationFindingReference and diff-checks before writing, so revisits of shared locations across product groups are idempotent. This costs ~5 queries per product group, versus ~3 queries per location for a per-location `inherit_tags()` loop.

Also wrap the per-endpoint body in a `try`/`except Exception` so a single bad row doesn't abort a multi-hour migration. Failures are logged with a full traceback and tracked in `self.failed_endpoints`; the final "Done." line reports `<successful>/<total>`, and a yellow warning lists the first 10 failing IDs. `KeyboardInterrupt` / `SystemExit` are not swallowed. The post-pass applies the same `try`/`except` isolation per product group.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 141b1e2 commit e97662d

1 file changed

Lines changed: 138 additions & 7 deletions

File tree

dojo/management/commands/migrate_endpoints_to_locations.py

Lines changed: 138 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import datetime
33
import logging
44
import time
5+
from collections import defaultdict
56

67
from django.core.management.base import BaseCommand
78
from django.db import connection
@@ -10,7 +11,7 @@
1011

1112
from dojo.location.models import Location, LocationFindingReference, LocationProductReference
1213
from dojo.location.status import FindingLocationStatus, ProductLocationStatus
13-
from dojo.models import DojoMeta, Endpoint, Endpoint_Status
14+
from dojo.models import DojoMeta, Endpoint, Endpoint_Status, Product
1415
from dojo.url.models import URL
1516

1617
logger = logging.getLogger(__name__)
@@ -98,6 +99,29 @@ def _bench_end(self, phase: str, t0: float) -> None:
9899
self.timings[phase] += time.perf_counter() - t0
99100
self.counts[phase] += 1
100101

102+
# -- Tag inheritance bookkeeping -----------------------------------------
103+
104+
def _track_product_location(self, product: Product, location: Location) -> None:
105+
"""
106+
Record a (product, location) pair for the post-migration tag inheritance pass.
107+
108+
The migration creates locations that may be linked to multiple products
109+
(via the endpoint's own product and via each finding's product). We
110+
collect every contributing product per location so the post-pass can
111+
call ``LocationManager(product)._bulk_inherit_tags(locations)`` once
112+
per product group — covering the case where a location is shared
113+
across products with differing ``enable_product_tag_inheritance``
114+
flags (the helper short-circuits via its own diff check on repeat
115+
visits, so redundancy is safe).
116+
"""
117+
if product is None or product.id is None:
118+
return
119+
if location is None or location.id is None:
120+
return
121+
self.locations_by_product_id[product.id].add(location.id)
122+
self.product_obj_by_id.setdefault(product.id, product)
123+
self.location_obj_by_id.setdefault(location.id, location)
124+
101125
# -- Migration logic --------------------------------------------------
102126

103127
def _endpoint_to_url(self, endpoint: Endpoint) -> Location:
@@ -197,6 +221,10 @@ def _associate_location_with_findings(self, endpoint: Endpoint, location: Locati
197221
if finding is None:
198222
continue
199223
product = finding.test.engagement.product
224+
# Track this contributing product for the post-migration tag
225+
# inheritance pass (covers the case where a finding's product
226+
# differs from endpoint.product).
227+
self._track_product_location(product, location)
200228
status = self._convert_endpoint_status_to_string_status(endpoint_status)
201229
# Endpoint_Status.date is a Date; the original code persisted
202230
# the same midnight-aware datetime in a post-save UPDATE. We
@@ -298,6 +326,55 @@ def _print_benchmark_summary(self, total_endpoints: int, total_seconds: float) -
298326
f"{(total_seconds * 1000.0 / total_endpoints if total_endpoints else 0):>18.2f}"
299327
f"{'100.0%':>10}")
300328

329+
# -- Post-migration tag inheritance --------------------------------------
330+
331+
def _run_tag_inheritance(self) -> None:
332+
"""
333+
Drive `LocationManager._bulk_inherit_tags` once per contributing product.
334+
335+
Each `LocationManager` call is wrapped in its own try/except so a
336+
failure on one product group doesn't prevent the rest from running —
337+
same philosophy as the per-endpoint loop. Tag inheritance is a
338+
purely additive post-pass; the underlying location/reference rows
339+
are already committed by the main loop, so partial failure here
340+
leaves a consistent (if incomplete-inheritance) state that a
341+
targeted re-run can finish.
342+
"""
343+
if not self.locations_by_product_id:
344+
return
345+
346+
# Lazy import: dojo.importers.* pulls in a lot of modules and we
347+
# don't want it loaded at management-command discovery time.
348+
from dojo.importers.location_manager import LocationManager # noqa: PLC0415
349+
350+
t0 = time.time()
351+
n_products = len(self.locations_by_product_id)
352+
n_pairs = sum(len(loc_ids) for loc_ids in self.locations_by_product_id.values())
353+
n_unique_locations = len(self.location_obj_by_id)
354+
n_failures = 0
355+
for prod_id, loc_ids in self.locations_by_product_id.items():
356+
product = self.product_obj_by_id[prod_id]
357+
locations = [self.location_obj_by_id[lid] for lid in loc_ids]
358+
try:
359+
LocationManager(product)._bulk_inherit_tags(locations)
360+
except Exception:
361+
logger.exception(
362+
"Tag inheritance pass failed for product id=%s "
363+
"(%d location(s)); continuing with remaining products",
364+
prod_id, len(locations),
365+
)
366+
n_failures += 1
367+
elapsed = time.time() - t0
368+
msg = (
369+
f"Tag inheritance pass: visited {n_pairs:,} (product, location) pair(s) "
370+
f"across {n_products:,} product(s), {n_unique_locations:,} unique location(s), "
371+
f"in {elapsed:.2f}s"
372+
)
373+
if n_failures:
374+
self.stdout.write(self.style.WARNING(f"{msg}{n_failures} product group(s) failed"))
375+
else:
376+
self.stdout.write(self.style.SUCCESS(msg))
377+
301378
# -- handle ---------------------------------------------------------------
302379

303380
def handle(self, *args, **options):
@@ -310,6 +387,21 @@ def handle(self, *args, **options):
310387
self.timings = dict.fromkeys(PHASES, 0.0)
311388
self.counts = dict.fromkeys(PHASES, 0)
312389

390+
# Bookkeeping for the post-migration tag inheritance pass.
391+
# `locations_by_product_id` maps product.id -> set of location.ids
392+
# contributed by that product (via endpoint.product OR finding.test.
393+
# engagement.product). We hold the Product/Location objects in
394+
# parallel maps so the post-pass can hand them directly to
395+
# `LocationManager(product)._bulk_inherit_tags(locations)` without
396+
# extra DB lookups.
397+
self.locations_by_product_id: dict[int, set[int]] = defaultdict(set)
398+
self.product_obj_by_id: dict[int, Product] = {}
399+
self.location_obj_by_id: dict[int, Location] = {}
400+
401+
# Collected per-endpoint failures so a single bad row doesn't abort
402+
# a multi-hour migration. Each entry is (endpoint_id, exception_str).
403+
self.failed_endpoints: list[tuple[int | None, str]] = []
404+
313405
if self.query_count:
314406
connection.force_debug_cursor = True
315407
queries_at_chunk_start = len(connection.queries)
@@ -360,11 +452,29 @@ def handle(self, *args, **options):
360452
# prefetch will start incrementing.
361453
self._bench_end("fetch_endpoint", t_fetch)
362454

363-
# Get the URL object first
364-
location = self._endpoint_to_url(endpoint)
365-
# Associate the URL with the findings associated with the Findings
366-
# the association to a finding will also apply to a product automatically
367-
self._associate_location_with_findings(endpoint, location)
455+
# Wrap the per-endpoint work so one failure doesn't abort a
456+
# multi-hour migration. We log the full traceback and record
457+
# the endpoint id, then continue. The bulk_create-based hot
458+
# path makes partial-state on failure unlikely (each phase
459+
# is its own bulk insert), and any rows that DID land remain
460+
# valid and idempotent on re-run.
461+
try:
462+
# Get the URL object first
463+
location = self._endpoint_to_url(endpoint)
464+
# Track the endpoint's own product as a contributor for the
465+
# post-migration tag inheritance pass (the no-findings
466+
# branch of _associate_location_with_findings also depends
467+
# on this product, and it won't be tracked otherwise).
468+
if endpoint.product_id:
469+
self._track_product_location(endpoint.product, location)
470+
# Associate the URL with the findings linked to this endpoint;
471+
# associating it with a finding also applies it to that finding's product automatically
472+
self._associate_location_with_findings(endpoint, location)
473+
except Exception as exc:
474+
endpoint_id = getattr(endpoint, "id", None)
475+
logger.exception("Failed to migrate endpoint id=%s; continuing", endpoint_id)
476+
self.failed_endpoints.append((endpoint_id, str(exc)))
477+
continue
368478

369479
# Progress report every --progress-every endpoints
370480
if i % self.progress_every == 0:
@@ -378,10 +488,31 @@ def handle(self, *args, **options):
378488
self._log_progress(i, endpoint_count, run_t0, queries_in_chunk)
379489

380490
elapsed = time.time() - run_t0
491+
successful = i - len(self.failed_endpoints)
381492
self.stdout.write(self.style.SUCCESS(
382-
f"Done. Migrated {i:,} endpoints in {self._fmt_duration(elapsed)} "
493+
f"Done. Migrated {successful:,}/{i:,} endpoints in {self._fmt_duration(elapsed)} "
383494
f"({(i / elapsed if elapsed else 0):.2f} endpoints/sec).",
384495
))
496+
if self.failed_endpoints:
497+
preview_ids = [eid for eid, _ in self.failed_endpoints[:10]]
498+
self.stdout.write(self.style.WARNING(
499+
f"{len(self.failed_endpoints):,} endpoint(s) failed; see logger output above "
500+
f"for tracebacks. First failing endpoint IDs: {preview_ids}",
501+
))
502+
503+
# Run the post-migration tag inheritance pass. `bulk_create` skips
504+
# the `inherit_tags_on_linked_instance` post_save signal, so for
505+
# deployments with `enable_product_tag_inheritance` enabled (per
506+
# product or system-wide) the migrated Locations would otherwise
507+
# not pick up inherited product tags. We grouped (product,
508+
# location) pairs during the main loop and now drive
509+
# `LocationManager._bulk_inherit_tags` once per contributing
510+
# product. The helper rediscovers each location's full product
511+
# set via LocationProductReference/LocationFindingReference and
512+
# diff-checks before writing, so revisits of shared locations
513+
# across product groups are idempotent.
514+
self._run_tag_inheritance()
515+
385516
self._print_benchmark_summary(i, elapsed)
386517

387518
if self.query_count:

0 commit comments

Comments
 (0)