22import datetime
33import logging
44import time
5+ from collections import defaultdict
56
67from django .core .management .base import BaseCommand
78from django .db import connection
1011
1112from dojo .location .models import Location , LocationFindingReference , LocationProductReference
1213from dojo .location .status import FindingLocationStatus , ProductLocationStatus
13- from dojo .models import DojoMeta , Endpoint , Endpoint_Status
14+ from dojo .models import DojoMeta , Endpoint , Endpoint_Status , Product
1415from dojo .url .models import URL
1516
1617logger = logging .getLogger (__name__ )
@@ -98,6 +99,29 @@ def _bench_end(self, phase: str, t0: float) -> None:
9899 self .timings [phase ] += time .perf_counter () - t0
99100 self .counts [phase ] += 1
100101
102+ # -- Tag inheritance bookkeeping -----------------------------------------
103+
104+ def _track_product_location (self , product : Product , location : Location ) -> None :
105+ """
106+ Record a (product, location) pair for the post-migration tag inheritance pass.
107+
108+ The migration creates locations that may be linked to multiple products
109+ (via the endpoint's own product and via each finding's product). We
110+ collect every contributing product per location so the post-pass can
111+ call ``LocationManager(product)._bulk_inherit_tags(locations)`` once
112+ per product group — covering the case where a location is shared
113+ across products with differing ``enable_product_tag_inheritance``
114+ flags (the helper short-circuits via its own diff check on repeat
115+ visits, so redundancy is safe).
116+ """
117+ if product is None or product .id is None :
118+ return
119+ if location is None or location .id is None :
120+ return
121+ self .locations_by_product_id [product .id ].add (location .id )
122+ self .product_obj_by_id .setdefault (product .id , product )
123+ self .location_obj_by_id .setdefault (location .id , location )
124+
101125 # -- Migration logic --------------------------------------------------
102126
103127 def _endpoint_to_url (self , endpoint : Endpoint ) -> Location :
@@ -197,6 +221,10 @@ def _associate_location_with_findings(self, endpoint: Endpoint, location: Locati
197221 if finding is None :
198222 continue
199223 product = finding .test .engagement .product
224+ # Track this contributing product for the post-migration tag
225+ # inheritance pass (covers the case where a finding's product
226+ # differs from endpoint.product).
227+ self ._track_product_location (product , location )
200228 status = self ._convert_endpoint_status_to_string_status (endpoint_status )
201229 # Endpoint_Status.date is a Date; the original code persisted
202230 # the same midnight-aware datetime in a post-save UPDATE. We
@@ -298,6 +326,55 @@ def _print_benchmark_summary(self, total_endpoints: int, total_seconds: float) -
298326 f"{ (total_seconds * 1000.0 / total_endpoints if total_endpoints else 0 ):>18.2f} "
299327 f"{ '100.0%' :>10} " )
300328
329+ # -- Post-migration tag inheritance --------------------------------------
330+
331+ def _run_tag_inheritance (self ) -> None :
332+ """
333+ Drive `LocationManager._bulk_inherit_tags` once per contributing product.
334+
335+ Each `LocationManager` call is wrapped in its own try/except so a
336+ failure on one product group doesn't prevent the rest from running —
337+ same philosophy as the per-endpoint loop. Tag inheritance is a
338+ purely additive post-pass; the underlying location/reference rows
339+ are already committed by the main loop, so partial failure here
340+ leaves a consistent (if incomplete-inheritance) state that a
341+ targeted re-run can finish.
342+ """
343+ if not self .locations_by_product_id :
344+ return
345+
346+ # Lazy import: dojo.importers.* pulls in a lot of modules and we
347+ # don't want it loaded at management-command discovery time.
348+ from dojo .importers .location_manager import LocationManager # noqa: PLC0415
349+
350+ t0 = time .time ()
351+ n_products = len (self .locations_by_product_id )
352+ n_pairs = sum (len (loc_ids ) for loc_ids in self .locations_by_product_id .values ())
353+ n_unique_locations = len (self .location_obj_by_id )
354+ n_failures = 0
355+ for prod_id , loc_ids in self .locations_by_product_id .items ():
356+ product = self .product_obj_by_id [prod_id ]
357+ locations = [self .location_obj_by_id [lid ] for lid in loc_ids ]
358+ try :
359+ LocationManager (product )._bulk_inherit_tags (locations )
360+ except Exception :
361+ logger .exception (
362+ "Tag inheritance pass failed for product id=%s "
363+ "(%d location(s)); continuing with remaining products" ,
364+ prod_id , len (locations ),
365+ )
366+ n_failures += 1
367+ elapsed = time .time () - t0
368+ msg = (
369+ f"Tag inheritance pass: visited { n_pairs :,} (product, location) pair(s) "
370+ f"across { n_products :,} product(s), { n_unique_locations :,} unique location(s), "
371+ f"in { elapsed :.2f} s"
372+ )
373+ if n_failures :
374+ self .stdout .write (self .style .WARNING (f"{ msg } — { n_failures } product group(s) failed" ))
375+ else :
376+ self .stdout .write (self .style .SUCCESS (msg ))
377+
301378 # -- handle ---------------------------------------------------------------
302379
303380 def handle (self , * args , ** options ):
@@ -310,6 +387,21 @@ def handle(self, *args, **options):
310387 self .timings = dict .fromkeys (PHASES , 0.0 )
311388 self .counts = dict .fromkeys (PHASES , 0 )
312389
390+ # Bookkeeping for the post-migration tag inheritance pass.
391+ # `locations_by_product_id` maps product.id -> set of location.ids
392+ # contributed by that product (via endpoint.product OR finding.test.
393+ # engagement.product). We hold the Product/Location objects in
394+ # parallel maps so the post-pass can hand them directly to
395+ # `LocationManager(product)._bulk_inherit_tags(locations)` without
396+ # extra DB lookups.
397+ self .locations_by_product_id : dict [int , set [int ]] = defaultdict (set )
398+ self .product_obj_by_id : dict [int , Product ] = {}
399+ self .location_obj_by_id : dict [int , Location ] = {}
400+
401+ # Collected per-endpoint failures so a single bad row doesn't abort
402+ # a multi-hour migration. Each entry is (endpoint_id, exception_str).
403+ self .failed_endpoints : list [tuple [int | None , str ]] = []
404+
313405 if self .query_count :
314406 connection .force_debug_cursor = True
315407 queries_at_chunk_start = len (connection .queries )
@@ -360,11 +452,29 @@ def handle(self, *args, **options):
360452 # prefetch will start incrementing.
361453 self ._bench_end ("fetch_endpoint" , t_fetch )
362454
363- # Get the URL object first
364- location = self ._endpoint_to_url (endpoint )
365- # Associate the URL with the findings associated with the Findings
366- # the association to a finding will also apply to a product automatically
367- self ._associate_location_with_findings (endpoint , location )
455+ # Wrap the per-endpoint work so one failure doesn't abort a
456+ # multi-hour migration. We log the full traceback and record
457+ # the endpoint id, then continue. The bulk_create-based hot
458+ # path makes partial-state on failure unlikely (each phase
459+ # is its own bulk insert), and any rows that DID land remain
460+ # valid and idempotent on re-run.
461+ try :
462+ # Get the URL object first
463+ location = self ._endpoint_to_url (endpoint )
464+ # Track the endpoint's own product as a contributor for the
465+ # post-migration tag inheritance pass (the no-findings
466+ # branch of _associate_location_with_findings also depends
467+ # on this product, and it won't be tracked otherwise).
468+ if endpoint .product_id :
469+ self ._track_product_location (endpoint .product , location )
470+ # Associate the URL with the findings associated with the Findings
471+ # the association to a finding will also apply to a product automatically
472+ self ._associate_location_with_findings (endpoint , location )
473+ except Exception as exc :
474+ endpoint_id = getattr (endpoint , "id" , None )
475+ logger .exception ("Failed to migrate endpoint id=%s; continuing" , endpoint_id )
476+ self .failed_endpoints .append ((endpoint_id , str (exc )))
477+ continue
368478
369479 # Progress report every --progress-every endpoints
370480 if i % self .progress_every == 0 :
@@ -378,10 +488,31 @@ def handle(self, *args, **options):
378488 self ._log_progress (i , endpoint_count , run_t0 , queries_in_chunk )
379489
380490 elapsed = time .time () - run_t0
491+ successful = i - len (self .failed_endpoints )
381492 self .stdout .write (self .style .SUCCESS (
382- f"Done. Migrated { i :,} endpoints in { self ._fmt_duration (elapsed )} "
493+ f"Done. Migrated { successful :, } / { i :,} endpoints in { self ._fmt_duration (elapsed )} "
383494 f"({ (i / elapsed if elapsed else 0 ):.2f} endpoints/sec)." ,
384495 ))
496+ if self .failed_endpoints :
497+ preview_ids = [eid for eid , _ in self .failed_endpoints [:10 ]]
498+ self .stdout .write (self .style .WARNING (
499+ f"{ len (self .failed_endpoints ):,} endpoint(s) failed; see logger output above "
500+ f"for tracebacks. First failing endpoint IDs: { preview_ids } " ,
501+ ))
502+
503+ # Run the post-migration tag inheritance pass. `bulk_create` skips
504+ # the `inherit_tags_on_linked_instance` post_save signal, so for
505+ # deployments with `enable_product_tag_inheritance` enabled (per
506+ # product or system-wide) the migrated Locations would otherwise
507+ # not pick up inherited product tags. We grouped (product,
508+ # location) pairs during the main loop and now drive
509+ # `LocationManager._bulk_inherit_tags` once per contributing
510+ # product. The helper rediscovers each location's full product
511+ # set via LocationProductReference/LocationFindingReference and
512+ # diff-checks before writing, so revisits of shared locations
513+ # across product groups are idempotent.
514+ self ._run_tag_inheritance ()
515+
385516 self ._print_benchmark_summary (i , elapsed )
386517
387518 if self .query_count :
0 commit comments