Skip to content

Commit b571e35

Browse files
perf(tags): skip redundant inheritance on no-change reimport
Two gates eliminate ~14 wasted queries on a reimport that creates no new findings or location refs (the common "scheduled rescan, nothing changed" path):

1. `DefaultReImporter.process_findings` now tracks newly created findings in `new_findings_in_batch` (populated in the else branch of the matched/unmatched dispatch) and passes ONLY those to `apply_inherited_tags_for_findings`. Matched/existing findings already had inheritance applied at their original creation, so re-running the through-table read + Location prefetch chain on them is pure overhead.

2. `LocationManager._persist_locations` now skips `apply_inherited_tags_for_locations` when no new `LocationProductReference` rows were created. New `LocationFindingReference` rows only add findings within `self._product`, so they can't change a Location's Product membership; only a new product ref can. When `all_product_refs` is empty, the Location's inherited target set is unchanged and the helper would do a costly no-op read for nothing.

Net effect on the pinned ZAP reimport-no-change baselines:

- V2: 75 → 69 (matches the pre-Phase-A baseline of 69)
- V3: 101 → 81 (beats the pre-Phase-A baseline of 87)
1 parent bf0912b commit b571e35

3 files changed

Lines changed: 28 additions & 10 deletions

File tree

dojo/importers/default_reimporter.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,11 @@ def _process_findings_internal(
317317

318318
batch_finding_ids: list[int] = []
319319
batch_findings: list[Finding] = []
320+
# Findings that were newly created (else branch below) — pass these to
321+
# `apply_inherited_tags_for_findings` instead of `batch_findings` so
322+
# matched/existing findings (which already have correct inherited tags)
323+
# don't trigger a redundant through-table read on no-change reimports.
324+
new_findings_in_batch: list[Finding] = []
320325
findings_with_parser_tags: list[tuple] = []
321326
# Batch size for deduplication/post-processing (only new findings)
322327
dedupe_batch_max_size = getattr(settings, "IMPORT_REIMPORT_DEDUPE_BATCH_SIZE", 1000)
@@ -399,6 +404,8 @@ def _process_findings_internal(
399404
candidates_by_uid,
400405
candidates_by_key,
401406
)
407+
if finding:
408+
new_findings_in_batch.append(finding)
402409

403410
# This condition __appears__ to always be true, but am afraid to remove it
404411
if finding:
@@ -437,10 +444,14 @@ def _process_findings_internal(
437444
findings_with_parser_tags.clear()
438445
# Apply import-time tags before post-processing so rules/deduplication see them.
439446
self.apply_import_tags_for_batch(batch_findings)
440-
# Apply inherited Product tags to this batch's findings (and
441-
# their endpoints/locations) BEFORE post_process_findings_batch
447+
# Apply inherited Product tags to NEWLY CREATED findings only
448+
# (and their endpoints/locations) BEFORE post_process_findings_batch
442449
# dispatches, so rules/dedup see inherited tags on .tags.
443-
apply_inherited_tags_for_findings(batch_findings)
450+
# Matched/existing findings already have inheritance applied from
451+
# their original creation; re-running it on no-change reimports
452+
# would be ~8 wasted queries per batch.
453+
apply_inherited_tags_for_findings(new_findings_in_batch)
454+
new_findings_in_batch.clear()
444455
batch_findings.clear()
445456
finding_ids_batch = list(batch_finding_ids)
446457
batch_finding_ids.clear()

dojo/importers/location_manager.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -210,11 +210,18 @@ def _persist_locations(self) -> None:
210210
all_product_refs, batch_size=1000, ignore_conflicts=True,
211211
)
212212

213-
# Trigger bulk tag inheritance
214-
tag_inheritance.apply_inherited_tags_for_locations(
215-
[loc.location for loc in saved],
216-
product=self._product,
217-
)
213+
# Trigger bulk tag inheritance only when the Location's product
214+
# membership actually changed. New product refs are the only thing
215+
# that can add a Product to a Location's inherited-tags target set
216+
# (new finding refs are always to findings in `self._product`, so
217+
# they don't introduce a new Product); skipping when `all_product_refs`
218+
# is empty avoids the through-table read on no-change reimports.
219+
if all_product_refs:
220+
new_ref_location_ids = {ref.location_id for ref in all_product_refs}
221+
tag_inheritance.apply_inherited_tags_for_locations(
222+
[loc.location for loc in saved if loc.location_id in new_ref_location_ids],
223+
product=self._product,
224+
)
218225

219226
# Clear accumulators
220227
self._locations_by_finding.clear()

unittests/test_tag_inheritance_perf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -546,5 +546,5 @@ def test_baseline_zap_scan_reimport_no_change_v3(self):
546546
# eliminating the threading bug; full Phase B reductions land in Stage 2.
547547
EXPECTED_ZAP_IMPORT_V2 = 420
548548
EXPECTED_ZAP_IMPORT_V3 = 444
549-
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 74
550-
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 100
549+
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V2 = 69
550+
EXPECTED_ZAP_REIMPORT_NO_CHANGE_V3 = 81

0 commit comments

Comments
 (0)