@@ -90,9 +90,12 @@ def set_duplicate(new_finding, existing_finding):
9090 new_finding .duplicate_finding = existing_finding
9191
9292 # Make sure transitive duplication is flattened
93- # if A -> B and B is made a duplicate of C here, aferwards :
93+ # if A -> B and B is made a duplicate of C here, afterwards :
9494 # A -> C and B -> C should be true
95- for find in new_finding .original_finding .all ().order_by ("-id" ):
95+ # Iteration order relies on the caller: post_process_findings_batch
96+ # prefetches "original_finding" ordered by -id. Do not call order_by
97+ # here, as that would issue a new query and bypass the prefetch cache.
98+ for find in new_finding .original_finding .all ():
9699 new_finding .original_finding .remove (find )
97100 set_duplicate (find , existing_finding )
98101 existing_finding .found_by .add (new_finding .test .test_type )
@@ -181,10 +184,14 @@ def build_dedupe_scope_queryset(test):
181184 | Q (test__engagement__deduplication_on_engagement = False )
182185 )
183186
184- return Finding .objects .filter (scope_q )
187+ return (
188+ Finding .objects .filter (scope_q )
189+ .select_related ("test" , "test__engagement" , "test__test_type" )
190+ .prefetch_related ("endpoints" )
191+ )
185192
186193
187- def find_candidates_for_deduplication_hash (test , findings , * , include_product_scope_filter ):
194+ def find_candidates_for_deduplication_hash (test , findings ):
188195 base_queryset = build_dedupe_scope_queryset (test )
189196 hash_codes = {f .hash_code for f in findings if getattr (f , "hash_code" , None ) is not None }
190197 if not hash_codes :
@@ -202,7 +209,7 @@ def find_candidates_for_deduplication_hash(test, findings, *, include_product_sc
202209 return existing_by_hash
203210
204211
205- def find_candidates_for_deduplication_unique_id (test , findings , * , include_product_scope_filter ):
212+ def find_candidates_for_deduplication_unique_id (test , findings ):
206213 base_queryset = build_dedupe_scope_queryset (test )
207214 unique_ids = {f .unique_id_from_tool for f in findings if getattr (f , "unique_id_from_tool" , None ) is not None }
208215 if not unique_ids :
@@ -250,7 +257,7 @@ def deduplicate_uid_or_hash_code_old(new_finding):
250257 continue
251258
252259
253- def find_candidates_for_deduplication_uid_or_hash (test , findings , * , include_product_scope_filter ):
260+ def find_candidates_for_deduplication_uid_or_hash (test , findings ):
254261 base_queryset = build_dedupe_scope_queryset (test )
255262 hash_codes = {f .hash_code for f in findings if getattr (f , "hash_code" , None ) is not None }
256263 unique_ids = {f .unique_id_from_tool for f in findings if getattr (f , "unique_id_from_tool" , None ) is not None }
@@ -279,22 +286,15 @@ def find_candidates_for_deduplication_uid_or_hash(test, findings, *, include_pro
279286 return existing_by_uid , existing_by_hash
280287
281288
282- def find_candidates_for_deduplication_legacy (test , findings , * , include_product_scope_filter ):
289+ def find_candidates_for_deduplication_legacy (test , findings ):
283290 base_queryset = build_dedupe_scope_queryset (test )
284291 titles = {f .title for f in findings if getattr (f , "title" , None )}
285292 cwes = {f .cwe for f in findings if getattr (f , "cwe" , 0 )}
286293 cwes .discard (0 )
287294 if not titles and not cwes :
288295 return {}, {}
289296
290- existing_qs = base_queryset .filter (Q (title__in = titles ) | Q (cwe__in = cwes )).exclude (duplicate = True ).prefetch_related (
291- "endpoints" ,
292- "test" ,
293- "test__engagement" ,
294- "found_by" ,
295- "original_finding" ,
296- "test__test_type" ,
297- ).order_by ("id" )
297+ existing_qs = base_queryset .filter (Q (title__in = titles ) | Q (cwe__in = cwes )).exclude (duplicate = True ).order_by ("id" )
298298
299299 by_title = {}
300300 by_cwe = {}
@@ -436,7 +436,7 @@ def _dedupe_batch_hash_code(findings):
436436 if not findings :
437437 return
438438 test = findings [0 ].test
439- candidates_by_hash = find_candidates_for_deduplication_hash (test , findings , include_product_scope_filter = True )
439+ candidates_by_hash = find_candidates_for_deduplication_hash (test , findings )
440440 if not candidates_by_hash :
441441 return
442442 for new_finding in findings :
@@ -453,7 +453,7 @@ def _dedupe_batch_unique_id(findings):
453453 if not findings :
454454 return
455455 test = findings [0 ].test
456- candidates_by_uid = find_candidates_for_deduplication_unique_id (test , findings , include_product_scope_filter = True )
456+ candidates_by_uid = find_candidates_for_deduplication_unique_id (test , findings )
457457 if not candidates_by_uid :
458458 return
459459 for new_finding in findings :
@@ -471,7 +471,7 @@ def _dedupe_batch_uid_or_hash(findings):
471471 return
472472
473473 test = findings [0 ].test
474- candidates_by_uid , existing_by_hash = find_candidates_for_deduplication_uid_or_hash (test , findings , include_product_scope_filter = True )
474+ candidates_by_uid , existing_by_hash = find_candidates_for_deduplication_uid_or_hash (test , findings )
475475 if not (candidates_by_uid or existing_by_hash ):
476476 return
477477 for new_finding in findings :
@@ -492,7 +492,7 @@ def _dedupe_batch_legacy(findings):
492492 if not findings :
493493 return
494494 test = findings [0 ].test
495- candidates_by_title , candidates_by_cwe = find_candidates_for_deduplication_legacy (test , findings , include_product_scope_filter = True )
495+ candidates_by_title , candidates_by_cwe = find_candidates_for_deduplication_legacy (test , findings )
496496 if not (candidates_by_title or candidates_by_cwe ):
497497 return
498498 for new_finding in findings :
0 commit comments