@@ -164,6 +164,190 @@ def bulk_add_tags_to_instances(tag_or_tags, instances, tag_field_name: str = "ta
164164 return total_created
165165
166166
167+ def bulk_add_tag_mapping (
168+ tag_to_instances : dict [str , list ],
169+ tag_field_name : str = "tags" ,
170+ batch_size : int | None = None ,
171+ ) -> int :
172+ """
173+ Add different tags to different sets of instances in ~5 queries regardless of tag count.
174+
175+ Unlike calling ``bulk_add_tags_to_instances`` once per unique tag — which issues
176+ O(unique_tags) queries — this function batches all work:
177+
178+ 1. Fetch all existing tag objects in one query.
179+ 2. Bulk-create any missing tag objects (one INSERT + one re-fetch if needed).
180+ 3. Fetch all pre-existing through-model rows for these (instance, tag) pairs in one query.
181+ 4. Bulk-create all new relationships in one query (batched by ``batch_size``).
182+ 5. Update all tag counts in one ``UPDATE … CASE WHEN …`` query.
183+
184+ Args:
185+ tag_to_instances: mapping of tag_name -> list of instances that should receive
186+ that tag. All instances must be of the same model type.
187+ tag_field_name: name of the TagField on the model (default: ``"tags"``).
188+ batch_size: ``bulk_create`` batch size; defaults to ``TAG_BULK_ADD_BATCH_SIZE``
189+ setting (1000).
190+
191+ Returns:
192+ Total number of new tag relationships created.
193+
194+ """
195+ from collections import defaultdict # noqa: PLC0415
196+
197+ from django .db .models import Case , IntegerField , When # noqa: PLC0415
198+ from django .db .models .functions import Lower # noqa: PLC0415
199+
200+ if not tag_to_instances :
201+ return 0
202+
203+ if batch_size is None :
204+ batch_size = getattr (settings , "TAG_BULK_ADD_BATCH_SIZE" , 1000 )
205+
206+ all_instances = [inst for insts in tag_to_instances .values () for inst in insts ]
207+ if not all_instances :
208+ return 0
209+
210+ model_class = all_instances [0 ].__class__
211+
212+ if model_class is Product :
213+ msg = "bulk_add_tag_mapping: Product instances are not supported; use Product.tags.add() or a propagation-aware helper"
214+ raise ValueError (msg )
215+
216+ try :
217+ tag_field = model_class ._meta .get_field (tag_field_name )
218+ except Exception :
219+ msg = f"Model { model_class .__name__ } does not have field '{ tag_field_name } '"
220+ raise ValueError (msg )
221+
222+ if not hasattr (tag_field , "tag_options" ):
223+ msg = f"Field '{ tag_field_name } ' is not a TagField"
224+ raise ValueError (msg )
225+
226+ tag_model = tag_field .related_model
227+ through_model = tag_field .remote_field .through
228+ case_sensitive = tag_field .tag_options .case_sensitive
229+
230+ source_field_name = None
231+ target_field_name = None
232+ for field in through_model ._meta .fields :
233+ if hasattr (field , "remote_field" ) and field .remote_field :
234+ if field .remote_field .model == model_class :
235+ source_field_name = field .name
236+ elif field .remote_field .model == tag_model :
237+ target_field_name = field .name
238+
239+ all_tag_names = list (tag_to_instances .keys ())
240+
241+ def _key (name : str ) -> str :
242+ return name if case_sensitive else name .lower ()
243+
244+ # --- Query 1: fetch existing tag objects ---
245+ if case_sensitive :
246+ existing_tags : dict [str , object ] = {
247+ t .name : t
248+ for t in tag_model .objects .filter (name__in = all_tag_names )
249+ }
250+ missing_names = [n for n in all_tag_names if n not in existing_tags ]
251+ else :
252+ # Annotate with lowercased name for a case-insensitive IN lookup
253+ existing_tags = {
254+ t .name_lower : t
255+ for t in tag_model .objects .annotate (name_lower = Lower ("name" )).filter (
256+ name_lower__in = [n .lower () for n in all_tag_names ],
257+ )
258+ }
259+ missing_names = [n for n in all_tag_names if n .lower () not in existing_tags ]
260+
261+ # --- Query 2: create missing tag objects ---
262+ # Use get_or_create to call model.save(), which lets tagulous generate the slug field.
263+ # bulk_create bypasses save() so slug is never set, causing unique constraint failures.
264+ if missing_names :
265+ for n in missing_names :
266+ if case_sensitive :
267+ tag , _ = tag_model .objects .get_or_create (name = n , defaults = {"protected" : False })
268+ else :
269+ tag , _ = tag_model .objects .get_or_create (name__iexact = n , defaults = {"name" : n , "protected" : False })
270+ existing_tags [_key (n )] = tag
271+
272+ # --- Query 3: fetch all pre-existing (instance, tag) through-model rows ---
273+ all_instance_ids = {inst .pk for inst in all_instances }
274+ all_tag_pks = {tag .pk for tag in existing_tags .values ()}
275+
276+ existing_pairs : set [tuple ] = set (
277+ through_model .objects .filter (
278+ ** {f"{ source_field_name } __in" : all_instance_ids },
279+ ** {f"{ target_field_name } __in" : all_tag_pks },
280+ ).values_list (source_field_name , target_field_name ),
281+ )
282+
283+ new_relationships = []
284+ created_per_tag : dict [int , int ] = defaultdict (int )
285+
286+ for tag_name , instances in tag_to_instances .items ():
287+ tag = existing_tags .get (_key (tag_name ))
288+ if tag is None :
289+ continue
290+ for instance in instances :
291+ if (instance .pk , tag .pk ) not in existing_pairs :
292+ new_relationships .append (
293+ through_model (** {source_field_name : instance , target_field_name : tag }),
294+ )
295+ created_per_tag [tag .pk ] += 1
296+
297+ if not new_relationships :
298+ return 0
299+
300+ # --- Query 4: bulk-create all new relationships (batched for memory) ---
301+ # Use len(new_relationships) for the count: existing pairs were already filtered out above,
302+ # so every entry here is new. bulk_create return value is unreliable with ignore_conflicts.
303+ total_created = len (new_relationships )
304+ with transaction .atomic ():
305+ for i in range (0 , len (new_relationships ), batch_size ):
306+ batch = new_relationships [i : i + batch_size ]
307+ through_model .objects .bulk_create (batch , ignore_conflicts = True )
308+
309+ # --- Query 5: update all tag counts in one UPDATE … CASE WHEN … ---
310+ tag_model .objects .filter (pk__in = list (created_per_tag .keys ())).update (
311+ count = Case (
312+ * [
313+ When (pk = pk , then = models .F ("count" ) + delta )
314+ for pk , delta in created_per_tag .items ()
315+ ],
316+ output_field = IntegerField (),
317+ ),
318+ )
319+
320+ for instance in all_instances :
321+ prefetch_cache = getattr (instance , "_prefetched_objects_cache" , None )
322+ if prefetch_cache is not None :
323+ prefetch_cache .pop (tag_field_name , None )
324+
325+ return total_created
326+
327+
def bulk_apply_parser_tags(findings_with_tags: list) -> None:
    """
    Bulk-apply per-finding parser tags collected during an import loop.

    Groups the findings by tag value and delegates to ``bulk_add_tag_mapping`` in a
    single call, instead of adding tags finding by finding.

    Falsy tag values (``""``, ``None``) are skipped, and a tag repeated within one
    finding's tag list is applied to that finding only once. If no usable tags are
    collected, ``bulk_add_tag_mapping`` is not called.

    Args:
        findings_with_tags: list of ``(finding, [tag_str, ...])`` pairs accumulated
            during the import loop (only for findings whose parser supplied tags).
            A ``None`` tag list is treated as empty.

    """
    from collections import defaultdict  # noqa: PLC0415

    tag_to_findings: dict = defaultdict(list)
    for finding, tag_list in findings_with_tags:
        # dict.fromkeys drops repeated tags while keeping first-seen order, so the
        # same finding is never listed twice under one tag.
        for tag in dict.fromkeys(tag_list or ()):
            if tag:
                tag_to_findings[tag].append(finding)

    if not tag_to_findings:
        return

    bulk_add_tag_mapping(tag_to_findings)
349+
350+
167351def bulk_remove_all_tags (model_class , instance_ids_qs ):
168352 """
169353 Remove all tags from instances identified by the given ID subquery.
@@ -226,4 +410,4 @@ def bulk_remove_all_tags(model_class, instance_ids_qs):
226410 )
227411
228412
# Public API of this module; each entry must match a defined function name exactly
# (no surrounding whitespace), otherwise ``from <module> import *`` raises AttributeError.
__all__ = ["bulk_add_tag_mapping", "bulk_add_tags_to_instances", "bulk_apply_parser_tags", "bulk_remove_all_tags"]
0 commit comments