99import logging
1010import os
1111import warnings
12+ from collections import defaultdict
1213
14+ from django import __version__ as django_version
1315from django .contrib .auth .hashers import make_password
1416from django .core import files as django_files
15- from django .db import IntegrityError
17+ from django .db import IntegrityError , connections , models
18+ from packaging .version import Version
1619
1720from . import base , declarations , errors
1821
2124
2225DEFAULT_DB_ALIAS = 'default' # Same as django.db.DEFAULT_DB_ALIAS
2326
# True when the installed Django is older than 3.0 (any such release, not
# only 2.2).
DJANGO_22 = Version(django_version) < Version('3.0')
2428
2529_LAZY_LOADS = {}
2630
@@ -44,11 +48,31 @@ def _lazy_load_get_model():
4448 _LAZY_LOADS ['get_model' ] = django_apps .apps .get_model
4549
4650
def connection_supports_bulk_insert(using):
    """
    Tell whether the database behind the ``using`` alias can bulk insert.

    Two backend feature flags have to be set for the answer to be truthy:

    * ``has_bulk_insert``
    * the flag saying the primary keys of the inserted rows are returned,
      named ``can_return_ids_from_bulk_insert`` when ``DJANGO_22`` is set
      and ``can_return_rows_from_bulk_insert`` otherwise.

    The result is the value of ``has_bulk_insert and <second flag>``.
    """
    if DJANGO_22:
        returning_flag = 'can_return_ids_from_bulk_insert'
    else:
        returning_flag = 'can_return_rows_from_bulk_insert'
    features = connections[using].features
    # Read the version-specific flag unconditionally so that a backend
    # lacking it fails loudly whatever ``has_bulk_insert`` says.
    returns_new_pks = getattr(features, returning_flag)
    return features.has_bulk_insert and returns_new_pks
68+
69+
4770class DjangoOptions (base .FactoryOptions ):
def _build_default_options(self):
    """Return the inherited option defaults followed by the Django ones.

    Added here: ``django_get_or_create`` (default ``()``), ``database``
    (default ``DEFAULT_DB_ALIAS``), ``use_bulk_create`` (default ``False``)
    and ``skip_postgeneration_save`` (default ``False``). All of them are
    declared with ``inherit=True``.
    """
    inherited = super()._build_default_options()
    django_specific = [
        base.OptionDefault('django_get_or_create', (), inherit=True),
        base.OptionDefault('database', DEFAULT_DB_ALIAS, inherit=True),
        base.OptionDefault('use_bulk_create', False, inherit=True),
        base.OptionDefault('skip_postgeneration_save', False, inherit=True),
    ]
    return inherited + django_specific
5478
@@ -159,6 +183,58 @@ def _get_or_create(cls, model_class, *args, **kwargs):
159183
160184 return instance
161185
@classmethod
def supports_bulk_insert(cls):
    """Tell whether this factory should create its instances in bulk.

    That requires the ``use_bulk_create`` option to be set and the
    factory's database to pass ``connection_supports_bulk_insert``.
    """
    opted_in = cls._meta.use_bulk_create
    if not opted_in:
        # Hand back the option's own falsy value; the database is not probed.
        return opted_in
    return connection_supports_bulk_insert(cls._meta.database)
190+
@classmethod
def create(cls, **kwargs):
    """Create one instance of the associated class, with overridden attrs.

    Goes through ``_bulk_create`` with a batch of one when
    ``supports_bulk_insert()`` is truthy, and through the parent class's
    ``create`` otherwise.
    """
    if cls.supports_bulk_insert():
        batch = cls._bulk_create(1, **kwargs)
        return batch[0]
    return super().create(**kwargs)
198+
@classmethod
def create_batch(cls, size, **kwargs):
    """Create ``size`` instances of the associated class.

    Goes through ``_bulk_create`` when ``supports_bulk_insert()`` is truthy,
    and through the parent class's ``create_batch`` otherwise.
    """
    if cls.supports_bulk_insert():
        return cls._bulk_create(size, **kwargs)
    return super().create_batch(size, **kwargs)
205+
@classmethod
def _refresh_database_pks(cls, model_cls, objs):
    """
    Work around a bulk insert issue of Django releases older than 3.0.

    When an unsaved instance references another unsaved instance, saving
    the referenced one does not update its pk/id on the instance that
    points to it. Every ``ForeignObject`` relation of ``model_cls`` is
    therefore read from each object and assigned straight back.

    Does nothing unless ``DJANGO_22`` is set.
    """
    if not DJANGO_22:
        return
    relation_names = [
        field.name
        for field in model_cls._meta.get_fields()
        if isinstance(field, models.fields.related.ForeignObject)
    ]
    if not relation_names:
        return
    for instance in objs:
        for attr in relation_names:
            # Assigning the related object to itself is presumably what
            # makes the stored foreign key value catch up with its pk.
            related = getattr(instance, attr)
            setattr(instance, attr, related)
225+
@classmethod
def _bulk_create(cls, size, **kwargs):
    """Build ``size`` instances and save them with ``bulk_create``.

    The built instances and the unsaved objects they reference are grouped
    per model by a ``DependencyInsertOrderCollector``; each group is then
    handed to that model's manager in the collector's sorted order.

    Returns the list of instances built for this factory.
    """
    built = cls.build_batch(size, **kwargs)

    ordering = DependencyInsertOrderCollector()
    ordering.collect(cls, built)
    ordering.sort()

    for model_cls, pending in ordering.data.items():
        manager = cls._get_manager(model_cls)
        cls._refresh_database_pks(model_cls, pending)
        manager.bulk_create(pending)
    return built
237+
162238 @classmethod
163239 def _create (cls , model_class , * args , ** kwargs ):
164240 """Create an instance of the model, and save it to the database."""
@@ -263,6 +339,129 @@ def _make_data(self, params):
263339 return thumb_io .getvalue ()
264340
265341
342+ class DependencyInsertOrderCollector :
343+ def __init__ (self ):
344+ # Initially, {model: {instances}}, later values become lists.
345+ self .data = defaultdict (list )
346+ # Tracks deletion-order dependency for databases without transactions
347+ # or ability to defer constraint checks. Only concrete model classes
348+ # should be included, as the dependencies exist only between actual
349+ # database tables; proxy models are represented here by their concrete
350+ # parent.
351+ self .dependencies = defaultdict (set ) # {model: {models}}
352+
353+ def add (self , objs , source = None , nullable = False ):
354+ """
355+ Add 'objs' to the collection of objects to be inserted in order. If the call is
356+ the result of a cascade, 'source' should be the model that caused it,
357+ and 'nullable' should be set to True if the relation can be null.
358+ Return a list of all objects that were not already collected.
359+ """
360+ if not objs :
361+ return []
362+ new_objs = []
363+ model = objs [0 ].__class__
364+ instances = self .data [model ]
365+ lookup = [id (instance ) for instance in instances ]
366+ for obj in objs :
367+ if not obj ._state .adding :
368+ continue
369+ if id (obj ) not in lookup :
370+ new_objs .append (obj )
371+ instances .extend (new_objs )
372+ # Nullable relationships can be ignored -- they are nulled out before
373+ # deleting, and therefore do not affect the order in which objects have
374+ # to be deleted.
375+ if source is not None and not nullable :
376+ self .add_dependency (source , model )
377+ return new_objs
378+
379+ def add_dependency (self , model , dependency ):
380+ self .dependencies [model ._meta .concrete_model ].add (
381+ dependency ._meta .concrete_model
382+ )
383+ self .data .setdefault (dependency , self .data .default_factory ())
384+
385+ def collect (
386+ self ,
387+ factory_cls ,
388+ objs ,
389+ source = None ,
390+ nullable = False ,
391+ ):
392+ """
393+ Add 'objs' to the collection of objects to be deleted as well as all
394+ parent instances. 'objs' must be a homogeneous iterable collection of
395+ model instances (e.g. a QuerySet). If 'collect_related' is True,
396+ related objects will be handled by their respective on_delete handler.
397+ If the call is the result of a cascade, 'source' should be the model
398+ that caused it and 'nullable' should be set to True, if the relation
399+ can be null.
400+ If 'keep_parents' is True, data of parent model's will be not deleted.
401+ If 'fail_on_restricted' is False, error won't be raised even if it's
402+ prohibited to delete such objects due to RESTRICT, that defers
403+ restricted object checking in recursive calls where the top-level call
404+ may need to collect more objects to determine whether restricted ones
405+ can be deleted.
406+ """
407+ new_objs = self .add (
408+ objs , source , nullable
409+ )
410+ if not new_objs :
411+ return
412+
413+ model = new_objs [0 ].__class__
414+
415+ # The candidate relations are the ones that come from N-1 and 1-1 relations.
416+ candidate_relations = (
417+ f for f in model ._meta .get_fields (include_hidden = True )
418+ if isinstance (f , models .ForeignKey )
419+ )
420+
421+ collected_objs = []
422+ for field in candidate_relations :
423+ for obj in new_objs :
424+ val = getattr (obj , field .name )
425+ if isinstance (val , models .Model ):
426+ collected_objs .append (val )
427+
428+ for name , in factory_cls ._meta .post_declarations .as_dict ().keys ():
429+ for obj in new_objs :
430+ val = getattr (obj , name , None )
431+ if isinstance (val , models .Model ):
432+ collected_objs .append (val )
433+
434+ if collected_objs :
435+ new_objs = self .collect (
436+ factory_cls = factory_cls , objs = collected_objs , source = model
437+ )
438+
439+ def sort (self ):
440+ """
441+ Sort the model instances by the least dependecies to the most dependencies.
442+
443+ We want to insert the models with no dependencies first, and continue inserting
444+ using the models that the higher models depend on.
445+ """
446+ sorted_models = []
447+ concrete_models = set ()
448+ models = list (self .data )
449+ while len (sorted_models ) < len (models ):
450+ found = False
451+ for model in models :
452+ if model in sorted_models :
453+ continue
454+ dependencies = self .dependencies .get (model ._meta .concrete_model )
455+ if not (dependencies and dependencies .difference (concrete_models )):
456+ sorted_models .append (model )
457+ concrete_models .add (model ._meta .concrete_model )
458+ found = True
459+ if not found :
460+ logger .debug ('dependency order could not be determined' )
461+ return
462+ self .data = {model : self .data [model ] for model in sorted_models }
463+
464+
266465class mute_signals :
267466 """Temporarily disables and then restores any django signals.
268467
@@ -318,6 +517,7 @@ def __call__(self, callable_obj):
318517 if isinstance (callable_obj , base .FactoryMetaClass ):
319518 # Retrieve __func__, the *actual* callable object.
320519 callable_obj ._create = self .wrap_method (callable_obj ._create .__func__ )
520+ callable_obj ._bulk_create = self .wrap_method (callable_obj ._bulk_create .__func__ )
321521 callable_obj ._generate = self .wrap_method (callable_obj ._generate .__func__ )
322522 return callable_obj
323523
0 commit comments