@@ -448,14 +448,45 @@ def _Jobs(self):
448448 def jobs (self ):
449449 return self ._Jobs & {"table_name" : self .target .table_name }
450450
451- def schedule_jobs (self , * restrictions , purge_invalid_jobs = True ):
451+ def schedule_jobs (self , * restrictions , purge_invalid_jobs = True , min_scheduling_interval = None ):
452452 """
453- Schedule new jobs for this autopopulate table
454- :param restrictions: a list of restrictions each restrict
455- (table.key_source - target.proj())
456- :param purge_invalid_jobs: if True, remove invalid entry from the jobs table (potentially expensive operation)
457- :return:
453+ Schedule new jobs for this autopopulate table by finding keys that need computation.
454+
455+ This method implements an optimization strategy to avoid excessive scheduling:
456+ 1. First checks if any jobs were scheduled recently (within min_scheduling_interval)
457+ 2. If recent jobs exist, skips scheduling to prevent database load
458+ 3. Otherwise, finds keys that need computation and schedules them
459+
460+ The method also optionally purges invalid jobs (jobs that no longer exist in key_source)
461+ to maintain database cleanliness.
462+
463+ Args:
464+ restrictions: a list of restrictions each restrict (table.key_source - target.proj())
465+ purge_invalid_jobs: if True, remove invalid entry from the jobs table (potentially expensive operation)
466+ min_scheduling_interval: minimum time in seconds that must have passed since last job scheduling.
467+ If None, uses the value from dj.config["min_scheduling_interval"] (default: None)
468+
469+ Returns:
470+ None
458471 """
472+ if min_scheduling_interval is None :
473+ min_scheduling_interval = config ["min_scheduling_interval" ]
474+
475+ # First check if we have any recent jobs
476+ if min_scheduling_interval > 0 :
477+ recent_jobs = len (
478+ self .jobs
479+ & {"status" : "scheduled" }
480+ & f"timestamp <= UTC_TIMESTAMP()" # Only consider jobs up to current UTC time
481+ & f"timestamp >= DATE_SUB(UTC_TIMESTAMP(), INTERVAL { min_scheduling_interval } SECOND)"
482+ )
483+ if recent_jobs > 0 :
484+ logger .debug (
485+ f"Skipping job scheduling for `{ to_camel_case (self .target .table_name )} ` - "
486+ f"found { recent_jobs } jobs created within last { min_scheduling_interval } seconds"
487+ )
488+ return
489+
459490 try :
460491 with self .connection .transaction :
461492 schedule_count = 0
0 commit comments