diff --git a/CHANGES/7594.bugfix b/CHANGES/7594.bugfix new file mode 100644 index 0000000000..1e211b1491 --- /dev/null +++ b/CHANGES/7594.bugfix @@ -0,0 +1,2 @@ +Optimized cleanup_old_versions() by rewriting protected_versions() to avoid expensive JOINs +on large databases and deferring the content_ids field during version deletion. diff --git a/pulpcore/app/models/repository.py b/pulpcore/app/models/repository.py index a2350072ea..c07ba74ec2 100644 --- a/pulpcore/app/models/repository.py +++ b/pulpcore/app/models/repository.py @@ -323,20 +323,31 @@ def protected_versions(self): """ from .publication import Distribution, Publication + protected_pks = set() + # find all repo versions set on a distribution - qs = self.versions.filter(pk__in=Distribution.objects.values_list("repository_version_id")) + protected_pks.update( + Distribution.objects.filter( + repository_version__repository=self, + ).values_list("repository_version_id", flat=True) + ) # find all repo versions with publications set on a distribution - qs |= self.versions.filter( - publication__pk__in=Distribution.objects.values_list("publication_id") + dist_pub_ids = Distribution.objects.values_list("publication_id", flat=True) + protected_pks.update( + Publication.objects.filter( + pk__in=dist_pub_ids, + repository_version__repository=self, + ).values_list("repository_version_id", flat=True) ) # Protect repo versions of distributed checkpoint publications. if Distribution.objects.filter(repository=self.pk, checkpoint=True).exists(): - qs |= self.versions.filter( - publication__pk__in=Publication.objects.filter(checkpoint=True).values_list( - "pulp_id" - ) + protected_pks.update( + Publication.objects.filter( + checkpoint=True, + repository_version__repository=self, + ).values_list("repository_version_id", flat=True) ) if distro := Distribution.objects.filter(repository=self.pk, checkpoint=False).first(): @@ -352,9 +363,12 @@ def protected_versions(self): version = self.latest_version() if version: - qs |= self.versions.filter(pk=version.pk) + protected_pks.add(version.pk) + + # Discard None values from distributions with no repository_version set + protected_pks.discard(None) - return qs.distinct() + return self.versions.filter(pk__in=protected_pks) def pull_through_add_content(self, content_artifact): """ @@ -416,7 +430,9 @@ def cleanup_old_versions(self): if self.retain_repo_versions: # Consider only completed versions that aren't protected for cleanup versions = self.versions.complete().exclude(pk__in=self.protected_versions()) - for version in versions.order_by("-number")[self.retain_repo_versions :]: + for version in versions.defer("content_ids").order_by("-number")[ + self.retain_repo_versions : + ]: _logger.info( "Deleting repository version {} due to version retention limit.".format(version) )