Skip to content

Commit 7b95a51

Browse files
committed
Replace O(n²) comparisons with ID sets and SQL aggregation
1 parent 5654e1f commit 7b95a51

File tree

2 files changed

+54
-47
lines changed

2 files changed

+54
-47
lines changed

hosts/tasks.py

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ def find_all_host_updates():
4141
def find_all_host_updates_homogenous():
4242
""" Task to find updates for all hosts where hosts are expected to be homogenous
4343
"""
44-
updated_hosts = []
44+
updated_host_ids = set()
4545
ts = get_datetime_now()
4646
for host in Host.objects.all().iterator():
47-
if host not in updated_hosts:
47+
if host.id not in updated_host_ids:
4848
host.find_updates()
4949
host.updated_at = ts
5050
host.save()
@@ -56,23 +56,20 @@ def find_all_host_updates_homogenous():
5656
# and exclude hosts with the current timestamp
5757
filtered_hosts = filtered_hosts.exclude(updated_at=ts)
5858

59-
packages = set(host.packages.all())
60-
repos = set(host.repos.all())
61-
updates = host.updates.all()
59+
package_ids = frozenset(host.packages.values_list('id', flat=True))
60+
repo_ids = frozenset(host.repos.values_list('id', flat=True))
61+
updates = list(host.updates.all())
6262

63-
phosts = []
6463
for fhost in filtered_hosts.iterator():
65-
frepos = set(fhost.repos.all())
66-
if repos != frepos:
64+
frepo_ids = frozenset(fhost.repos.values_list('id', flat=True))
65+
if repo_ids != frepo_ids:
6766
continue
68-
fpackages = set(fhost.packages.all())
69-
if packages != fpackages:
67+
fpackage_ids = frozenset(fhost.packages.values_list('id', flat=True))
68+
if package_ids != fpackage_ids:
7069
continue
71-
phosts.append(fhost)
7270

73-
for phost in phosts:
74-
phost.updates.set(updates)
75-
phost.updated_at = ts
76-
phost.save()
77-
updated_hosts.append(phost)
78-
info_message(text=f'Added the same updates to {phost}')
71+
fhost.updates.set(updates)
72+
fhost.updated_at = ts
73+
fhost.save()
74+
updated_host_ids.add(fhost.id)
75+
info_message(text=f'Added the same updates to {fhost}')

packages/utils.py

Lines changed: 40 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -280,21 +280,30 @@ def get_matching_packages(name, epoch, version, release, p_type, arch=None):
280280
def clean_packageupdates():
281281
""" Removes PackageUpdate objects that are no longer linked to any hosts
282282
"""
283-
package_updates = list(PackageUpdate.objects.all())
284-
for update in package_updates:
285-
if not update.host_set.exists():
286-
text = f'Removing unused PackageUpdate {update}'
283+
orphaned = PackageUpdate.objects.filter(host__isnull=True)
284+
for update in orphaned:
285+
text = f'Removing unused PackageUpdate {update}'
286+
info_message(text=text)
287+
update.delete()
288+
289+
duplicate_updates = PackageUpdate.objects.values(
290+
'oldpackage', 'newpackage', 'security'
291+
).annotate(count=Count('id'), keep_id=Min('id')).filter(count__gt=1)
292+
293+
for update in duplicate_updates:
294+
extra_updates = PackageUpdate.objects.filter(
295+
oldpackage=update['oldpackage'],
296+
newpackage=update['newpackage'],
297+
security=update['security']
298+
).exclude(id=update['keep_id'])
299+
keep_update = PackageUpdate.objects.get(id=update['keep_id'])
300+
for extra_update in extra_updates:
301+
text = f'Removing duplicate PackageUpdate: {extra_update}'
287302
info_message(text=text)
288-
update.delete()
289-
for duplicate in package_updates:
290-
if update.oldpackage == duplicate.oldpackage and update.newpackage == duplicate.newpackage and \
291-
update.security == duplicate.security and update.id != duplicate.id:
292-
text = f'Removing duplicate PackageUpdate: {update}'
293-
info_message(text=text)
294-
for host in duplicate.host_set.all():
295-
host.updates.remove(duplicate)
296-
host.updates.add(update)
297-
duplicate.delete()
303+
for host in extra_update.host_set.all():
304+
host.updates.remove(extra_update)
305+
host.updates.add(keep_update)
306+
extra_update.delete()
298307

299308

300309
def clean_packages(remove_duplicates=False):
@@ -316,22 +325,23 @@ def clean_packages(remove_duplicates=False):
316325
packages.delete()
317326
if remove_duplicates:
318327
info_message(text='Checking for duplicate Packages...')
319-
for package in Package.objects.all():
320-
potential_duplicates = Package.objects.filter(
321-
name=package.name,
322-
arch=package.arch,
323-
epoch=package.epoch,
324-
version=package.version,
325-
release=package.release,
326-
packagetype=package.packagetype,
327-
category=package.category,
328-
)
329-
potential_duplicates = list(potential_duplicates)
330-
if len(potential_duplicates) > 1:
331-
for dupe in potential_duplicates:
332-
if dupe.id != package.id:
333-
info_message(text=f'Removing duplicate Package {dupe}')
334-
dupe.delete()
328+
duplicates = Package.objects.values(
329+
'name', 'arch', 'epoch', 'version', 'release', 'packagetype', 'category'
330+
).annotate(count=Count('id'), keep_id=Min('id')).filter(count__gt=1)
331+
332+
for dup in duplicates:
333+
to_delete = Package.objects.filter(
334+
name=dup['name'],
335+
arch=dup['arch'],
336+
epoch=dup['epoch'],
337+
version=dup['version'],
338+
release=dup['release'],
339+
packagetype=dup['packagetype'],
340+
category=dup['category']
341+
).exclude(id=dup['keep_id'])
342+
for package in to_delete:
343+
info_message(text=f'Removing duplicate Package {package}')
344+
package.delete()
335345

336346

337347
def clean_packagenames():

0 commit comments

Comments
 (0)