Skip to content

Commit efe0f5d

Browse files
valentijnscholten and Valentijn Scholten authored
async delete: retry on deadlock (#13863)
* delete_chunk: ensure ordering
* delete_chunk: wait for chunk_deletion to complete
* delete_chunk: retry on deadlock
* ruff

---------

Co-authored-by: Valentijn Scholten <valentijn.scholten@iodigital.com>
1 parent b4771c0 commit efe0f5d

1 file changed

Lines changed: 61 additions & 18 deletions

File tree

dojo/utils.py

Lines changed: 61 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
import mimetypes
77
import os
88
import pathlib
9+
import random
910
import re
11+
import time
1012
from calendar import monthrange
1113
from collections.abc import Callable
1214
from datetime import date, datetime, timedelta
@@ -29,6 +31,7 @@
2931
from django.contrib.auth.signals import user_logged_in, user_logged_out, user_login_failed
3032
from django.contrib.contenttypes.models import ContentType
3133
from django.core.paginator import Paginator
34+
from django.db import OperationalError
3235
from django.db.models import Case, Count, F, IntegerField, Q, Sum, Value, When
3336
from django.db.models.query import QuerySet
3437
from django.db.models.signals import post_save
@@ -2003,22 +2006,51 @@ def __init__(self, *args, **kwargs):
20032006
@dojo_async_task
@app.task
def delete_chunk(self, objects, **kwargs):
    """
    Delete every object in ``objects``, retrying deletes that hit a database deadlock.

    Each object gets up to ``max_retries`` delete attempts. Only an
    ``OperationalError`` whose message contains "deadlock detected" is retried;
    between attempts the task sleeps with exponential backoff plus jitter and
    reloads the object from the database.

    Args:
        objects: iterable of model instances to delete, processed in order.
        **kwargs: accepted for the task decorators; not used in this body.

    Raises:
        OperationalError: for any non-deadlock database error, or when the
            deadlock persists on the last allowed attempt.

    """
    # Now delete all objects with retry for deadlocks
    max_retries = 3
    for obj in objects:
        retry_count = 0
        while retry_count < max_retries:
            try:
                obj.delete()
                break  # Success, exit retry loop
            except OperationalError as e:
                # The match is on the message text only.
                # NOTE(review): "deadlock detected" is the PostgreSQL wording - confirm if other backends matter.
                if "deadlock detected" not in str(e).lower():
                    # Not a deadlock, re-raise
                    raise
                retry_count += 1
                if retry_count >= max_retries:
                    logger.error(
                        f"ASYNC_DELETE: Deadlock persisted after {max_retries} retries for {self.get_object_name(obj)} {obj.pk}: {e}",
                    )
                    raise
                # Exponential backoff with jitter, so competing workers do not retry in lockstep
                wait_time = (2 ** retry_count) + random.uniform(0, 1)  # noqa: S311
                logger.warning(
                    f"ASYNC_DELETE: Deadlock detected deleting {self.get_object_name(obj)} {obj.pk}, "
                    f"retrying ({retry_count}/{max_retries}) after {wait_time:.2f}s",
                )
                time.sleep(wait_time)
                # Refresh object from DB before retry. The row may have been removed by
                # another worker while we were sleeping; in that case there is nothing
                # left to delete and the chunk must not fail because of it.
                try:
                    obj.refresh_from_db()
                except obj.DoesNotExist:
                    logger.debug("ASYNC_DELETE: object has already been deleted elsewhere. Skipping")
                    break
            except AssertionError:
                logger.debug("ASYNC_DELETE: object has already been deleted elsewhere. Skipping")
                # The id must be None
                # The object has already been deleted elsewhere
                break
            except LogEntry.MultipleObjectsReturned:
                # Delete the log entries first, then delete
                LogEntry.objects.filter(
                    content_type=ContentType.objects.get_for_model(obj.__class__),
                    object_pk=str(obj.pk),
                    action=LogEntry.Action.DELETE,
                ).delete()
                # Now delete the object again (no retry needed for this case)
                obj.delete()
                break
20222054

20232055
@dojo_async_task
20242056
@app.task
@@ -2037,18 +2069,29 @@ def delete(self, obj, **kwargs):
20372069
@app.task
def crawl(self, obj, model_list, **kwargs):
    """
    Delete everything related to ``obj`` in chunks, then delete ``obj`` itself.

    For each ``(model, lookup)`` entry in ``model_list`` the matching rows are
    selected (ids only, de-duplicated, ordered by id), split with
    ``self.chunk_list`` and handed to ``self.delete_chunk``. Any returned
    handle that exposes ``get`` is waited on before ``obj`` is deleted.

    Args:
        obj: the parent object whose related rows are removed first.
        model_list: sequence of entries; index 0 is the model class and
            index 1 the filter lookup name pointing at ``obj``.
        **kwargs: accepted for the task decorators; not used in this body.

    NOTE(review): the ``get`` calls block this task while it waits on other
    tasks. Presumably these are Celery results; Celery advises against
    synchronous subtasks, so confirm the worker setup permits it.

    """
    wait_seconds = 300  # 5 minute timeout per awaited deletion
    logger.debug(f"ASYNC_DELETE: Crawling {self.get_object_name(obj)}: {obj!s}")

    pending = []
    for entry in model_list:
        related_model, lookup = entry[0], entry[1]
        # Only fetch the IDs; the stable id ordering keeps concurrent deleters
        # touching rows in the same sequence.
        related_rows = (
            related_model.objects
            .only("id")
            .filter(**{lookup: obj})
            .distinct()
            .order_by("id")
        )
        logger.debug(f"ASYNC_DELETE: Deleting {len(related_rows)} {self.get_object_name(related_model)}s in chunks")
        for batch in self.chunk_list(related_model, related_rows):
            logger.debug(f"deleting {len(batch)} {self.get_object_name(related_model)}")
            handle = self.delete_chunk(batch)
            # Only asynchronous dispatches return something that can be awaited
            if hasattr(handle, "get"):
                pending.append(handle)

    # Wait for all chunk deletions to complete (they run in parallel)
    for handle in pending:
        handle.get(timeout=wait_seconds)

    # The parent goes last, once every related chunk is gone
    final_handle = self.delete_chunk([obj])
    if hasattr(final_handle, "get"):
        final_handle.get(timeout=wait_seconds)

    logger.debug(f"ASYNC_DELETE: Successfully deleted {self.get_object_name(obj)}: {obj!s}")
20532096

20542097
def chunk_list(self, model, full_list):

0 commit comments

Comments
 (0)