Skip to content

Commit 458a707

Browse files
Merge pull request #2731 from IFRCGo/fix/translate-model-n+1-fix
Fix N+1 translation cache lookups in celery translation tasks
2 parents 74e8000 + 0d45ce7 commit 458a707

3 files changed

Lines changed: 73 additions & 24 deletions

File tree

api/management/commands/sync_appealdocs.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,12 +48,12 @@ def handle(self, *args, **options):
4848
print("Doing a full scan of all Appeals")
4949
qset = Appeal.objects.all()
5050
else:
51-
# By default, only check appeals for the past 6 months where Appeal Documents is 0
51+
# By default, only check appeals modified in the past 6 months where Appeal Documents is 0
5252
now = datetime.now().replace(tzinfo=timezone.utc)
5353
six_months_ago = now - relativedelta(months=6)
5454
# This was the original qset, but it wouldn't get newer docs for the same Appeals
5555
# qset = Appeal.objects.filter(appealdocument__isnull=True).filter(end_date__gt=six_months_ago)
56-
qset = Appeal.objects.filter(end_date__gt=six_months_ago)
56+
qset = Appeal.objects.filter(modified_at__gt=six_months_ago)
5757

5858
# qset = Appeal.objects.filter(code='Something') # could help debug
5959
# First get all Appeal Codes

lang/tasks.py

Lines changed: 44 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -39,34 +39,56 @@ def translate_fields_object(self, obj, field):
3939
if not initial_value or not initial_lang:
4040
return
4141

42-
for lang in AVAILABLE_LANGUAGES:
43-
lang_field = build_localized_fieldname(field, lang)
44-
value = getattr(obj, lang_field, None)
45-
if value:
46-
continue
42+
model = type(obj)
43+
table_field = f"{model._meta.app_label}:{model._meta.model_name}:{field}"
44+
field_max_length = model._meta.get_field(field).max_length
45+
46+
pending_langs = {
47+
lang: build_localized_fieldname(field, lang)
48+
for lang in AVAILABLE_LANGUAGES
49+
if not getattr(obj, build_localized_fieldname(field, lang), None)
50+
}
51+
if not pending_langs:
52+
return
4753

48-
model = type(obj)
49-
app_label = model._meta.app_label
50-
model_name = model._meta.model_name
51-
table_field = f"{app_label}:{model_name}:{field}"
54+
cached = self.translator.get_cached_translations(
55+
initial_value,
56+
list(pending_langs.keys()),
57+
source_language=initial_lang,
58+
table_field=table_field,
59+
)
5260

53-
new_value = self.translator.translate_text(
54-
initial_value,
55-
lang,
56-
source_language=initial_lang,
57-
table_field=table_field,
58-
)
61+
for lang, lang_field in pending_langs.items():
62+
if lang in cached:
63+
translated = cached[lang]
64+
else:
65+
translated = self.translator.translate_text(
66+
initial_value,
67+
lang,
68+
source_language=initial_lang,
69+
table_field=table_field,
70+
)
5971

60-
if new_value is None:
61-
logger.warning(f"Translation failed for Model ({type(obj)}<{lang_field}>) pk: ({obj.pk})")
72+
if translated is None:
73+
logger.warning(
74+
"Translation failed for %s.%s pk=%s",
75+
model.__name__,
76+
lang_field,
77+
obj.pk,
78+
)
6279
continue
6380

64-
field_max_length = model._meta.get_field(field).max_length
65-
if field_max_length and len(new_value) > field_max_length:
66-
logger.warning(f"Greater then max_length found for Model ({type(obj)}<{lang_field}>) pk: ({obj.pk})")
67-
new_value = new_value[:field_max_length]
81+
if field_max_length and len(translated) > field_max_length:
82+
logger.warning(
83+
"Translation exceeds max_length (%d) for %s.%s pk=%s",
84+
field_max_length,
85+
model.__name__,
86+
lang_field,
87+
obj.pk,
88+
)
89+
translated = translated[:field_max_length]
6890

69-
setattr(obj, lang_field, new_value)
91+
setattr(obj, lang_field, translated)
7092
yield lang_field
7193

7294
@staticmethod

lang/translation.py

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,9 @@ def sha256_hash(text):
2727

2828

2929
class BaseTranslator:
30+
def get_cached_translations(self, text, dest_languages, source_language=None, table_field=""):
31+
return {}
32+
3033
def _fake_translation(self, text, dest_language, source_language, table_field=""):
3134
"""
3235
This is only used for test
@@ -200,6 +203,30 @@ def translate_text(self, text, dest_language, source_language=None, table_field=
200203
)
201204
return translated + textTail
202205

206+
def get_cached_translations(self, text, dest_languages, source_language=None, table_field=""):
207+
if not dest_languages or len(text) >= 300:
208+
return {}
209+
210+
text_hash = sha256_hash(text)
211+
source_language = source_language or ""
212+
caches = TranslationCache.objects.filter(
213+
text_hash=text_hash,
214+
source_language=source_language,
215+
dest_language__in=dest_languages,
216+
)
217+
cache_by_lang = {cache.dest_language: cache for cache in caches}
218+
if not cache_by_lang:
219+
return {}
220+
cache_ids = [cache.id for cache in cache_by_lang.values()]
221+
TranslationCache.objects.filter(id__in=cache_ids).update(
222+
last_used=timezone.now(),
223+
num_calls=F("num_calls") + 1,
224+
)
225+
TranslationCache.objects.filter(id__in=cache_ids, other_fields=False).exclude(table_field=table_field).update(
226+
other_fields=True,
227+
)
228+
return {lang: cache.translated_text for lang, cache in cache_by_lang.items()}
229+
203230

204231
def get_translator_class():
205232
return import_string(settings.AUTO_TRANSLATION_TRANSLATOR)

0 commit comments

Comments (0)