diff --git a/api/management/commands/sync_appealdocs.py b/api/management/commands/sync_appealdocs.py index d311fdd2b..08e5612c1 100644 --- a/api/management/commands/sync_appealdocs.py +++ b/api/management/commands/sync_appealdocs.py @@ -48,12 +48,12 @@ def handle(self, *args, **options): print("Doing a full scan of all Appeals") qset = Appeal.objects.all() else: - # By default, only check appeals for the past 6 months where Appeal Documents is 0 + # By default, only check appeals modified in the past 6 months where Appeal Documents is 0 now = datetime.now().replace(tzinfo=timezone.utc) six_months_ago = now - relativedelta(months=6) # This was the original qset, but it wouldn't get newer docs for the same Appeals # qset = Appeal.objects.filter(appealdocument__isnull=True).filter(end_date__gt=six_months_ago) - qset = Appeal.objects.filter(end_date__gt=six_months_ago) + qset = Appeal.objects.filter(modified_at__gt=six_months_ago) # qset = Appeal.objects.filter(code='Something') # could help debug # First get all Appeal Codes diff --git a/lang/tasks.py b/lang/tasks.py index 83a79f8c0..e27947ffd 100644 --- a/lang/tasks.py +++ b/lang/tasks.py @@ -39,34 +39,56 @@ def translate_fields_object(self, obj, field): if not initial_value or not initial_lang: return - for lang in AVAILABLE_LANGUAGES: - lang_field = build_localized_fieldname(field, lang) - value = getattr(obj, lang_field, None) - if value: - continue + model = type(obj) + table_field = f"{model._meta.app_label}:{model._meta.model_name}:{field}" + field_max_length = model._meta.get_field(field).max_length + + pending_langs = { + lang: build_localized_fieldname(field, lang) + for lang in AVAILABLE_LANGUAGES + if not getattr(obj, build_localized_fieldname(field, lang), None) + } + if not pending_langs: + return - model = type(obj) - app_label = model._meta.app_label - model_name = model._meta.model_name - table_field = f"{app_label}:{model_name}:{field}" + cached = self.translator.get_cached_translations( + initial_value, + list(pending_langs.keys()), + source_language=initial_lang, + table_field=table_field, + ) - new_value = self.translator.translate_text( - initial_value, - lang, - source_language=initial_lang, - table_field=table_field, - ) + for lang, lang_field in pending_langs.items(): + if lang in cached: + translated = cached[lang] + else: + translated = self.translator.translate_text( + initial_value, + lang, + source_language=initial_lang, + table_field=table_field, + ) - if new_value is None: - logger.warning(f"Translation failed for Model ({type(obj)}<{lang_field}>) pk: ({obj.pk})") + if translated is None: + logger.warning( + "Translation failed for %s.%s pk=%s", + model.__name__, + lang_field, + obj.pk, + ) continue - field_max_length = model._meta.get_field(field).max_length - if field_max_length and len(new_value) > field_max_length: - logger.warning(f"Greater then max_length found for Model ({type(obj)}<{lang_field}>) pk: ({obj.pk})") - new_value = new_value[:field_max_length] + if field_max_length and len(translated) > field_max_length: + logger.warning( + "Translation exceeds max_length (%d) for %s.%s pk=%s", + field_max_length, + model.__name__, + lang_field, + obj.pk, + ) + translated = translated[:field_max_length] - setattr(obj, lang_field, new_value) + setattr(obj, lang_field, translated) yield lang_field @staticmethod diff --git a/lang/translation.py b/lang/translation.py index c308cec91..9e920a914 100644 --- a/lang/translation.py +++ b/lang/translation.py @@ -27,6 +27,9 @@ def sha256_hash(text): class BaseTranslator: + def get_cached_translations(self, text, dest_languages, source_language=None, table_field=""): + return {} + def _fake_translation(self, text, dest_language, source_language, table_field=""): """ This is only used for test @@ -200,6 +203,30 @@ def translate_text(self, text, dest_language, source_language=None, table_field= ) return translated + textTail + def get_cached_translations(self, text, dest_languages, source_language=None, table_field=""): + if not dest_languages or len(text) >= 300: + return {} + + text_hash = sha256_hash(text) + source_language = source_language or "" + caches = TranslationCache.objects.filter( + text_hash=text_hash, + source_language=source_language, + dest_language__in=dest_languages, + ) + cache_by_lang = {cache.dest_language: cache for cache in caches} + if not cache_by_lang: + return {} + cache_ids = [cache.id for cache in cache_by_lang.values()] + TranslationCache.objects.filter(id__in=cache_ids).update( + last_used=timezone.now(), + num_calls=F("num_calls") + 1, + ) + TranslationCache.objects.filter(id__in=cache_ids, other_fields=False).exclude(table_field=table_field).update( + other_fields=True, + ) + return {lang: cache.translated_text for lang, cache in cache_by_lang.items()} + def get_translator_class(): return import_string(settings.AUTO_TRANSLATION_TRANSLATOR)