speed up unpoify

burner1024 · burner1024 · commit 6dcf7c1b96e2 · 2026-03-08T15:33:54.000+07:00
diff --git a/msg2po/encoding.py b/msg2po/encoding.py
@@ -89,7 +89,10 @@ def _build_cp1258_transliteration_rules() -> dict[str, str]:
 ]
 
 
-def get_enc(lang_path: str = "", file_path: str = ""):
+_UTF_NAME_RE = re.compile(r".*_ee\.tra$")
+
+
+def get_enc(lang_path: str = "", file_path: str = "") -> str:
     """
     Infers encoding based on dir/PO name and file path.
     lang_path can be PO path or translation path, only basename is used.
@@ -124,8 +127,7 @@ def get_enc(lang_path: str = "", file_path: str = ""):
     if filename in UTF_FILENAMES:
         encoding = "utf-8"
 
-    utf_name = re.compile(r".*_ee\.tra$")
-    if utf_name.match(filename):
+    if _UTF_NAME_RE.match(filename):
         encoding = "utf-8"
 
     return encoding
@@ -164,9 +166,11 @@ def encode_custom(text: str, encoding: str = "utf-8") -> str:
     Encodes and decodes the given text using the specified encoding,
     replacing invalid characters.
     If encoding is 'cp1258', it uses the encode_vietnamese function.
+    UTF-8 text is returned as-is to skip the round trip; lone surrogates are therefore no longer replaced.
     """
+    if encoding == "utf-8":
+        return text
     if encoding == "cp1258":
         return encode_vietnamese(text)
-    else:
-        # Graceful fallback for replace, can't really protect against invalid characters being entered in Weblate?
-        return text.encode(encoding, "replace").decode(encoding)
+    # Graceful fallback for replace, can't really protect against invalid characters being entered in Weblate?
+    return text.encode(encoding, "replace").decode(encoding)
diff --git a/msg2po/languages.py b/msg2po/languages.py
@@ -1,35 +1,39 @@
 # Language slug mapping and PO-to-directory resolution.
 
+import functools
+
 from msg2po.common import find_files
 from msg2po.config import CONFIG
 from msg2po.core import basename, strip_ext
 
+_SLUG_MAP = {
+    "cs": "czech",
+    "de": "german",
+    "fr": "french",
+    "it": "italian",
+    "hu": "hungarian",
+    "pl": "polish",
+    "pt": "portuguese",
+    "pt_br": "portuguese",
+    "es": "spanish",
+    "ru": "russian",
+    "sv": "swedish",
+    "tchinese": "tchinese",
+    "uk": "ukrainian",
+    "vi": "vietnamese",
+}
+
 
-def language_slug(po_filename):
+@functools.lru_cache(maxsize=64)
+def language_slug(po_filename: str) -> str:
     """
     Allows to extract PO files into simplified language names: pt_BR.po -> portuguese/1.msg.
     Working with language codes is not convenient in mods.
     A temporary hack until a better solution is found.
     """
-    slug_map = {
-        "cs": "czech",
-        "de": "german",
-        "fr": "french",
-        "it": "italian",
-        "hu": "hungarian",
-        "pl": "polish",
-        "pt": "portuguese",
-        "pt_br": "portuguese",
-        "es": "spanish",
-        "ru": "russian",
-        "sv": "swedish",
-        "tchinese": "tchinese",
-        "uk": "ukrainian",
-        "vi": "vietnamese",
-    }
     slug = strip_ext(basename(po_filename)).lower()
     if CONFIG.simple_languages:
-        slug = slug_map.get(slug, slug)
+        slug = _SLUG_MAP.get(slug, slug)
     return slug