Skip to content

Commit 6dcf7c1

Browse files
committed
speed up unpoify
1 parent 31c4563 commit 6dcf7c1

File tree

2 files changed

+32
-24
lines changed

2 files changed

+32
-24
lines changed

msg2po/encoding.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,10 @@ def _build_cp1258_transliteration_rules() -> dict[str, str]:
8989
]
9090

9191

92-
def get_enc(lang_path: str = "", file_path: str = ""):
92+
_UTF_NAME_RE = re.compile(r".*_ee\.tra$")
93+
94+
95+
def get_enc(lang_path: str = "", file_path: str = "") -> str:
9396
"""
9497
Infers encoding based on dir/PO name and file path.
9598
lang_path can be PO path or translation path, only basename is used.
@@ -124,8 +127,7 @@ def get_enc(lang_path: str = "", file_path: str = ""):
124127
if filename in UTF_FILENAMES:
125128
encoding = "utf-8"
126129

127-
utf_name = re.compile(r".*_ee\.tra$")
128-
if utf_name.match(filename):
130+
if _UTF_NAME_RE.match(filename):
129131
encoding = "utf-8"
130132

131133
return encoding
@@ -164,9 +166,11 @@ def encode_custom(text: str, encoding: str = "utf-8") -> str:
164166
Encodes and decodes the given text using the specified encoding,
165167
replacing invalid characters.
166168
If encoding is 'cp1258', it uses the encode_vietnamese function.
169+
UTF-8 is a passthrough: the text is returned unchanged, so lone surrogates (which UTF-8 cannot encode) are not replaced.
167170
"""
171+
if encoding == "utf-8":
172+
return text
168173
if encoding == "cp1258":
169174
return encode_vietnamese(text)
170-
else:
171-
# Graceful fallback for replace, can't really protect against invalid characters being entered in Weblate?
172-
return text.encode(encoding, "replace").decode(encoding)
175+
# Graceful fallback: replace characters the target encoding can't represent, since we can't really prevent invalid characters from being entered in Weblate.
176+
return text.encode(encoding, "replace").decode(encoding)

msg2po/languages.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,39 @@
11
# Language slug mapping and PO-to-directory resolution.
22

3+
import functools
4+
35
from msg2po.common import find_files
46
from msg2po.config import CONFIG
57
from msg2po.core import basename, strip_ext
68

9+
_SLUG_MAP = {
10+
"cs": "czech",
11+
"de": "german",
12+
"fr": "french",
13+
"it": "italian",
14+
"hu": "hungarian",
15+
"pl": "polish",
16+
"pt": "portuguese",
17+
"pt_br": "portuguese",
18+
"es": "spanish",
19+
"ru": "russian",
20+
"sv": "swedish",
21+
"tchinese": "tchinese",
22+
"uk": "ukrainian",
23+
"vi": "vietnamese",
24+
}
25+
726

8-
def language_slug(po_filename):
27+
@functools.lru_cache(maxsize=64)
28+
def language_slug(po_filename: str) -> str:
929
"""
1030
Allows extracting PO files into directories with simplified language names, e.g. pt_BR.po -> portuguese/1.msg.
1131
Working with language codes is not convenient in mods.
1232
A temporary hack until a better solution is found.
1333
"""
14-
slug_map = {
15-
"cs": "czech",
16-
"de": "german",
17-
"fr": "french",
18-
"it": "italian",
19-
"hu": "hungarian",
20-
"pl": "polish",
21-
"pt": "portuguese",
22-
"pt_br": "portuguese",
23-
"es": "spanish",
24-
"ru": "russian",
25-
"sv": "swedish",
26-
"tchinese": "tchinese",
27-
"uk": "ukrainian",
28-
"vi": "vietnamese",
29-
}
3034
slug = strip_ext(basename(po_filename)).lower()
3135
if CONFIG.simple_languages:
32-
slug = slug_map.get(slug, slug)
36+
slug = _SLUG_MAP.get(slug, slug)
3337
return slug
3438

3539

0 commit comments

Comments
 (0)