Skip to content

Commit c77887c

Browse files
committed
Scope female entries and clean unpoify output
1 parent ed6e849 commit c77887c

7 files changed

Lines changed: 190 additions & 85 deletions

File tree

msg2po/conversion.py

Lines changed: 58 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -118,10 +118,10 @@ def po2file(
118118
resulting_entries = []
119119
extract_fuzzy = CONFIG.extract_fuzzy
120120

121-
for file_trans in trans_map[occurrence_path]:
122-
file_index = file_trans["file_index"]
123-
po_index = file_trans["po_index"]
124-
entry = po[po_index]
121+
for file_trans in trans_map[occurrence_path]:
122+
file_index = file_trans["file_index"]
123+
po_index = file_trans["po_index"]
124+
entry = po[po_index]
125125

126126
if entry.msgstr == "" or ("fuzzy" in entry.flags and not extract_fuzzy): # if not translated, keep msgid
127127
value = entry.msgid
@@ -135,13 +135,14 @@ def po2file(
135135
# context
136136
context = entry.msgctxt
137137

138-
# female strings
139-
female = None
140-
if entry.msgid in female_map:
141-
fe_entry = female_map[entry.msgid]
142-
if fe_entry.msgstr == "" or "fuzzy" in fe_entry.flags and not extract_fuzzy:
143-
female = fe_entry.msgid
144-
else:
138+
# female strings
139+
female = None
140+
female_occurrence = (occurrence_path, str(file_index))
141+
if female_occurrence in female_map:
142+
fe_entry = female_map[female_occurrence]
143+
if fe_entry.msgstr == "" or "fuzzy" in fe_entry.flags and not extract_fuzzy:
144+
female = fe_entry.msgid
145+
else:
145146
female = fe_entry.msgstr
146147

147148
resulting_entries.append({"index": file_index, "value": value, "female": female, "context": context})
@@ -287,40 +288,42 @@ class FemaleUpdate:
287288

288289

289290
@dataclass(frozen=True)
290-
class NewFemaleEntry:
291-
"""A new female PO entry to append."""
292-
293-
msgid: str
294-
msgstr: str
295-
296-
297-
def _compute_female_update(
298-
e: polib.POEntry,
299-
female_value: str,
300-
female_map: dict[str, polib.POEntry],
301-
overwrite: bool,
302-
same: bool,
303-
) -> FemaleUpdate | NewFemaleEntry | None:
304-
"""Decide what to do with a female translation value."""
305-
if e.msgid in female_map:
306-
fe = female_map[e.msgid]
307-
if not fe or fe.msgstr == female_value:
308-
return None
309-
logger.info(f"female translation change: ORIG: {e.msgid} | OLD: {fe.msgstr} | NEW: {female_value}")
291+
class NewFemaleEntry:
292+
"""A new female PO entry to append."""
293+
294+
msgid: str
295+
msgstr: str
296+
occurrence: tuple[str, str]
297+
298+
299+
def _compute_female_update(
300+
e: polib.POEntry,
301+
female_value: str,
302+
female_map: dict[tuple[str, str], polib.POEntry],
303+
occurrence: tuple[str, str],
304+
overwrite: bool,
305+
same: bool,
306+
) -> FemaleUpdate | NewFemaleEntry | None:
307+
"""Decide what to do with a female translation value."""
308+
if occurrence in female_map:
309+
fe = female_map[occurrence]
310+
if not fe or fe.msgstr == female_value:
311+
return None
312+
logger.info(f"female translation change: ORIG: {e.msgid} | OLD: {fe.msgstr} | NEW: {female_value}")
310313
if not overwrite:
311314
logger.debug("Female translation already exists, overwrite disabled, skipping")
312315
return None
313316
if e.msgid == female_value:
314317
if same:
315318
logger.info(f"source and female translation are the same, using regardless: {e.msgid}")
316319
else:
317-
logger.info(f"source and female translation are the same for {e.occurrences}, skipping")
318-
return None
319-
return FemaleUpdate(entry=fe, new_msgstr=female_value, clear_fuzzy="fuzzy" in fe.flags)
320-
elif e.msgstr != female_value:
321-
logger.info(f"new female translation detected: ORIG: {e.msgid} | MALE: {e.msgstr} | FEMALE: {female_value}")
322-
return NewFemaleEntry(msgid=e.msgid, msgstr=female_value)
323-
return None
320+
logger.info(f"source and female translation are the same for {e.occurrences}, skipping")
321+
return None
322+
return FemaleUpdate(entry=fe, new_msgstr=female_value, clear_fuzzy="fuzzy" in fe.flags)
323+
elif e.msgstr != female_value:
324+
logger.info(f"new female translation detected: ORIG: {e.msgid} | MALE: {e.msgstr} | FEMALE: {female_value}")
325+
return NewFemaleEntry(msgid=e.msgid, msgstr=female_value, occurrence=occurrence)
326+
return None
324327

325328

326329
def _compute_entry_update(
@@ -364,13 +367,13 @@ def _compute_entry_update(
364367

365368

366369
def compute_msgstr_updates(
367-
trans_entries: list,
368-
occurrence_path: str,
369-
entries_dict: OrderedDict,
370-
female_map: dict[str, polib.POEntry],
371-
overwrite: bool,
372-
same: bool,
373-
extract_fuzzy: bool,
370+
trans_entries: list,
371+
occurrence_path: str,
372+
entries_dict: OrderedDict,
373+
female_map: dict[tuple[str, str], polib.POEntry],
374+
overwrite: bool,
375+
same: bool,
376+
extract_fuzzy: bool,
374377
) -> tuple[list[EntryUpdate], list[FemaleUpdate], list[NewFemaleEntry]]:
375378
"""Pure decision logic: compute what updates to apply without mutating anything."""
376379
entry_updates: list[EntryUpdate] = []
@@ -385,13 +388,14 @@ def compute_msgstr_updates(
385388
if (occurrence_path, t.index) not in entries_dict:
386389
continue
387390

388-
e: polib.POEntry = entries_dict[(occurrence_path, t.index)]
389-
390-
if t.female:
391-
result = _compute_female_update(e, t.female, female_map, overwrite, same)
392-
if isinstance(result, FemaleUpdate):
393-
female_updates.append(result)
394-
elif isinstance(result, NewFemaleEntry):
391+
e: polib.POEntry = entries_dict[(occurrence_path, t.index)]
392+
occurrence = (occurrence_path, t.index)
393+
394+
if t.female:
395+
result = _compute_female_update(e, t.female, female_map, occurrence, overwrite, same)
396+
if isinstance(result, FemaleUpdate):
397+
female_updates.append(result)
398+
elif isinstance(result, NewFemaleEntry):
395399
new_females.append(result)
396400

397401
update = _compute_entry_update(e, t.value, t.context, overwrite, same, extract_fuzzy)
@@ -424,8 +428,8 @@ def apply_msgstr_updates(
424428
fu.entry.flags.remove("fuzzy")
425429
fu.entry.previous_msgid = None
426430

427-
for nf in new_females:
428-
po.append(polib.POEntry(msgid=nf.msgid, msgstr=nf.msgstr, msgctxt=CONTEXT_FEMALE))
431+
for nf in new_females:
432+
po.append(polib.POEntry(msgid=nf.msgid, msgstr=nf.msgstr, msgctxt=CONTEXT_FEMALE, occurrences=[nf.occurrence]))
429433

430434

431435
def file2msgstr(

msg2po/indexed_po.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@
1111

1212

1313
@dataclass(frozen=True)
14-
class IndexedPO:
15-
po: polib.POFile
16-
trans_map: dict
17-
female_map: dict[str, polib.POEntry]
18-
occ_dict: OrderedDict
14+
class IndexedPO:
15+
po: polib.POFile
16+
trans_map: dict
17+
female_map: dict[tuple[str, str], polib.POEntry]
18+
occ_dict: OrderedDict
1919

2020
@classmethod
2121
def from_po(cls, po: polib.POFile) -> "IndexedPO":

msg2po/po_utils.py

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# PO file manipulation utilities: sorting, deduplication, female entry management,
22
# metadata generation, and fuzzy flag cleanup.
33

4-
from collections import OrderedDict
4+
from collections import OrderedDict, defaultdict
55
from datetime import datetime
66

77
import polib
@@ -54,32 +54,37 @@ def translation_entries(po: polib.POFile):
5454
return entries
5555

5656

57-
def female_entries(po: polib.POFile) -> dict[str, polib.POEntry]:
57+
def female_entries(po: polib.POFile) -> dict[tuple[str, str], polib.POEntry]:
5858
"""
59-
Returns mapping of male msgids to corresponding female PO entries.
60-
Uses pre-built dicts for O(n) lookup instead of O(n^2) linear scans.
59+
Returns mapping of male occurrences to corresponding female PO entries.
60+
61+
Female entries with explicit occurrences are scoped to those occurrences.
62+
Legacy female entries without occurrences are only mapped when they have
63+
exactly one possible male occurrence; ambiguous global matches are skipped.
6164
"""
62-
# Pre-build lookup dicts: msgid -> entry for non-female entries
63-
male_no_ctx = {} # entries with no context
64-
male_any_ctx = {} # entries with context != female
65+
male_occurrences: dict[str, list[tuple[str, str]]] = defaultdict(list)
6566
for e in po:
6667
if e.msgctxt == CONTEXT_FEMALE:
6768
continue
68-
if not e.msgctxt:
69-
male_no_ctx.setdefault(e.msgid, e)
70-
else:
71-
male_any_ctx.setdefault(e.msgid, e)
69+
for occurrence in e.occurrences:
70+
male_occurrences[e.msgid].append(occurrence)
7271

7372
entries = {}
7473
for e in po:
75-
if e.msgctxt != CONTEXT_FEMALE or len(e.occurrences) != 0:
74+
if e.msgctxt != CONTEXT_FEMALE:
7675
continue
77-
# Prefer male entry without context, fall back to any non-female context
78-
me = male_no_ctx.get(e.msgid) or male_any_ctx.get(e.msgid)
79-
if me:
80-
entries[me.msgid] = e
81-
else:
76+
if e.occurrences:
77+
for occurrence in e.occurrences:
78+
entries[occurrence] = e
79+
continue
80+
81+
matches = male_occurrences.get(e.msgid, [])
82+
if len(matches) == 1:
83+
entries[matches[0]] = e
84+
elif len(matches) == 0:
8285
logger.warning(f"couldn't find a corresponding male counterpart for a female entry: {e}")
86+
else:
87+
logger.warning(f"ambiguous female entry without occurrences, skipping auto-match: {e.msgid}")
8388
return entries
8489

8590

msg2po/unpoify.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import argparse
66
import concurrent.futures
77
import os
8+
import shutil
89
import sys
910

1011
from loguru import logger
@@ -20,6 +21,17 @@
2021
from msg2po.log import cli_entry, setup_logging
2122

2223

24+
def clean_generated_female_dirs(dst_dir: str) -> None:
25+
"""Remove generated *_female directories before regenerating translated files."""
26+
if not os.path.isdir(dst_dir):
27+
return
28+
29+
female_suffix = CONFIG.female_dir_suffix
30+
for entry in os.scandir(dst_dir):
31+
if entry.is_dir() and entry.name.endswith(female_suffix):
32+
shutil.rmtree(entry.path)
33+
34+
2335
def extract_po(pf: str, language_map: LanguageMap, base_dir: str):
2436
"""
2537
pf is po file basename
@@ -34,6 +46,7 @@ def extract_po(pf: str, language_map: LanguageMap, base_dir: str):
3446

3547
dst_dir = language_map.po2slug[pf]
3648
abs_dst_dir = os.path.join(base_dir, dst_dir)
49+
clean_generated_female_dirs(abs_dst_dir)
3750

3851
for ef in sorted(ipo.trans_map):
3952
enc = get_enc(abs_po_path, ef)

tests/test_conversion.py

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,25 @@ def test_removes_stale_dialog_female_file_when_falling_back_to_male(self, tmp_pa
168168
assert output.exists()
169169
assert not stale_female.exists()
170170

171+
def test_skips_ambiguous_occurrence_less_female_entry(self, tmp_path, monkeypatch):
172+
po = polib.POFile()
173+
po.append(polib.POEntry(msgid="Shared", msgstr="Male A", occurrences=[("dialog/a.msg", "100")]))
174+
po.append(polib.POEntry(msgid="Shared", msgstr="Male B", occurrences=[("dialog/b.msg", "200")]))
175+
po.append(polib.POEntry(msgid="Shared", msgstr="Female shared", msgctxt=CONTEXT_FEMALE))
176+
177+
config = replace(CONFIG, extract_format="sfall")
178+
monkeypatch.setattr("msg2po.conversion.CONFIG", config)
179+
monkeypatch.setattr("msg2po.po_utils.CONFIG", config)
180+
181+
dst_dir = tmp_path / "lang"
182+
output = dst_dir / "dialog" / "b.msg"
183+
184+
po2file(po, str(output), "utf-8", "dialog/b.msg", dst_dir=str(dst_dir))
185+
186+
female_output = dst_dir / "dialog_female" / "b.msg"
187+
assert output.read_text(encoding="utf-8") == "{200}{}{Male B}\n"
188+
assert not female_output.exists()
189+
171190

172191
class TestFile2Msgstr:
173192
def test_loads_translations(self, msg_file, msg_translated_file):
@@ -216,8 +235,39 @@ def test_tra_female_entries(self, tra_file, tra_translated_file):
216235
encoding="utf-8",
217236
)
218237
fe_map = female_entries(po)
219-
assert "Male greeting" in fe_map
220-
assert fe_map["Male greeting"].msgstr == "Salutation feminine"
238+
assert (tra_file, "103") in fe_map
239+
assert fe_map[(tra_file, "103")].msgstr == "Salutation feminine"
240+
241+
def test_new_female_entry_keeps_occurrence_scope(self, tmp_path, monkeypatch):
242+
source = tmp_path / "dialog" / "scope.msg"
243+
source.parent.mkdir(parents=True)
244+
source.write_text("{100}{}{Shared}\n", encoding="utf-8")
245+
246+
translated = tmp_path / "lang" / "dialog" / "scope.msg"
247+
translated.parent.mkdir(parents=True)
248+
translated.write_text("{100}{}{Male}\n", encoding="utf-8")
249+
250+
translated_female = tmp_path / "lang" / "dialog_female" / "scope.msg"
251+
translated_female.parent.mkdir(parents=True)
252+
translated_female.write_text("{100}{}{Female}\n", encoding="utf-8")
253+
254+
config = replace(CONFIG, extract_format="sfall")
255+
monkeypatch.setattr("msg2po.transfile.CONFIG", config)
256+
monkeypatch.setattr("msg2po.conversion.CONFIG", config)
257+
monkeypatch.setattr("msg2po.po_utils.CONFIG", config)
258+
259+
po = file2po(str(source), encoding="utf-8", occurrence_path="dialog/scope.msg")
260+
file2msgstr(
261+
input_file=str(translated),
262+
po=po,
263+
occurrence_path="dialog/scope.msg",
264+
encoding="utf-8",
265+
same=True,
266+
)
267+
268+
female = next(e for e in po if e.msgctxt == CONTEXT_FEMALE)
269+
assert female.msgstr == "Female"
270+
assert female.occurrences == [("dialog/scope.msg", "100")]
221271

222272

223273
class TestSortPo:
@@ -292,8 +342,8 @@ def test_finds_female_entries(self):
292342
po.append(male)
293343
po.append(female)
294344
fe_map = female_entries(po)
295-
assert "Hello" in fe_map
296-
assert fe_map["Hello"].msgstr == "Bonjour F"
345+
assert ("f.tra", "1") in fe_map
346+
assert fe_map[("f.tra", "1")].msgstr == "Bonjour F"
297347

298348
def test_empty_when_no_female(self, msg_file):
299349
po = file2po(msg_file, encoding="utf-8")

tests/test_indexed_po.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@ def test_from_po_builds_trans_map(self):
2222
assert "dialog/1.msg" in ipo.trans_map
2323
assert len(ipo.trans_map["dialog/1.msg"]) == 1
2424

25-
def test_from_po_builds_female_map(self):
26-
po = self._make_po()
27-
ipo = IndexedPO.from_po(po)
28-
assert "Hello" in ipo.female_map
29-
assert ipo.female_map["Hello"].msgstr == "Bonjour F"
25+
def test_from_po_builds_female_map(self):
26+
po = self._make_po()
27+
ipo = IndexedPO.from_po(po)
28+
assert ("dialog/1.msg", "100") in ipo.female_map
29+
assert ipo.female_map[("dialog/1.msg", "100")].msgstr == "Bonjour F"
3030

3131
def test_from_po_builds_occ_dict(self):
3232
po = self._make_po()

0 commit comments

Comments
 (0)