|
6 | 6 | 2. 扫描所有Python文件中的国际化代码 |
7 | 7 | 3. 合并已有翻译和新发现的内容 |
8 | 8 | 4. 生成排序后的翻译文件 |
| 9 | +5. 扫描所有Python文件中未使用国际化功能的代码,并写入 `not_internationalized.txt` 文件中 |
9 | 10 | """ |
10 | 11 |
|
11 | 12 | import re |
|
39 | 40 | # 说明:因为有VCS,所以默认不备份。 |
40 | 41 | BACKUP_PO = False |
41 | 42 |
|
# Config 5: whether to scan for code that is not internationalized.
SCAN_NOT_INTERNATIONALIZED = True

# Config 6: literals that are never reported as "not internationalized".
NOT_INTERNATIONALIZED_IGNORE_ARRAYS = frozenset({
    "Error: ",
    "data: ",
    "date: ",
    "v: ",
})
| 53 | + |
42 | 54 |
|
43 | 55 | class I18nAutomation: |
44 | 56 | """国际化自动化工具类""" |
@@ -580,6 +592,158 @@ def write_po_files(self): |
580 | 592 |
|
581 | 593 | print("\n所有翻译文件已生成完成!!!") |
582 | 594 |
|
def scan_non_i18n_strings(self):
    """
    Step 5: scan Python files for strings that should be internationalized but are not.

    Every ``*.py`` file below ``<base_dir>/apps`` is searched, except files that
    live inside a ``__pycache__``, ``migrations`` or ``locales`` directory. A plain
    string literal is a candidate when it is the first argument of
    ``raise X(...)``, ``logger.<level>(...)`` or ``print(...)``, or when it is
    returned directly (``return "..."``).

    A candidate is dropped when it is blank, matches one of the exclusion
    patterns, is contained in ``self.scanned_i18n`` (if that attribute exists),
    or is listed in ``NOT_INTERNATIONALIZED_IGNORE_ARRAYS``.

    Side effects:
        Prints progress information and stores the result on
        ``self.non_i18n_strings``.

    Returns:
        A list of dicts with the keys ``'file'`` (path relative to
        ``self.base_dir`` using forward slashes), ``'line_no'`` (1-based line on
        which the matched statement starts) and ``'msgid'`` (the unescaped
        literal).
    """
    print("\n" + "="*60)
    print("步骤5: 扫描未国际化的字符串")
    print("="*60)

    non_i18n_list = []
    apps_dir = self.base_dir / 'apps'

    # Statement shapes whose first string literal is likely user-visible.
    patterns = [
        # raise Exception("...") / raise pkg.ValidationError("...")
        re.compile(r'\braise\s+\w+[\w\.]*\s*\(\s*r?["\']((?:[^"\'\\]|\\.)*?)["\']', re.MULTILINE | re.DOTALL),
        # logger.error("..."), logger.warning("..."), logger.info("..."), ...
        re.compile(r'\blogger\.(?:error|warning|info|debug|critical)\s*\(\s*r?["\']((?:[^"\'\\]|\\.)*?)["\']', re.MULTILINE | re.DOTALL),
        # print("...")
        re.compile(r'\bprint\s*\(\s*r?["\']((?:[^"\'\\]|\\.)*?)["\']', re.MULTILINE | re.DOTALL),
        # return "..."
        re.compile(r'\breturn\s+r?["\']((?:[^"\'\\]|\\.)*?)["\']', re.MULTILINE | re.DOTALL),
    ]

    # Literals that should not be reported. These are applied with ``.match``
    # and deliberately WITHOUT re.MULTILINE: with that flag ``$`` would match
    # at the end of the first line, so a multi-line message whose first line
    # looks like an identifier would be excluded by mistake.
    exclude_patterns = [
        re.compile(r'^[a-zA-Z0-9_\-\.\/\:\{\}]+$'),  # technical identifiers, paths, etc.
        re.compile(r'^\s*$'),  # whitespace only
        re.compile(r'^(?:[{}[\]:,.\-\s*\n<>|│├─]|%s)+$'),  # punctuation / placeholders only
    ]

    # Directory names whose contents are never scanned.
    skipped_dirs = {'__pycache__', 'migrations', 'locales'}

    # Strings already known to be internationalized; presumably filled by an
    # earlier scan step - an empty tuple is used when the attribute is absent.
    already_i18n = getattr(self, 'scanned_i18n', ())

    py_files = list(apps_dir.rglob('*.py'))
    print(f"找到 {len(py_files)} 个 Python 文件")

    scanned_count = 0
    for py_file in py_files:
        # Compare whole directory components below ``apps`` so that a project
        # root or a file name that merely contains one of the words is not
        # skipped by accident.
        if skipped_dirs.intersection(py_file.relative_to(apps_dir).parts[:-1]):
            continue

        try:
            with open(py_file, 'r', encoding='utf-8') as f:
                content = f.read()

            relative_path = str(py_file.relative_to(self.base_dir)).replace("\\", "/")

            for pattern in patterns:
                for match in pattern.finditer(content):
                    msgid = self._unescape_string(match.group(1))

                    if not msgid or not msgid.strip():
                        continue
                    if any(excluded.match(msgid) for excluded in exclude_patterns):
                        continue
                    if msgid in already_i18n:
                        continue
                    if msgid in NOT_INTERNATIONALIZED_IGNORE_ARRAYS:
                        continue

                    non_i18n_list.append({
                        'file': relative_path,
                        'line_no': content[:match.start()].count('\n') + 1,
                        'msgid': msgid,
                    })

            # Count every file that was actually read and scanned, which is
            # what the summary line below reports.
            scanned_count += 1

        except Exception as e:
            # Best effort: report the failing file and keep scanning the rest.
            print(f"处理文件 {py_file} 时出错: {e}")

    print(f"扫描了 {scanned_count} 个文件")
    print(f"发现 {len(non_i18n_list)} 条未国际化的字符串")

    self.non_i18n_strings = non_i18n_list
    return non_i18n_list
| 694 | + |
def write_not_internationalized_file(self):
    """
    Step 6: dump the collected non-internationalized strings to
    ``not_internationalized.txt`` inside ``self.locales_dir``.

    Entries in ``self.non_i18n_strings`` (initialised to an empty list when the
    attribute is missing) are grouped by msgid. Groups are written in sorted
    msgid order; each group lists its ``#: file:line`` references sorted by
    file and line number, followed by a ``msgid "..."`` line and a blank line.
    """
    print("\n" + "="*60)
    print("步骤6: 写入 not_internationalized.txt 文件")
    print("="*60)

    target_path = self.locales_dir / 'not_internationalized.txt'

    if not hasattr(self, 'non_i18n_strings'):
        self.non_i18n_strings = []

    # msgid -> list of {'file', 'line_no'} references
    grouped = {}
    for entry in self.non_i18n_strings:
        grouped.setdefault(entry['msgid'], []).append({
            'file': entry['file'],
            'line_no': entry['line_no'],
        })

    output_lines = []
    for text in sorted(grouped):
        refs = sorted(grouped[text], key=lambda ref: (ref['file'], ref['line_no']))
        output_lines.extend(f"#: {ref['file']}:{ref['line_no']}" for ref in refs)
        output_lines.append(f'msgid "{self._escape_string(text)}"')
        output_lines.append('')  # blank separator line

    with open(target_path, 'w', encoding='utf-8') as out:
        out.write('\n'.join(output_lines))

    print(f"已写入 {len(grouped)} 条未国际化字符串到: {target_path}")
| 746 | + |
583 | 747 | def generate_report(self): |
584 | 748 | """ |
585 | 749 | 生成翻译报告 |
@@ -677,6 +841,13 @@ def run(self): |
677 | 841 | # 步骤4: 写入文件 |
678 | 842 | self.write_po_files() |
679 | 843 |
|
| 844 | + if SCAN_NOT_INTERNATIONALIZED: |
| 845 | + # 步骤5: 扫描未国际化的字符串 |
| 846 | + self.scan_non_i18n_strings() |
| 847 | + |
| 848 | + # 步骤6: 写入 not_internationalized.txt |
| 849 | + self.write_not_internationalized_file() |
| 850 | + |
680 | 851 | # 生成报告 |
681 | 852 | self.generate_report() |
682 | 853 |
|
|
0 commit comments