Skip to content

Commit 3fbcd31

Browse files
committed
Add dir_rule.normalize_zh option (default 'zh-cn') and implement configurable zh conversion; add JmcomicText.to_zh
1 parent c2ef2dd commit 3fbcd31

3 files changed

Lines changed: 47 additions & 9 deletions

File tree

src/jmcomic/jm_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ def new_postman(cls, session=False, **kwargs):
411411

412412
DEFAULT_OPTION_DICT: dict = {
413413
'log': None,
414-
'dir_rule': {'rule': 'Bd_Pname', 'base_dir': None},
414+
'dir_rule': {'rule': 'Bd_Pname', 'base_dir': None, 'normalize_zh': 'zh-cn'},
415415
'download': {
416416
'cache': True,
417417
'image': {'decode': True, 'suffix': None},

src/jmcomic/jm_option.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,16 @@ def enable_client_cache_on_condition(cls,
6161
class DirRule:
6262
RULE_BASE_DIR = 'Bd'
6363

64-
def __init__(self, rule: str, base_dir=None):
64+
def __init__(self, rule: str, base_dir=None, normalize_zh='zh-cn'):
65+
"""
66+
:param rule: DSL rule
67+
:param base_dir: base directory
68+
:param normalize_zh: 'zh-cn'|'zh-tw'|'none' or None. 控制是否以及如何进行繁简体归一化,默认 'zh-cn'
69+
"""
6570
base_dir = JmcomicText.parse_to_abspath(base_dir)
6671
self.base_dir = base_dir
6772
self.rule_dsl = rule
73+
self.normalize_zh = normalize_zh
6874
self.parser_list: List[Tuple[str, Callable]] = self.get_rule_parser_list(rule)
6975

7076
def decide_image_save_dir(self,
@@ -89,14 +95,18 @@ def apply_rule_to_path(self, album, photo, only_album_rules=False) -> str:
8995
jm_log('dir_rule', f'路径规则"{rule}"的解析出错: {e}, album={album}, photo={photo}')
9096
raise e
9197
if parser != self.parse_bd_rule:
92-
# 统一将路径段转换为简体,避免繁体/简体导致的重复下载目录
98+
# 根据配置 normalize_zh 进行繁简体统一或跳过
9399
try:
94-
path = JmcomicText.to_zh_cn(str(path))
100+
target = getattr(self, 'normalize_zh', None)
101+
if target is None:
102+
# 默认为不转换
103+
conv_path = str(path)
104+
else:
105+
conv_path = JmcomicText.to_zh(str(path), target)
95106
except Exception:
96-
# 如果转换不可用(例如缺少zhconv),退回原字符串
97-
path = str(path)
107+
conv_path = str(path)
98108

99-
path = fix_windir_name(path).strip()
109+
path = fix_windir_name(conv_path).strip()
100110

101111
path_ls.append(path)
102112

@@ -209,6 +219,7 @@ def copy_option(self):
209219
dir_rule={
210220
'rule': self.dir_rule.rule_dsl,
211221
'base_dir': self.dir_rule.base_dir,
222+
'normalize_zh': getattr(self.dir_rule, 'normalize_zh', None),
212223
},
213224
download=self.download.src_dict,
214225
client=self.client.src_dict,
@@ -334,6 +345,7 @@ def deconstruct(self) -> Dict:
334345
'dir_rule': {
335346
'rule': self.dir_rule.rule_dsl,
336347
'base_dir': self.dir_rule.base_dir,
348+
'normalize_zh': getattr(self.dir_rule, 'normalize_zh', None),
337349
},
338350
'download': self.download.src_dict,
339351
'client': self.client.src_dict,

src/jmcomic/jm_toolkit.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -329,8 +329,34 @@ def find_right_pair(left_pair, i):
329329

330330
@classmethod
331331
def to_zh_cn(cls, s):
332-
import zhconv
333-
return zhconv.convert(s, 'zh-cn')
332+
# 兼容旧接口,默认转换为简体
333+
return cls.to_zh(s, 'zh-cn')
334+
335+
@classmethod
336+
def to_zh(cls, s, target='zh-cn'):
337+
"""
338+
通用的繁简体转换接口。
339+
340+
:param s: 待转换字符串
341+
:param target: 目标编码: 'zh-cn'(简体), 'zh-tw'(繁体),或 None/'none' 表示不转换
342+
:return: 转换后的字符串(若转换失败或未安装 zhconv,返回原始字符串)
343+
"""
344+
if s is None:
345+
return s
346+
347+
if target is None:
348+
return s
349+
350+
t = str(target).strip().lower()
351+
if t in ('none', ''):
352+
return s
353+
354+
try:
355+
import zhconv
356+
return zhconv.convert(s, t)
357+
except Exception:
358+
# 如果 zhconv 不可用或转换失败,则回退原字符串
359+
return s
334360

335361
@classmethod
336362
def try_mkdir(cls, save_dir: str):

0 commit comments

Comments
 (0)