Skip to content

Commit 7eba94a

Browse files
committed
Fix: remove Erhua handling in pinyin_group to avoid misprocessing
1 parent 065a014 commit 7eba94a

File tree

2 files changed

+12
-142
lines changed

2 files changed

+12
-142
lines changed

pypinyin/core.py

Lines changed: 12 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,6 @@ def pinyin_group(self, hans, style=Style.TONE, heteronym=False,
142142
143143
每个分组包含原始汉字和对应的拼音。拼音会根据情况进行处理:
144144
- 词语中的多个字的拼音会用空格连接
145-
- 儿化音会合并处理(如:花儿 -> huar)
146145
- 需要隔音符的拼音会自动添加(如:西安 -> xi'an)
147146
148147
:param hans: 汉字字符串( ``'你好吗'`` )或列表( ``['你好', '吗']`` ).
@@ -179,18 +178,6 @@ def pinyin_group(self, hans, style=Style.TONE, heteronym=False,
179178
while i < len(han_list):
180179
word = han_list[i]
181180

182-
# 检查是否需要与下一个字符合并(儿化音处理)
183-
# 如果当前字是汉字且下一个字是"儿",合并处理
184-
if (i + 1 < len(han_list) and
185-
han_list[i + 1] == '儿' and
186-
RE_HANS.match(word)):
187-
# 合并当前字和"儿"
188-
word = word + '儿'
189-
i += 1 # 跳过下一个字符
190-
is_erhua = True
191-
else:
192-
is_erhua = False
193-
194181
# 获取该词的拼音
195182
pys = self.pinyin(
196183
word, style=style, heteronym=heteronym,
@@ -209,69 +196,19 @@ def pinyin_group(self, hans, style=Style.TONE, heteronym=False,
209196
i += 1
210197
continue
211198

212-
# 处理儿化音
213-
if is_erhua and len(pys) >= 2:
214-
# 获取倒数第二个拼音(花)和最后一个拼音(儿)
215-
base_pinyin_list = pys[-2]
216-
er_pinyin_list = pys[-1]
217-
218-
if heteronym:
219-
# 多音字模式:生成所有组合
220-
combined = []
221-
for base in base_pinyin_list:
222-
for er in er_pinyin_list:
223-
# 移除 er 的声母,只保留 r
224-
er_suffix = 'r' if er else ''
225-
combined.append(base + er_suffix)
226-
227-
# 前面的拼音保持不变
228-
if len(pys) > 2:
229-
# 如果有多个字,前面的用空格连接
230-
prev_pinyins = []
231-
for j in range(len(pys) - 2):
232-
prev_pinyins.append(pys[j])
233-
# 为前面的拼音生成所有组合
234-
if prev_pinyins:
235-
prev_combinations = [
236-
' '.join(p) for p in product(*prev_pinyins)]
237-
final_pinyins = [
238-
prev + ' ' + comb
239-
for prev in prev_combinations
240-
for comb in combined]
241-
else:
242-
final_pinyins = combined
243-
else:
244-
final_pinyins = combined
245-
else:
246-
# 非多音字模式:只取第一个
247-
base = base_pinyin_list[0] if base_pinyin_list else ''
248-
er = er_pinyin_list[0] if er_pinyin_list else ''
249-
er_suffix = 'r' if er else ''
250-
combined = base + er_suffix
251-
252-
# 前面的拼音用空格连接
253-
if len(pys) > 2:
254-
prev_pinyins = [p[0] for p in pys[:-2]]
255-
final_pinyins = [' '.join(prev_pinyins + [combined])]
256-
else:
257-
final_pinyins = [combined]
258-
259-
result.append({'hanzi': word, 'pinyin': final_pinyins})
199+
if heteronym:
200+
# 多音字模式:生成所有组合
201+
# 检查是否需要添加隔音符
202+
combinations = []
203+
for combo in product(*pys):
204+
joined = _join_pinyin_with_separator(list(combo))
205+
combinations.append(joined)
206+
result.append({'hanzi': word, 'pinyin': combinations})
260207
else:
261-
# 非儿化音处理
262-
if heteronym:
263-
# 多音字模式:生成所有组合
264-
# 检查是否需要添加隔音符
265-
combinations = []
266-
for combo in product(*pys):
267-
joined = _join_pinyin_with_separator(list(combo))
268-
combinations.append(joined)
269-
result.append({'hanzi': word, 'pinyin': combinations})
270-
else:
271-
# 非多音字模式:只取第一个
272-
pinyin_list = [p[0] if p else '' for p in pys]
273-
joined = _join_pinyin_with_separator(pinyin_list)
274-
result.append({'hanzi': word, 'pinyin': [joined]})
208+
# 非多音字模式:只取第一个
209+
pinyin_list = [p[0] if p else '' for p in pys]
210+
joined = _join_pinyin_with_separator(pinyin_list)
211+
result.append({'hanzi': word, 'pinyin': [joined]})
275212

276213
i += 1
277214

@@ -286,7 +223,6 @@ def lazy_pinyin_group(self, hans, style=Style.NORMAL,
286223
287224
每个分组包含原始汉字和对应的拼音字符串。拼音会根据情况进行处理:
288225
- 词语中的多个字的拼音会用空格连接
289-
- 儿化音会合并处理(如:花儿 -> huar)
290226
- 需要隔音符的拼音会自动添加(如:西安 -> xi'an)
291227
292228
:param hans: 汉字字符串( ``'你好吗'`` )或列表( ``['你好', '吗']`` ).
@@ -519,13 +455,6 @@ def pinyin_group(hans, style=Style.TONE, heteronym=False,
519455
{'hanzi': '吗', 'pinyin': ['ma']},
520456
{'hanzi': '?', 'pinyin': []}]
521457
>>> # 如果西安在词库中,会输出 [{'hanzi': '西安', 'pinyin': ["xi'an"]}]
522-
>>> # 如果花儿在词库中,会输出 [{'hanzi': '花儿', 'pinyin': ['huar']}]
523-
>>> # 演示儿化音处理:如果词语以"儿"结尾,会自动合并
524-
>>> result = pinyin_group('玩儿', style=Style.NORMAL)
525-
>>> result[0]['hanzi']
526-
'玩儿'
527-
>>> 'r' in result[0]['pinyin'][0] # 儿化音包含 r
528-
True
529458
"""
530459
_pinyin = Pinyin(UltimateConverter(
531460
v_to_u=v_to_u, neutral_tone_with_five=neutral_tone_with_five))
@@ -580,12 +509,6 @@ def lazy_pinyin_group(hans, style=Style.NORMAL,
580509
[{'hanzi': '你好', 'pinyin': 'ni hao'},
581510
{'hanzi': '吗', 'pinyin': 'ma'},
582511
{'hanzi': '?', 'pinyin': ''}]
583-
>>> # 演示儿化音处理:如果词语以"儿"结尾,会自动合并
584-
>>> result = lazy_pinyin_group('玩儿', style=Style.NORMAL)
585-
>>> result[0]['hanzi']
586-
'玩儿'
587-
>>> 'r' in result[0]['pinyin'] # 儿化音包含 r
588-
True
589512
"""
590513
_pinyin = Pinyin(UltimateConverter(
591514
v_to_u=v_to_u, neutral_tone_with_five=neutral_tone_with_five))

tests/test_pinyin_group.py

Lines changed: 0 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -66,26 +66,6 @@ def test_pinyin_group_with_apostrophe():
6666
assert result[0]['pinyin'][0] == "xi'an"
6767

6868

69-
def test_pinyin_group_with_erhua():
70-
"""测试儿化音处理"""
71-
result = pinyin_group('花儿', style=Style.NORMAL)
72-
assert len(result) == 1
73-
assert result[0]['hanzi'] == '花儿'
74-
assert len(result[0]['pinyin']) == 1
75-
# 儿化音应该合并为 huar
76-
assert result[0]['pinyin'][0] == 'huar'
77-
78-
79-
def test_pinyin_group_with_erhua_tone():
80-
"""测试儿化音带声调"""
81-
result = pinyin_group('花儿', style=Style.TONE)
82-
assert len(result) == 1
83-
assert result[0]['hanzi'] == '花儿'
84-
assert len(result[0]['pinyin']) == 1
85-
# 儿化音应该合并
86-
assert 'r' in result[0]['pinyin'][0]
87-
88-
8969
def test_pinyin_group_heteronym():
9070
"""测试多音字模式"""
9171
result = pinyin_group('中', heteronym=True)
@@ -116,21 +96,6 @@ def test_pinyin_group_style_tone():
11696
assert has_tone
11797

11898

119-
def test_pinyin_group_multiple_erhua():
120-
"""测试多个字带儿化音"""
121-
# 测试 小孩儿
122-
load_phrases_dict({
123-
'小孩': [['xiǎo'], ['hái']],
124-
})
125-
126-
result = pinyin_group('小孩儿', style=Style.NORMAL)
127-
assert len(result) == 1
128-
assert result[0]['hanzi'] == '小孩儿'
129-
assert len(result[0]['pinyin']) == 1
130-
# 应该是 xiao hair
131-
assert 'hair' in result[0]['pinyin'][0]
132-
133-
13499
def test_pinyin_group_mixed():
135100
"""测试混合场景"""
136101
load_phrases_dict({
@@ -168,15 +133,6 @@ def test_pinyin_group_with_list_input():
168133
assert result[1]['hanzi'] == '吗'
169134

170135

171-
def test_pinyin_group_with_list_erhua():
172-
"""测试列表输入的儿化音处理"""
173-
result = pinyin_group(['玩', '儿'], style=Style.NORMAL)
174-
# 儿化音应该被合并
175-
assert len(result) == 1
176-
assert result[0]['hanzi'] == '玩儿'
177-
assert 'r' in result[0]['pinyin'][0]
178-
179-
180136
def test_pinyin_group_method_exists():
181137
"""测试 Pinyin 类有 pinyin_group 方法"""
182138
from pypinyin.core import Pinyin
@@ -223,15 +179,6 @@ def test_lazy_pinyin_group_with_punctuation():
223179
assert result[2]['pinyin'] == '' # 空字符串而不是空列表
224180

225181

226-
def test_lazy_pinyin_group_with_erhua():
227-
"""测试 lazy_pinyin_group 儿化音处理"""
228-
result = lazy_pinyin_group('花儿', style=Style.NORMAL)
229-
assert len(result) == 1
230-
assert result[0]['hanzi'] == '花儿'
231-
assert isinstance(result[0]['pinyin'], str)
232-
assert result[0]['pinyin'] == 'huar'
233-
234-
235182
def test_lazy_pinyin_group_with_apostrophe():
236183
"""测试 lazy_pinyin_group 隔音符处理"""
237184
load_phrases_dict({

0 commit comments

Comments
 (0)