File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -172,9 +172,9 @@ def build_tagger(self):
172172 # 5. 添加"中文数字+英文字母"的规则,如"四a" -> "4a"
173173 # 匹配一个或多个英文字母(大小写)
174174 from pynini import union
175- english_letters = union (* [accep (c ) for c in "abcdABCD " ])
175+ english_letters = union (* [accep (c ) for c in "abcdqABCD " ])
176176 # 数字+字母的组合,如"四a" -> "4a"
177- number_with_letter = number + english_letters .plus
177+ number_with_letter = number . plus + english_letters .plus
178178 cardinal |= add_weight (number_with_letter , 0.05 ) # 使用较高优先级
179179
180180 # 6. 添加两个连续完整数字的范围规则(如"二十一二十二" -> "21-22")
Original file line number Diff line number Diff line change @@ -56,6 +56,16 @@ def build_tagger(self):
5656 + insert ("%" )
5757 )
5858
59+ # 二十二个百分点, 零点六个百分点, 负二十二个百分点
60+ percent_point = (
61+ (sign + delete ("的" ).ques ).ques
62+ + Cardinal ().number
63+ + delete ("个" ).ques
64+ + delete ("百分" )
65+ + (delete ("点" ) | delete ("比" ))
66+ + insert ("%" )
67+ )
68+
5969 # 十千米每小时 => 10km/h, 十一到一百千米每小时 => 11~100km/h
6070 # measure = number + (to + number).ques + units
6171 measure = number + (insert ("、" ) + number ).star + (to + number ).ques + units
@@ -96,7 +106,7 @@ def build_tagger(self):
96106 - 0.5 ,
97107 )
98108
99- tagger = insert ('value: "' ) + (measure | measure_sp | percent ) + insert ('"' )
109+ tagger = insert ('value: "' ) + (measure | measure_sp | percent | percent_point ) + insert ('"' )
100110 # 每小时十千米 => 10km/h, 每小时三十到三百一十一千米 => 30~311km/h
101111 tagger |= insert ('denominator: "' ) + delete ("每" ) + units + insert ('" numerator: "' ) + measure + insert ('"' )
102112
You can’t perform that action at this time.
0 commit comments