File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -176,6 +176,22 @@ def build_tagger(self):
176176 # 数字+字母的组合,如"四a" -> "4a"
177177 number_with_letter = number + english_letters .plus
178178 cardinal |= add_weight (number_with_letter , 0.05 ) # 使用较高优先级
179+
180+ # 6. 添加两个连续完整数字的范围规则(如"二十一二十二" -> "21~22")
181+ # 定义完整数字(不包括单个数字0-9,避免误匹配)
182+ complete_number = teen | tens | hundred | thousand | ten_thousand
183+ complete_number = (
184+ (complete_number + accep ("兆" ) + delete ("零" ).ques ).ques
185+ + (complete_number + accep ("亿" ) + delete ("零" ).ques ).ques
186+ + complete_number
187+ )
188+ complete_number = sign .ques + complete_number + (dot + digits .plus ).ques
189+
190+ # 两个连续完整数字的范围模式(优先级高于单独的数字)
191+ # 如:二十一二十二 -> 21~22, 三十一三十二 -> 31~32
192+ number_range = complete_number + insert ("~" ) + complete_number
193+ # 将这个规则添加到 cardinal,使用较高优先级(负权重)
194+ cardinal |= add_weight (number_range , - 0.05 )
179195
180196 tagger = insert ('value: "' ) + cardinal + (insert (" " ) + cardinal ).star + insert ('"' )
181197 self .tagger = self .add_tokens (tagger )
You can’t perform that action at this time.
0 commit comments