@@ -28,26 +28,31 @@ def __init__(self, deterministic: bool = True):
2828 graph_digit = pynini .string_file (get_abs_path ("data/number/digit.tsv" ))
2929
3030 digit_except_one = pynini .difference (NEMO_DIGIT , "1" )
31- digit_except_zero_one = pynini .difference (digit_except_one , "0" )
31+ digit_except_zero_one = pynini .difference (digit_except_one , "0" ) << < << << HEAD
3232
3333 graph_digit_alt = digit_except_zero_one @ graph_digit
3434 graph_ty = pynini .string_file (get_abs_path ("data/number/ty.tsv" ))
3535 graph_teen = pynini .string_file (get_abs_path ("data/number/teen.tsv" ))
36+ == == == =
37+
38+ graph_digit_no_zero_one = digit_except_zero_one @ graph_digit
39+ graph_ty = pynini .string_file (get_abs_path ("data/number/ty.tsv" ))
40+ >> >> >> > 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
3641
3742 # Compose all basic number forms
38- graph_all = (graph_ty + (graph_digit | pynutil .delete ('0' ))) | graph_teen | graph_digit
43+ graph_1_to_99 = (graph_ty + (graph_digit | pynutil .delete ('0' ))) | graph_digit
3944
4045 hundreds = NEMO_DIGIT ** 3
41- graph_hundred_component = (pynini .cross ('1' , '백' ) | (graph_digit_alt + pynutil .insert ('백' ))) + pynini .union (
42- pynini .closure (pynutil .delete ('0' )), (pynini .closure (pynutil .delete ('0' )) + graph_all )
46+ graph_hundred_component = (pynini .cross ('1' , '백' ) | (graph_digit_no_zero_one + pynutil .insert ('백' ))) + pynini .union (
47+ pynini .closure (pynutil .delete ('0' )), (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 )
4348 )
4449 graph_hundred = hundreds @ graph_hundred_component
4550
4651 thousands = NEMO_DIGIT ** 4
47- graph_thousand_component = (pynini .cross ('1' , '천' ) | (graph_digit_alt + pynutil .insert ('천' ))) + pynini .union (
52+ graph_thousand_component = (pynini .cross ('1' , '천' ) | (graph_digit_no_zero_one + pynutil .insert ('천' ))) + pynini .union (
4853 pynini .closure (pynutil .delete ('0' )),
4954 graph_hundred_component ,
50- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
55+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
5156 )
5257 graph_thousand = thousands @ graph_thousand_component
5358
@@ -56,36 +61,44 @@ def __init__(self, deterministic: bool = True):
5661 pynini .closure (pynutil .delete ('0' )),
5762 graph_thousand_component ,
5863 (pynutil .delete ('0' ) + graph_hundred_component ),
59- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
64+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
6065 )
6166 graph_ten_thousand = ten_thousands @ graph_ten_thousand_component
6267
6368 hundred_thousands = NEMO_DIGIT ** 6
69+ < << << << HEAD
6470 graph_hundred_thousand_component = ((NEMO_DIGIT ** 2 @ graph_all ) + pynutil .insert ('만' )) + pynini .union (
71+ == == == =
72+ graph_hundred_thousand_component = ((NEMO_DIGIT ** 2 @ graph_1_to_99 ) + pynutil .insert ('만' )) + pynini .union (
73+ >> >> >> > 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
6574 pynini .closure (pynutil .delete ('0' )),
6675 graph_thousand_component ,
6776 (pynutil .delete ('0' ) + graph_hundred_component ),
68- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
77+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
6978 )
7079 graph_hundred_thousand = hundred_thousands @ graph_hundred_thousand_component
7180
7281 millions = NEMO_DIGIT ** 7
73- graph_million_component = ((NEMO_DIGIT ** 3 @ graph_hundred_component ) + pynutil .insert ('만' )) + pynini .union (
82+ graph_million_component = ((graph_hundred ) + pynutil .insert ('만' )) + pynini .union (
7483 pynini .closure (pynutil .delete ('0' )),
7584 graph_thousand_component ,
7685 (pynutil .delete ('0' ) + graph_hundred_component ),
77- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
86+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
7887 )
7988 graph_million = millions @ graph_million_component
8089
8190 ten_millions = NEMO_DIGIT ** 8
91+ << < << << HEAD
8292 graph_ten_million_component = (
8393 (NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('만' )
8494 ) + pynini .union (
95+ == == == =
96+ graph_ten_million_component = ((graph_thousand ) + pynutil .insert ('만' )) + pynini .union (
97+ >> >> >> > 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
8598 pynini .closure (pynutil .delete ('0' )),
8699 graph_thousand_component ,
87100 (pynutil .delete ('0' ) + graph_hundred_component ),
88- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
101+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
89102 )
90103 graph_ten_million = ten_millions @ graph_ten_million_component
91104
@@ -98,48 +111,52 @@ def __init__(self, deterministic: bool = True):
98111 (pynutil .delete ('000' ) + graph_ten_thousand_component ),
99112 (pynutil .delete ('0000' ) + graph_thousand_component ),
100113 ((pynutil .delete ('00000' ) + graph_hundred_component )),
101- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
114+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
102115 )
103116 graph_hundred_million = hundred_millions @ graph_hundred_million_component
104117
105118 thousand_millions = NEMO_DIGIT ** 10
106- graph_thousand_million_component = ((NEMO_DIGIT ** 2 @ graph_all ) + pynutil .insert ('억' )) + pynini .union (
119+ graph_thousand_million_component = ((NEMO_DIGIT ** 2 @ graph_1_to_99 ) + pynutil .insert ('억' )) + pynini .union (
107120 pynini .closure (pynutil .delete ('0' )),
108121 graph_ten_million_component ,
109122 (pynutil .delete ('0' ) + graph_million_component ),
110123 (pynutil .delete ('00' ) + graph_hundred_thousand_component ),
111124 (pynutil .delete ('000' ) + graph_ten_thousand_component ),
112125 (pynutil .delete ('0000' ) + graph_thousand_component ),
113126 ((pynutil .delete ('00000' ) + graph_hundred_component )),
114- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
127+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
115128 )
116129 graph_thousand_million = thousand_millions @ graph_thousand_million_component
117130
118131 billions = NEMO_DIGIT ** 11
119- graph_billions_component = ((NEMO_DIGIT ** 3 @ graph_hundred_component ) + pynutil .insert ('억' )) + pynini .union (
132+ graph_billions_component = ((graph_hundred ) + pynutil .insert ('억' )) + pynini .union (
120133 pynini .closure (pynutil .delete ('0' )),
121134 graph_ten_million_component ,
122135 (pynutil .delete ('0' ) + graph_million_component ),
123136 (pynutil .delete ('00' ) + graph_hundred_thousand_component ),
124137 (pynutil .delete ('000' ) + graph_ten_thousand_component ),
125138 (pynutil .delete ('0000' ) + graph_thousand_component ),
126139 ((pynutil .delete ('00000' ) + graph_hundred_component )),
127- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
140+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
128141 )
129142 graph_billions = billions @ graph_billions_component
130143
131144 ten_billions = NEMO_DIGIT ** 12
145+ << < << << HEAD
132146 graph_ten_billions_component = (
133147 (NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('억' )
134148 ) + pynini .union (
149+ == == == =
150+ graph_ten_billions_component = ((graph_thousand ) + pynutil .insert ('억' )) + pynini .union (
151+ >> >> >> > 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
135152 pynini .closure (pynutil .delete ('0' )),
136153 graph_ten_million_component ,
137154 (pynutil .delete ('0' ) + graph_million_component ),
138155 (pynutil .delete ('00' ) + graph_hundred_thousand_component ),
139156 (pynutil .delete ('000' ) + graph_ten_thousand_component ),
140157 (pynutil .delete ('0000' ) + graph_thousand_component ),
141158 ((pynutil .delete ('00000' ) + graph_hundred_component )),
142- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
159+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
143160 )
144161 graph_ten_billions = ten_billions @ graph_ten_billions_component
145162
@@ -156,11 +173,12 @@ def __init__(self, deterministic: bool = True):
156173 pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
157174 pynutil .delete ('00000000' ) + graph_thousand_component ,
158175 pynutil .delete ('000000000' ) + graph_hundred_component ,
159- (pynini .closure (pynutil .delete ('0' )) + graph_all ),
176+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 ),
160177 )
161178 graph_hundred_billions = hundred_billions @ graph_hundred_billions_component
162179
163180 trillion = NEMO_DIGIT ** 14
181+ << < << << HEAD
164182 graph_trillion_component = (
165183 (NEMO_DIGIT ** 2 @ graph_all )
166184 + pynutil .insert ('조' )
@@ -177,11 +195,27 @@ def __init__(self, deterministic: bool = True):
177195 pynutil .delete ('00000000' ) + graph_thousand_component ,
178196 pynutil .delete ('000000000' ) + graph_hundred_component ,
179197 (pynini .closure (pynutil .delete ('0' )) + graph_all ),
198+ == == == =
199+ graph_trillion_component = ((NEMO_DIGIT ** 2 @ graph_1_to_99 ) + pynutil .insert ('조' ) + pynini .union (
200+ pynini .closure (pynutil .delete ('0' )),
201+ graph_ten_billions_component ,
202+ pynutil .delete ('0' ) + graph_billions_component ,
203+ pynutil .delete ('00' ) + graph_thousand_million_component ,
204+ pynutil .delete ('000' ) + graph_hundred_million_component ,
205+ pynutil .delete ('0000' ) + graph_ten_million_component ,
206+ pynutil .delete ('00000' ) + graph_million_component ,
207+ pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
208+ pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
209+ pynutil .delete ('00000000' ) + graph_thousand_component ,
210+ pynutil .delete ('000000000' ) + graph_hundred_component ,
211+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 )
212+ >> > >> >> 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
180213 )
181214 )
182215 graph_trillions = trillion @ graph_trillion_component
183216
184217 ten_trillions = NEMO_DIGIT ** 15
218+ << < << << HEAD
185219 graph_ten_trillions_component = (
186220 (NEMO_DIGIT ** 3 @ graph_hundred_component )
187221 + pynutil .insert ('조' )
@@ -199,10 +233,27 @@ def __init__(self, deterministic: bool = True):
199233 pynutil .delete ('000000000' ) + graph_hundred_component ,
200234 (pynini .closure (pynutil .delete ('0' )) + graph_all ),
201235 )
236+ == == == =
237+ graph_ten_trillions_component = ((graph_hundred ) + pynutil .insert ('조' ) + pynini .union (
238+ pynini .closure (pynutil .delete ('0' )),
239+ graph_ten_billions_component ,
240+ pynutil .delete ('0' ) + graph_billions_component ,
241+ pynutil .delete ('00' ) + graph_thousand_million_component ,
242+ pynutil .delete ('000' ) + graph_hundred_million_component ,
243+ pynutil .delete ('0000' ) + graph_ten_million_component ,
244+ pynutil .delete ('00000' ) + graph_million_component ,
245+ pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
246+ pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
247+ pynutil .delete ('00000000' ) + graph_thousand_component ,
248+ pynutil .delete ('000000000' ) + graph_hundred_component ,
249+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 )
250+ )
251+ >> > >> >> 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
202252 )
203253 graph_ten_trillions = ten_trillions @ graph_ten_trillions_component
204254
205255 hundred_trillions = NEMO_DIGIT ** 16
256+ << < << << HEAD
206257 graph_hundred_trillions_component = (
207258 (NEMO_DIGIT ** 4 @ graph_thousand_component )
208259 + pynutil .insert ('조' )
@@ -219,11 +270,27 @@ def __init__(self, deterministic: bool = True):
219270 pynutil .delete ('00000000' ) + graph_thousand_component ,
220271 pynutil .delete ('000000000' ) + graph_hundred_component ,
221272 (pynini .closure (pynutil .delete ('0' )) + graph_all ),
273+ == == == =
274+ graph_hundred_trillions_component = ((graph_thousand ) + pynutil .insert ('조' ) + pynini .union (
275+ pynini .closure (pynutil .delete ('0' )),
276+ graph_ten_billions_component ,
277+ pynutil .delete ('0' ) + graph_billions_component ,
278+ pynutil .delete ('00' ) + graph_thousand_million_component ,
279+ pynutil .delete ('000' ) + graph_hundred_million_component ,
280+ pynutil .delete ('0000' ) + graph_ten_million_component ,
281+ pynutil .delete ('00000' ) + graph_million_component ,
282+ pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
283+ pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
284+ pynutil .delete ('00000000' ) + graph_thousand_component ,
285+ pynutil .delete ('000000000' ) + graph_hundred_component ,
286+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 )
287+ >> > >> >> 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
222288 )
223289 )
224290 graph_hundred_trillions = hundred_trillions @ graph_hundred_trillions_component
225291
226292 thousand_trillions = NEMO_DIGIT ** 17
293+ << < << << HEAD
227294 graph_thousand_trillions_component = (
228295 graph_digit
229296 + pynutil .insert ('경' )
@@ -244,6 +311,25 @@ def __init__(self, deterministic: bool = True):
244311 pynutil .delete ('000000000000' ) + graph_thousand_component ,
245312 pynutil .delete ('0000000000000' ) + graph_hundred_component ,
246313 (pynini .closure (pynutil .delete ('0' )) + graph_all ),
314+ == == == =
315+ graph_thousand_trillions_component = (graph_digit + pynutil .insert ('경' ) + pynini .union (
316+ pynini .closure (pynutil .delete ('0' )),
317+ graph_hundred_trillions_component ,
318+ pynutil .delete ('0' ) + graph_ten_trillions_component ,
319+ pynutil .delete ('00' ) + graph_trillion_component ,
320+ pynutil .delete ('000' ) + graph_hundred_billions_component ,
321+ pynutil .delete ('0000' ) + graph_ten_billions_component ,
322+ pynutil .delete ('00000' ) + graph_billions_component ,
323+ pynutil .delete ('000000' ) + graph_thousand_million_component ,
324+ pynutil .delete ('0000000' ) + graph_hundred_million_component ,
325+ pynutil .delete ('00000000' ) + graph_ten_million_component ,
326+ pynutil .delete ('000000000' ) + graph_million_component ,
327+ pynutil .delete ('0000000000' ) + graph_hundred_thousand_component ,
328+ pynutil .delete ('00000000000' ) + graph_ten_thousand_component ,
329+ pynutil .delete ('000000000000' ) + graph_thousand_component ,
330+ pynutil .delete ('0000000000000' ) + graph_hundred_component ,
331+ (pynini .closure (pynutil .delete ('0' )) + graph_1_to_99 )
332+ >> > >> >> 68 b18fa8 (Refactor Korean TN cardinal and postprocessing logic based on review feedback )
247333 )
248334 )
249335 graph_thousand_trillions = thousand_trillions @ graph_thousand_trillions_component
@@ -265,7 +351,7 @@ def __init__(self, deterministic: bool = True):
265351 graph_ten_thousand ,
266352 graph_thousand ,
267353 graph_hundred ,
268- graph_all ,
354+ graph_1_to_99 ,
269355 graph_zero ,
270356 ).optimize ()
271357
0 commit comments