@@ -24,15 +24,15 @@ class CardinalFst(GraphFst):
2424 def __init__ (self , deterministic : bool = True ):
2525 super ().__init__ (name = "cardinal" , kind = "classify" , deterministic = deterministic )
2626 # Load base .tsv files
27- graph_zero = pynini .string_file (get_abs_path ("data/number/zero.tsv" ))
28- graph_digit = pynini .string_file (get_abs_path ("data/number/digit.tsv" ))
29-
27+ graph_zero = pynini .string_file (get_abs_path ("data/number/zero.tsv" ))
28+ graph_digit = pynini .string_file (get_abs_path ("data/number/digit.tsv" ))
29+
3030 digit_except_one = pynini .difference (NEMO_DIGIT , "1" )
3131 digit_except_zero_one = pynini .difference (digit_except_one , "0" )
32-
32+
3333 graph_digit_alt = digit_except_zero_one @ graph_digit
3434 graph_ty = pynini .string_file (get_abs_path ("data/number/ty.tsv" ))
35- graph_teen = pynini .string_file (get_abs_path ("data/number/teen.tsv" ))
35+ graph_teen = pynini .string_file (get_abs_path ("data/number/teen.tsv" ))
3636
3737 # Compose all basic number forms
3838 graph_all = (graph_ty + (graph_digit | pynutil .delete ('0' ))) | graph_teen | graph_digit
@@ -50,7 +50,7 @@ def __init__(self, deterministic: bool = True):
5050 (pynini .closure (pynutil .delete ('0' )) + graph_all ),
5151 )
5252 graph_thousand = thousands @ graph_thousand_component
53-
53+
5454 ten_thousands = NEMO_DIGIT ** 5
5555 graph_ten_thousand_component = (pynini .cross ('1' , '만' ) | (graph_digit + pynutil .insert ('만' ))) + pynini .union (
5656 pynini .closure (pynutil .delete ('0' )),
@@ -59,16 +59,16 @@ def __init__(self, deterministic: bool = True):
5959 (pynini .closure (pynutil .delete ('0' )) + graph_all ),
6060 )
6161 graph_ten_thousand = ten_thousands @ graph_ten_thousand_component
62-
62+
6363 hundred_thousands = NEMO_DIGIT ** 6
64- graph_hundred_thousand_component = ((NEMO_DIGIT ** 2 @ graph_all ) + pynutil .insert ('만' )) + pynini .union (
64+ graph_hundred_thousand_component = ((NEMO_DIGIT ** 2 @ graph_all ) + pynutil .insert ('만' )) + pynini .union (
6565 pynini .closure (pynutil .delete ('0' )),
6666 graph_thousand_component ,
6767 (pynutil .delete ('0' ) + graph_hundred_component ),
6868 (pynini .closure (pynutil .delete ('0' )) + graph_all ),
6969 )
7070 graph_hundred_thousand = hundred_thousands @ graph_hundred_thousand_component
71-
71+
7272 millions = NEMO_DIGIT ** 7
7373 graph_million_component = ((NEMO_DIGIT ** 3 @ graph_hundred_component ) + pynutil .insert ('만' )) + pynini .union (
7474 pynini .closure (pynutil .delete ('0' )),
@@ -79,15 +79,17 @@ def __init__(self, deterministic: bool = True):
7979 graph_million = millions @ graph_million_component
8080
8181 ten_millions = NEMO_DIGIT ** 8
82- graph_ten_million_component = ((NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('만' )) + pynini .union (
82+ graph_ten_million_component = (
83+ (NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('만' )
84+ ) + pynini .union (
8385 pynini .closure (pynutil .delete ('0' )),
8486 graph_thousand_component ,
8587 (pynutil .delete ('0' ) + graph_hundred_component ),
8688 (pynini .closure (pynutil .delete ('0' )) + graph_all ),
8789 )
8890 graph_ten_million = ten_millions @ graph_ten_million_component
89-
90- hundred_millions = NEMO_DIGIT ** 9
91+
92+ hundred_millions = NEMO_DIGIT ** 9
9193 graph_hundred_million_component = (graph_digit + pynutil .insert ('억' )) + pynini .union (
9294 pynini .closure (pynutil .delete ('0' )),
9395 graph_ten_million_component ,
@@ -127,7 +129,9 @@ def __init__(self, deterministic: bool = True):
127129 graph_billions = billions @ graph_billions_component
128130
129131 ten_billions = NEMO_DIGIT ** 12
130- graph_ten_billions_component = ((NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('억' )) + pynini .union (
132+ graph_ten_billions_component = (
133+ (NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('억' )
134+ ) + pynini .union (
131135 pynini .closure (pynutil .delete ('0' )),
132136 graph_ten_million_component ,
133137 (pynutil .delete ('0' ) + graph_million_component ),
@@ -138,7 +142,7 @@ def __init__(self, deterministic: bool = True):
138142 (pynini .closure (pynutil .delete ('0' )) + graph_all ),
139143 )
140144 graph_ten_billions = ten_billions @ graph_ten_billions_component
141-
145+
142146 hundred_billions = NEMO_DIGIT ** 13
143147 graph_hundred_billions_component = (graph_digit + pynutil .insert ('조' )) + pynini .union (
144148 pynini .closure (pynutil .delete ('0' )),
@@ -155,79 +159,91 @@ def __init__(self, deterministic: bool = True):
155159 (pynini .closure (pynutil .delete ('0' )) + graph_all ),
156160 )
157161 graph_hundred_billions = hundred_billions @ graph_hundred_billions_component
158-
162+
159163 trillion = NEMO_DIGIT ** 14
160- graph_trillion_component = ((NEMO_DIGIT ** 2 @ graph_all ) + pynutil .insert ('조' ) + pynini .union (
161- pynini .closure (pynutil .delete ('0' )),
162- graph_ten_billions_component ,
163- pynutil .delete ('0' ) + graph_billions_component ,
164- pynutil .delete ('00' ) + graph_thousand_million_component ,
165- pynutil .delete ('000' ) + graph_hundred_million_component ,
166- pynutil .delete ('0000' ) + graph_ten_million_component ,
167- pynutil .delete ('00000' ) + graph_million_component ,
168- pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
169- pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
170- pynutil .delete ('00000000' ) + graph_thousand_component ,
171- pynutil .delete ('000000000' ) + graph_hundred_component ,
172- (pynini .closure (pynutil .delete ('0' )) + graph_all )
164+ graph_trillion_component = (
165+ (NEMO_DIGIT ** 2 @ graph_all )
166+ + pynutil .insert ('조' )
167+ + pynini .union (
168+ pynini .closure (pynutil .delete ('0' )),
169+ graph_ten_billions_component ,
170+ pynutil .delete ('0' ) + graph_billions_component ,
171+ pynutil .delete ('00' ) + graph_thousand_million_component ,
172+ pynutil .delete ('000' ) + graph_hundred_million_component ,
173+ pynutil .delete ('0000' ) + graph_ten_million_component ,
174+ pynutil .delete ('00000' ) + graph_million_component ,
175+ pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
176+ pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
177+ pynutil .delete ('00000000' ) + graph_thousand_component ,
178+ pynutil .delete ('000000000' ) + graph_hundred_component ,
179+ (pynini .closure (pynutil .delete ('0' )) + graph_all ),
173180 )
174181 )
175182 graph_trillions = trillion @ graph_trillion_component
176183
177184 ten_trillions = NEMO_DIGIT ** 15
178- graph_ten_trillions_component = ((NEMO_DIGIT ** 3 @ graph_hundred_component ) + pynutil .insert ('조' ) + pynini .union (
179- pynini .closure (pynutil .delete ('0' )),
180- graph_ten_billions_component ,
181- pynutil .delete ('0' ) + graph_billions_component ,
182- pynutil .delete ('00' ) + graph_thousand_million_component ,
183- pynutil .delete ('000' ) + graph_hundred_million_component ,
184- pynutil .delete ('0000' ) + graph_ten_million_component ,
185- pynutil .delete ('00000' ) + graph_million_component ,
186- pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
187- pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
188- pynutil .delete ('00000000' ) + graph_thousand_component ,
189- pynutil .delete ('000000000' ) + graph_hundred_component ,
190- (pynini .closure (pynutil .delete ('0' )) + graph_all )
191- )
185+ graph_ten_trillions_component = (
186+ (NEMO_DIGIT ** 3 @ graph_hundred_component )
187+ + pynutil .insert ('조' )
188+ + pynini .union (
189+ pynini .closure (pynutil .delete ('0' )),
190+ graph_ten_billions_component ,
191+ pynutil .delete ('0' ) + graph_billions_component ,
192+ pynutil .delete ('00' ) + graph_thousand_million_component ,
193+ pynutil .delete ('000' ) + graph_hundred_million_component ,
194+ pynutil .delete ('0000' ) + graph_ten_million_component ,
195+ pynutil .delete ('00000' ) + graph_million_component ,
196+ pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
197+ pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
198+ pynutil .delete ('00000000' ) + graph_thousand_component ,
199+ pynutil .delete ('000000000' ) + graph_hundred_component ,
200+ (pynini .closure (pynutil .delete ('0' )) + graph_all ),
201+ )
192202 )
193203 graph_ten_trillions = ten_trillions @ graph_ten_trillions_component
194204
195205 hundred_trillions = NEMO_DIGIT ** 16
196- graph_hundred_trillions_component = ((NEMO_DIGIT ** 4 @ graph_thousand_component ) + pynutil .insert ('조' ) + pynini .union (
197- pynini .closure (pynutil .delete ('0' )),
198- graph_ten_billions_component ,
199- pynutil .delete ('0' ) + graph_billions_component ,
200- pynutil .delete ('00' ) + graph_thousand_million_component ,
201- pynutil .delete ('000' ) + graph_hundred_million_component ,
202- pynutil .delete ('0000' ) + graph_ten_million_component ,
203- pynutil .delete ('00000' ) + graph_million_component ,
204- pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
205- pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
206- pynutil .delete ('00000000' ) + graph_thousand_component ,
207- pynutil .delete ('000000000' ) + graph_hundred_component ,
208- (pynini .closure (pynutil .delete ('0' )) + graph_all )
206+ graph_hundred_trillions_component = (
207+ (NEMO_DIGIT ** 4 @ graph_thousand_component )
208+ + pynutil .insert ('조' )
209+ + pynini .union (
210+ pynini .closure (pynutil .delete ('0' )),
211+ graph_ten_billions_component ,
212+ pynutil .delete ('0' ) + graph_billions_component ,
213+ pynutil .delete ('00' ) + graph_thousand_million_component ,
214+ pynutil .delete ('000' ) + graph_hundred_million_component ,
215+ pynutil .delete ('0000' ) + graph_ten_million_component ,
216+ pynutil .delete ('00000' ) + graph_million_component ,
217+ pynutil .delete ('000000' ) + graph_hundred_thousand_component ,
218+ pynutil .delete ('0000000' ) + graph_ten_thousand_component ,
219+ pynutil .delete ('00000000' ) + graph_thousand_component ,
220+ pynutil .delete ('000000000' ) + graph_hundred_component ,
221+ (pynini .closure (pynutil .delete ('0' )) + graph_all ),
209222 )
210223 )
211224 graph_hundred_trillions = hundred_trillions @ graph_hundred_trillions_component
212225
213226 thousand_trillions = NEMO_DIGIT ** 17
214- graph_thousand_trillions_component = (graph_digit + pynutil .insert ('경' ) + pynini .union (
215- pynini .closure (pynutil .delete ('0' )),
216- graph_hundred_trillions_component ,
217- pynutil .delete ('0' ) + graph_ten_trillions_component ,
218- pynutil .delete ('00' ) + graph_trillion_component ,
219- pynutil .delete ('000' ) + graph_hundred_billions_component ,
220- pynutil .delete ('0000' ) + graph_ten_billions_component ,
221- pynutil .delete ('00000' ) + graph_billions_component ,
222- pynutil .delete ('000000' ) + graph_thousand_million_component ,
223- pynutil .delete ('0000000' ) + graph_hundred_million_component ,
224- pynutil .delete ('00000000' ) + graph_ten_million_component ,
225- pynutil .delete ('000000000' ) + graph_million_component ,
226- pynutil .delete ('0000000000' ) + graph_hundred_thousand_component ,
227- pynutil .delete ('00000000000' ) + graph_ten_thousand_component ,
228- pynutil .delete ('000000000000' ) + graph_thousand_component ,
229- pynutil .delete ('0000000000000' ) + graph_hundred_component ,
230- (pynini .closure (pynutil .delete ('0' )) + graph_all )
227+ graph_thousand_trillions_component = (
228+ graph_digit
229+ + pynutil .insert ('경' )
230+ + pynini .union (
231+ pynini .closure (pynutil .delete ('0' )),
232+ graph_hundred_trillions_component ,
233+ pynutil .delete ('0' ) + graph_ten_trillions_component ,
234+ pynutil .delete ('00' ) + graph_trillion_component ,
235+ pynutil .delete ('000' ) + graph_hundred_billions_component ,
236+ pynutil .delete ('0000' ) + graph_ten_billions_component ,
237+ pynutil .delete ('00000' ) + graph_billions_component ,
238+ pynutil .delete ('000000' ) + graph_thousand_million_component ,
239+ pynutil .delete ('0000000' ) + graph_hundred_million_component ,
240+ pynutil .delete ('00000000' ) + graph_ten_million_component ,
241+ pynutil .delete ('000000000' ) + graph_million_component ,
242+ pynutil .delete ('0000000000' ) + graph_hundred_thousand_component ,
243+ pynutil .delete ('00000000000' ) + graph_ten_thousand_component ,
244+ pynutil .delete ('000000000000' ) + graph_thousand_component ,
245+ pynutil .delete ('0000000000000' ) + graph_hundred_component ,
246+ (pynini .closure (pynutil .delete ('0' )) + graph_all ),
231247 )
232248 )
233249 graph_thousand_trillions = thousand_trillions @ graph_thousand_trillions_component
@@ -254,14 +270,7 @@ def __init__(self, deterministic: bool = True):
254270 ).optimize ()
255271
256272 # Sign and final formatting
257- optional_sign = pynini .closure (
258- pynutil .insert ('negative: "true" ' ) + pynini .cross ("-" , "" ), 0 , 1
259- )
260- final_graph = (
261- optional_sign
262- + pynutil .insert ('integer: "' )
263- + graph_num
264- + pynutil .insert ('"' )
265- )
273+ optional_sign = pynini .closure (pynutil .insert ('negative: "true" ' ) + pynini .cross ("-" , "" ), 0 , 1 )
274+ final_graph = optional_sign + pynutil .insert ('integer: "' ) + graph_num + pynutil .insert ('"' )
266275 final_graph = self .add_tokens (final_graph )
267276 self .fst = final_graph .optimize ()
0 commit comments