@@ -46,9 +46,12 @@ def make_million(number: str, non_zero_no_one: 'pynini.FstLike', deterministic:
4646 for one in ["en" , "ett" ]:
4747 graph |= pynutil .add_weight (pynini .cross ("001" , f"{ one } { number } " ), - 0.001 )
4848 graph |= pynutil .add_weight (pynini .cross ("001" , f"{ one } { old_orth } " ), - 0.001 )
49+ graph |= pynutil .add_weight (pynini .cross ("001" , f"{ one } { number } " ), - 0.001 )
50+ graph |= pynutil .add_weight (pynini .cross ("001" , f"{ one } { old_orth } " ), - 0.001 )
4951 graph |= non_zero_no_one + pynutil .insert (f" { number } er" )
5052 if not deterministic :
5153 graph |= pynutil .add_weight (non_zero_no_one + pynutil .insert (f" { old_orth } er" ), - 0.001 )
54+ graph |= pynutil .add_weight (non_zero_no_one + pynutil .insert (f"{ old_orth } er" ), - 0.001 )
5255 graph |= pynutil .delete ("000" )
5356 graph += insert_space
5457 return graph
@@ -100,17 +103,17 @@ def __init__(self, deterministic: bool = True):
100103 digit = pynini .invert (pynini .string_file (get_abs_path ("data/numbers/digit.tsv" )))
101104 teen = pynini .invert (pynini .string_file (get_abs_path ("data/numbers/teen.tsv" )))
102105 ties = pynini .invert (pynini .string_file (get_abs_path ("data/numbers/ties.tsv" )))
103- ett_to_en = pynini .string_map ([( "ett" , "en" )] )
106+ ett_to_en = pynini .cross ( "ett" , "en" )
104107 ties_alt_endings = pynini .string_map ([("go" , "gi" ), ("tio" , "ti" )])
105108
106109 # Any single digit
107110 graph_digit = digit
108111 digits_no_one = (NEMO_DIGIT - "1" ) @ graph_digit
109- both_ones = pynini .cross ("1" , "en" ) | pynini .cross ("1" , "ett" )
110112 if deterministic :
111113 final_digit = digit
112114 else :
113- final_digit = digits_no_one | both_ones
115+ final_digit = digit | pynini .cross ("1" , "en" )
116+ graph_digit = final_digit
114117 self .digit = final_digit
115118
116119 single_digits_graph = graph_digit | zero
@@ -131,14 +134,13 @@ def __init__(self, deterministic: bool = True):
131134 else :
132135 graph_tens |= pynutil .add_weight (pynini .cross ("18" , "aderton" ), - 0.001 )
133136 graph_tens |= pynutil .add_weight (
134- graph_ties + (pynutil .delete ('0' ) | (graph_digit | pynutil . insert ( ' ' ) + graph_digit )), - 0.001
137+ graph_ties + (pynutil .delete ('0' ) | (graph_digit | insert_space + graph_digit )), - 0.001
135138 )
136139
137140 hundreds = digits_no_one + pynutil .insert ("hundra" )
138141 hundreds |= pynini .cross ("1" , "hundra" )
139142 if not deterministic :
140143 hundreds |= pynutil .add_weight (pynini .cross ("1" , "etthundra" ), - 0.001 )
141- hundreds |= pynutil .add_weight (pynini .cross ("1" , "ett hundra" ), - 0.001 )
142144 hundreds |= pynutil .add_weight (digit + pynutil .insert (NEMO_SPACE ) + pynutil .insert ("hundra" ), - 0.001 )
143145
144146 self .tens = graph_tens .optimize ()
@@ -180,6 +182,7 @@ def __init__(self, deterministic: bool = True):
180182 graph_hundreds_component_at_least_one_non_zero_digit = graph_hundreds_component | (
181183 pynutil .delete ("00" ) + graph_digit
182184 )
185+
183186 graph_hundreds_component_at_least_one_non_zero_digit_no_one = graph_hundreds_component | (
184187 pynutil .delete ("00" ) + digits_no_one
185188 )
@@ -192,18 +195,21 @@ def __init__(self, deterministic: bool = True):
192195 if not deterministic :
193196 tusen |= pynutil .add_weight (pynutil .insert (" tusen" ), - 0.001 )
194197 etttusen = tusen
195- etttusen |= pynutil .add_weight (pynutil .insert ("ettusen " ), - 0.001 )
196- etttusen |= pynutil .add_weight (pynutil .insert (" ettusen " ), - 0.001 )
198+ etttusen |= pynutil .add_weight (pynutil .insert ("etttusen " ), - 0.001 )
199+ etttusen |= pynutil .add_weight (pynutil .insert (" etttusen " ), - 0.001 )
197200 etttusen |= pynutil .add_weight (pynutil .insert ("ett tusen" ), - 0.001 )
198201 etttusen |= pynutil .add_weight (pynutil .insert (" ett tusen" ), - 0.001 )
199202
203+ following_hundred = insert_space + graph_hundreds_component_at_least_one_non_zero_digit
204+ if not deterministic :
205+ following_hundred |= graph_hundreds_component_at_least_one_non_zero_digit
206+
200207 graph_thousands_component_at_least_one_non_zero_digit = pynini .union (
201208 pynutil .delete ("000" ) + graph_hundreds_component_at_least_one_non_zero_digit ,
202209 graph_hundreds_component_at_least_one_non_zero_digit_no_one
203210 + tusen
204- + ((insert_space + graph_hundreds_component_at_least_one_non_zero_digit ) | pynutil .delete ("000" )),
205- pynini .cross ("001" , etttusen )
206- + ((insert_space + graph_hundreds_component_at_least_one_non_zero_digit ) | pynutil .delete ("000" )),
211+ + (following_hundred | pynutil .delete ("000" )),
212+ pynini .cross ("001" , etttusen ) + (following_hundred | pynutil .delete ("000" )),
207213 )
208214 self .graph_thousands_component_at_least_one_non_zero_digit = (
209215 graph_thousands_component_at_least_one_non_zero_digit .optimize ()
@@ -213,9 +219,8 @@ def __init__(self, deterministic: bool = True):
213219 pynutil .delete ("000" ) + graph_hundreds_component_at_least_one_non_zero_digit_no_one ,
214220 graph_hundreds_component_at_least_one_non_zero_digit_no_one
215221 + tusen
216- + ((insert_space + graph_hundreds_component_at_least_one_non_zero_digit ) | pynutil .delete ("000" )),
217- pynini .cross ("001" , etttusen )
218- + ((insert_space + graph_hundreds_component_at_least_one_non_zero_digit ) | pynutil .delete ("000" )),
222+ + (following_hundred | pynutil .delete ("000" )),
223+ pynini .cross ("001" , etttusen ) + (following_hundred | pynutil .delete ("000" )),
219224 )
220225 self .graph_thousands_component_at_least_one_non_zero_digit_no_one = (
221226 graph_thousands_component_at_least_one_non_zero_digit_no_one .optimize ()
@@ -326,11 +331,19 @@ def __init__(self, deterministic: bool = True):
326331
327332 self .graph |= zero
328333
334+ self .graph_unfiltered = self .graph
329335 self .graph = filter_punctuation (self .graph ).optimize ()
330336 self .graph_en = self .graph @ pynini .cdrewrite (ett_to_en , "" , "[EOS]" , NEMO_SIGMA )
331337 self .graph_no_one = (pynini .project (self .graph , "input" ) - "1" ) @ self .graph
332338 self .graph_no_one_en = (pynini .project (self .graph_en , "input" ) - "1" ) @ self .graph_en
333339
340+ joiner_chars = pynini .union ("-" , "–" , "—" )
341+ joiner = pynini .cross (joiner_chars , " till " )
342+ self .range = self .graph + joiner + self .graph
343+ if not deterministic :
344+ either_one = self .graph | self .graph_en
345+ self .range = either_one + joiner + either_one
346+
334347 optional_minus_graph = pynini .closure (pynutil .insert ("negative: " ) + pynini .cross ("-" , "\" true\" " ), 0 , 1 )
335348
336349 final_graph = optional_minus_graph + pynutil .insert ("integer: \" " ) + self .graph + pynutil .insert ("\" " )
0 commit comments