@@ -29,7 +29,7 @@ def __init__(self, deterministic: bool = False):
2929 deterministic: if True will provide a single transduction option,
3030 for False multiple transduction are generated (used for audio-based normalization)
3131 """
32- super ().__init__ (' cardinal' , ordertype = "en_tn" )
32+ super ().__init__ (" cardinal" , ordertype = "en_tn" )
3333 self .deterministic = deterministic
3434 self .build_tagger ()
3535 self .build_verbalizer ()
@@ -41,123 +41,151 @@ def build_tagger(self):
4141 """
4242 # TODO replace to have "oh" as a default for "0"
4343 graph = pynini .Far (
44- get_abs_path (
45- "english/data/number/cardinal_number_name.far" ) ).get_fst ()
44+ get_abs_path ("english/data/number/cardinal_number_name.far" )
45+ ).get_fst ()
4646 graph_au = pynini .Far (
47- get_abs_path (
48- "english/data/number/cardinal_number_name_au.far" ) ).get_fst ()
47+ get_abs_path ("english/data/number/cardinal_number_name_au.far" )
48+ ).get_fst ()
4949 self .graph_hundred_component_at_least_one_none_zero_digit = (
5050 pynini .closure (self .DIGIT , 2 , 3 )
51- | pynini .difference (self .DIGIT , pynini .accep ("0" ))) @ graph
51+ | pynini .difference (self .DIGIT , pynini .accep ("0" ))
52+ ) @ graph
5253
53- graph_digit = pynini .string_file (
54- get_abs_path ("english/data/number/digit.tsv" ))
55- graph_zero = pynini .string_file (
56- get_abs_path ("english/data/number/zero.tsv" ))
54+ graph_digit = pynini .string_file (get_abs_path ("english/data/number/digit.tsv" ))
55+ graph_zero = pynini .string_file (get_abs_path ("english/data/number/zero.tsv" ))
5756
5857 single_digits_graph = pynini .invert (graph_digit | graph_zero )
59- self .single_digits_graph = single_digits_graph + pynini .closure (
60- self .INSERT_SPACE + single_digits_graph )
58+ self .single_digits_graph = (
59+ single_digits_graph + (self .INSERT_SPACE + single_digits_graph ).star
60+ )
6161
6262 if not self .deterministic :
6363 # for a single token allow only the same normalization
6464 # "007" -> {"oh oh seven", "zero zero seven"} not {"oh zero seven"}
6565 single_digits_graph_zero = pynini .invert (graph_digit | graph_zero )
6666 single_digits_graph_oh = pynini .invert (graph_digit ) | pynini .cross (
67- "0" , "oh" )
68-
69- self .single_digits_graph = single_digits_graph_zero + pynini .closure (
70- self .INSERT_SPACE + single_digits_graph_zero )
71- self .single_digits_graph |= single_digits_graph_oh + pynini .closure (
72- self .INSERT_SPACE + single_digits_graph_oh )
73-
74- single_digits_graph_with_commas = pynini .closure (
75- self .single_digits_graph + self .INSERT_SPACE , 1 ,
76- 3 ) + pynini .closure (
77- pynutil .delete ("," ) + single_digits_graph +
78- self .INSERT_SPACE + single_digits_graph +
79- self .INSERT_SPACE + single_digits_graph , 1 )
80-
81- graph = (pynini .closure (self .DIGIT , 1 , 3 ) +
82- (pynini .closure (pynutil .delete ("," ) + self .DIGIT ** 3 )
83- | pynini .closure (self .DIGIT ** 3 ))) @ graph
67+ "0" , "oh"
68+ )
69+
70+ self .single_digits_graph = (
71+ single_digits_graph_zero
72+ + (self .INSERT_SPACE + single_digits_graph_zero ).star
73+ )
74+ self .single_digits_graph |= (
75+ single_digits_graph_oh
76+ + (self .INSERT_SPACE + single_digits_graph_oh ).star
77+ )
78+
79+ single_digits_graph_with_commas = (
80+ pynini .closure (self .single_digits_graph + self .INSERT_SPACE , 1 , 3 )
81+ + (
82+ pynutil .delete ("," )
83+ + single_digits_graph
84+ + self .INSERT_SPACE
85+ + single_digits_graph
86+ + self .INSERT_SPACE
87+ + single_digits_graph
88+ ).plus
89+ )
90+
91+ graph = (
92+ pynini .closure (self .DIGIT , 1 , 3 )
93+ + ((pynutil .delete ("," ) + self .DIGIT ** 3 ).star | (self .DIGIT ** 3 ).star )
94+ ) @ graph
8495
8596 self .graph = graph
8697 self .graph_with_and = self .add_optional_and (graph )
8798
8899 if self .deterministic :
89- long_numbers = pynini .compose (self .DIGIT ** (5 , ...),
90- self .single_digits_graph ).optimize ()
100+ long_numbers = pynini .compose (
101+ self .DIGIT ** (5 , ...), self .single_digits_graph
102+ ).optimize ()
91103 self .long_numbers = plurals ._priority_union (
92- long_numbers , self .graph_with_and ,
93- pynini . closure ( self . VCHAR ) ).optimize ()
104+ long_numbers , self .graph_with_and , self . VCHAR . star
105+ ).optimize ()
94106 cardinal_with_leading_zeros = pynini .compose (
95- pynini .accep ("0" ) + pynini . closure ( self .DIGIT ),
96- self . single_digits_graph )
107+ pynini .accep ("0" ) + self .DIGIT . star , self . single_digits_graph
108+ )
97109 final_graph = self .long_numbers | cardinal_with_leading_zeros
98110 final_graph |= self .add_optional_and (graph_au )
99111 else :
100112 leading_zeros = pynini .compose (
101- pynini .closure (pynini .accep ("0" ), 1 ), self .single_digits_graph )
113+ pynini .accep ("0" ).plus , self .single_digits_graph
114+ )
102115 cardinal_with_leading_zeros = (
103- leading_zeros + self .INSERT_SPACE + pynini .compose (
104- pynini .closure (self .DIGIT ), self .graph_with_and ))
116+ leading_zeros
117+ + self .INSERT_SPACE
118+ + pynini .compose (self .DIGIT .star , self .graph_with_and )
119+ )
105120 self .long_numbers = self .graph_with_and | pynutil .add_weight (
106- self .single_digits_graph , 0.0001 )
121+ self .single_digits_graph , 0.0001
122+ )
107123 # add small weight to non-default graphs to make sure the deterministic option is listed first
108- final_graph = (self .long_numbers
109- | pynutil .add_weight (
110- single_digits_graph_with_commas , 0.0001 )
111- | cardinal_with_leading_zeros ).optimize ()
124+ final_graph = (
125+ self .long_numbers
126+ | pynutil .add_weight (single_digits_graph_with_commas , 0.0001 )
127+ | cardinal_with_leading_zeros
128+ ).optimize ()
112129
113130 one_to_a_replacement_graph = (
114131 pynini .cross ("one hundred" , "a hundred" )
115132 | pynini .cross ("one thousand" , "thousand" )
116- | pynini .cross ("one million" , "a million" ))
133+ | pynini .cross ("one million" , "a million" )
134+ )
117135 final_graph |= pynini .compose (
118- final_graph ,
119- one_to_a_replacement_graph .optimize () +
120- pynini .closure (self .VCHAR )).optimize ()
136+ final_graph , one_to_a_replacement_graph .optimize () + self .VCHAR .star
137+ ).optimize ()
121138 # remove commas for 4 digits numbers
122139 four_digit_comma_graph = (
123- self .DIGIT - "0" ) + pynutil .delete ("," ) + self .DIGIT ** 3
124- final_graph |= pynini .compose (four_digit_comma_graph .optimize (),
125- final_graph ).optimize ()
140+ (self .DIGIT - "0" ) + pynutil .delete ("," ) + self .DIGIT ** 3
141+ )
142+ final_graph |= pynini .compose (
143+ four_digit_comma_graph .optimize (), final_graph
144+ ).optimize ()
126145
127146 self .final_graph = final_graph
128- optional_minus_graph = pynini .closure (
129- pynutil .insert ("negative: " ) + pynini .cross ("-" , "\" true\" " ), 0 ,
130- 1 )
131- final_graph = optional_minus_graph + pynutil .insert (
132- "integer: \" " ) + final_graph + pynutil .insert ("\" " )
147+ optional_minus_graph = (
148+ pynutil .insert ("negative: " ) + pynini .cross ("-" , '"true" ' )
149+ ).ques
150+ final_graph = (
151+ optional_minus_graph
152+ + pynutil .insert ('integer: "' )
153+ + final_graph
154+ + pynutil .insert ('"' )
155+ )
133156 final_graph = self .add_tokens (final_graph )
134157 self .tagger = final_graph .optimize ()
135158
def add_optional_and(self, graph):
    """Extend ``graph`` with variants that insert "and" or drop " hundred".

    The result is the union of three transducers:

    * ``graph`` itself, with a weight of 0.00001 added;
    * ``graph`` composed with a rewrite that turns ``"hundred "`` into
      ``"hundred and "`` (only when no "thousand"/"million" follows) or
      ``"thousand "`` into ``"thousand and "`` (only when no "hundred"
      follows);
    * ``graph`` restricted to inputs of exactly three non-zero digits,
      with the `` hundred`` substring deleted from the output.

    Args:
        graph: transducer whose output side is the spelled-out number.
            # NOTE(review): presumably a pynini Fst mapping digit strings
            # to English words - confirm against the caller.

    Returns:
        The union described above. It is not optimized as a whole here;
        only the individual components are.
    """
    # The unmodified reading gets a small additive weight, while the "and"
    # rewrites below get -0.0001.
    # NOTE(review): presumably this ranks the "and" readings first in a
    # shortest-path search - confirm the semiring in use.
    graph_with_and = pynutil.add_weight(graph, 0.00001)

    not_quote = self.NOT_QUOTE.star

    # Any NOT_QUOTE string that does not contain "thousand" or "million".
    no_thousand_million = pynini.difference(
        not_quote, not_quote + pynini.union("thousand", "million") + not_quote
    ).optimize()
    # "... hundred X" -> "... hundred and X", X free of thousand/million.
    integer = (
        not_quote
        + pynutil.add_weight(
            pynini.cross("hundred ", "hundred and ") + no_thousand_million, -0.0001
        )
    ).optimize()

    # Any VCHAR string that does not contain "hundred".
    no_hundred = pynini.difference(
        self.VCHAR.star, not_quote + pynini.accep("hundred") + not_quote
    ).optimize()
    # "... thousand X" -> "... thousand and X", X free of "hundred".
    integer |= (
        not_quote
        + pynutil.add_weight(
            pynini.cross("thousand ", "thousand and ") + no_hundred, -0.0001
        )
    ).optimize()

    # Inputs of exactly three digits, none of them "0", with " hundred"
    # removed from the verbalization.
    optional_hundred = pynini.compose((self.DIGIT - "0") ** 3, graph).optimize()
    optional_hundred = pynini.compose(
        optional_hundred,
        self.VCHAR.star + pynini.cross(" hundred", "") + self.VCHAR.star,
    )

    graph_with_and |= pynini.compose(graph, integer).optimize()
    graph_with_and |= optional_hundred
    return graph_with_and
@@ -167,18 +195,18 @@ def build_verbalizer(self):
167195 Finite state transducer for verbalizing cardinal, e.g.
168196 cardinal { negative: "true" integer: "23" } -> minus twenty three
169197 """
170- optional_sign = pynini .cross (" negative: \ " true\" " , "minus " )
198+ optional_sign = pynini .cross (' negative: "true"' , "minus " )
171199 if not self .deterministic :
172- optional_sign |= pynini .cross (" negative: \ " true\" " , "negative " )
173- optional_sign |= pynini .cross (" negative: \ " true\" " , "dash " )
200+ optional_sign |= pynini .cross (' negative: "true"' , "negative " )
201+ optional_sign |= pynini .cross (' negative: "true"' , "dash " )
174202
175- self .optional_sign = pynini .closure (optional_sign + self .DELETE_SPACE ,
176- 0 , 1 )
203+ self .optional_sign = (optional_sign + self .DELETE_SPACE ).ques
177204
178- integer = pynini . closure ( self .NOT_QUOTE )
205+ integer = self .NOT_QUOTE . star
179206
180- self .integer = self .DELETE_SPACE + pynutil .delete (
181- "\" " ) + integer + pynutil .delete ("\" " )
207+ self .integer = (
208+ self .DELETE_SPACE + pynutil .delete ('"' ) + integer + pynutil .delete ('"' )
209+ )
182210 integer = pynutil .delete ("integer:" ) + self .integer
183211
184212 self .numbers = self .optional_sign + integer
0 commit comments