Skip to content

Commit d1e2090

Browse files
authored
[tn] use perl-style expressions (#277)
* [tn] use perl-style expressions
* [tn] format code with black
1 parent a8efdf7 commit d1e2090

File tree

16 files changed

+1509
-946
lines changed

16 files changed

+1509
-946
lines changed

tn/english/rules/cardinal.py

Lines changed: 107 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def __init__(self, deterministic: bool = False):
2929
deterministic: if True will provide a single transduction option,
3030
for False multiple transductions are generated (used for audio-based normalization)
3131
"""
32-
super().__init__('cardinal', ordertype="en_tn")
32+
super().__init__("cardinal", ordertype="en_tn")
3333
self.deterministic = deterministic
3434
self.build_tagger()
3535
self.build_verbalizer()
@@ -41,123 +41,151 @@ def build_tagger(self):
4141
"""
4242
# TODO: change this to have "oh" as the default for "0"
4343
graph = pynini.Far(
44-
get_abs_path(
45-
"english/data/number/cardinal_number_name.far")).get_fst()
44+
get_abs_path("english/data/number/cardinal_number_name.far")
45+
).get_fst()
4646
graph_au = pynini.Far(
47-
get_abs_path(
48-
"english/data/number/cardinal_number_name_au.far")).get_fst()
47+
get_abs_path("english/data/number/cardinal_number_name_au.far")
48+
).get_fst()
4949
self.graph_hundred_component_at_least_one_none_zero_digit = (
5050
pynini.closure(self.DIGIT, 2, 3)
51-
| pynini.difference(self.DIGIT, pynini.accep("0"))) @ graph
51+
| pynini.difference(self.DIGIT, pynini.accep("0"))
52+
) @ graph
5253

53-
graph_digit = pynini.string_file(
54-
get_abs_path("english/data/number/digit.tsv"))
55-
graph_zero = pynini.string_file(
56-
get_abs_path("english/data/number/zero.tsv"))
54+
graph_digit = pynini.string_file(get_abs_path("english/data/number/digit.tsv"))
55+
graph_zero = pynini.string_file(get_abs_path("english/data/number/zero.tsv"))
5756

5857
single_digits_graph = pynini.invert(graph_digit | graph_zero)
59-
self.single_digits_graph = single_digits_graph + pynini.closure(
60-
self.INSERT_SPACE + single_digits_graph)
58+
self.single_digits_graph = (
59+
single_digits_graph + (self.INSERT_SPACE + single_digits_graph).star
60+
)
6161

6262
if not self.deterministic:
6363
# for a single token allow only the same normalization
6464
# "007" -> {"oh oh seven", "zero zero seven"} not {"oh zero seven"}
6565
single_digits_graph_zero = pynini.invert(graph_digit | graph_zero)
6666
single_digits_graph_oh = pynini.invert(graph_digit) | pynini.cross(
67-
"0", "oh")
68-
69-
self.single_digits_graph = single_digits_graph_zero + pynini.closure(
70-
self.INSERT_SPACE + single_digits_graph_zero)
71-
self.single_digits_graph |= single_digits_graph_oh + pynini.closure(
72-
self.INSERT_SPACE + single_digits_graph_oh)
73-
74-
single_digits_graph_with_commas = pynini.closure(
75-
self.single_digits_graph + self.INSERT_SPACE, 1,
76-
3) + pynini.closure(
77-
pynutil.delete(",") + single_digits_graph +
78-
self.INSERT_SPACE + single_digits_graph +
79-
self.INSERT_SPACE + single_digits_graph, 1)
80-
81-
graph = (pynini.closure(self.DIGIT, 1, 3) +
82-
(pynini.closure(pynutil.delete(",") + self.DIGIT**3)
83-
| pynini.closure(self.DIGIT**3))) @ graph
67+
"0", "oh"
68+
)
69+
70+
self.single_digits_graph = (
71+
single_digits_graph_zero
72+
+ (self.INSERT_SPACE + single_digits_graph_zero).star
73+
)
74+
self.single_digits_graph |= (
75+
single_digits_graph_oh
76+
+ (self.INSERT_SPACE + single_digits_graph_oh).star
77+
)
78+
79+
single_digits_graph_with_commas = (
80+
pynini.closure(self.single_digits_graph + self.INSERT_SPACE, 1, 3)
81+
+ (
82+
pynutil.delete(",")
83+
+ single_digits_graph
84+
+ self.INSERT_SPACE
85+
+ single_digits_graph
86+
+ self.INSERT_SPACE
87+
+ single_digits_graph
88+
).plus
89+
)
90+
91+
graph = (
92+
pynini.closure(self.DIGIT, 1, 3)
93+
+ ((pynutil.delete(",") + self.DIGIT**3).star | (self.DIGIT**3).star)
94+
) @ graph
8495

8596
self.graph = graph
8697
self.graph_with_and = self.add_optional_and(graph)
8798

8899
if self.deterministic:
89-
long_numbers = pynini.compose(self.DIGIT**(5, ...),
90-
self.single_digits_graph).optimize()
100+
long_numbers = pynini.compose(
101+
self.DIGIT ** (5, ...), self.single_digits_graph
102+
).optimize()
91103
self.long_numbers = plurals._priority_union(
92-
long_numbers, self.graph_with_and,
93-
pynini.closure(self.VCHAR)).optimize()
104+
long_numbers, self.graph_with_and, self.VCHAR.star
105+
).optimize()
94106
cardinal_with_leading_zeros = pynini.compose(
95-
pynini.accep("0") + pynini.closure(self.DIGIT),
96-
self.single_digits_graph)
107+
pynini.accep("0") + self.DIGIT.star, self.single_digits_graph
108+
)
97109
final_graph = self.long_numbers | cardinal_with_leading_zeros
98110
final_graph |= self.add_optional_and(graph_au)
99111
else:
100112
leading_zeros = pynini.compose(
101-
pynini.closure(pynini.accep("0"), 1), self.single_digits_graph)
113+
pynini.accep("0").plus, self.single_digits_graph
114+
)
102115
cardinal_with_leading_zeros = (
103-
leading_zeros + self.INSERT_SPACE + pynini.compose(
104-
pynini.closure(self.DIGIT), self.graph_with_and))
116+
leading_zeros
117+
+ self.INSERT_SPACE
118+
+ pynini.compose(self.DIGIT.star, self.graph_with_and)
119+
)
105120
self.long_numbers = self.graph_with_and | pynutil.add_weight(
106-
self.single_digits_graph, 0.0001)
121+
self.single_digits_graph, 0.0001
122+
)
107123
# add small weight to non-default graphs to make sure the deterministic option is listed first
108-
final_graph = (self.long_numbers
109-
| pynutil.add_weight(
110-
single_digits_graph_with_commas, 0.0001)
111-
| cardinal_with_leading_zeros).optimize()
124+
final_graph = (
125+
self.long_numbers
126+
| pynutil.add_weight(single_digits_graph_with_commas, 0.0001)
127+
| cardinal_with_leading_zeros
128+
).optimize()
112129

113130
one_to_a_replacement_graph = (
114131
pynini.cross("one hundred", "a hundred")
115132
| pynini.cross("one thousand", "thousand")
116-
| pynini.cross("one million", "a million"))
133+
| pynini.cross("one million", "a million")
134+
)
117135
final_graph |= pynini.compose(
118-
final_graph,
119-
one_to_a_replacement_graph.optimize() +
120-
pynini.closure(self.VCHAR)).optimize()
136+
final_graph, one_to_a_replacement_graph.optimize() + self.VCHAR.star
137+
).optimize()
121138
# remove commas for 4-digit numbers
122139
four_digit_comma_graph = (
123-
self.DIGIT - "0") + pynutil.delete(",") + self.DIGIT**3
124-
final_graph |= pynini.compose(four_digit_comma_graph.optimize(),
125-
final_graph).optimize()
140+
(self.DIGIT - "0") + pynutil.delete(",") + self.DIGIT**3
141+
)
142+
final_graph |= pynini.compose(
143+
four_digit_comma_graph.optimize(), final_graph
144+
).optimize()
126145

127146
self.final_graph = final_graph
128-
optional_minus_graph = pynini.closure(
129-
pynutil.insert("negative: ") + pynini.cross("-", "\"true\" "), 0,
130-
1)
131-
final_graph = optional_minus_graph + pynutil.insert(
132-
"integer: \"") + final_graph + pynutil.insert("\"")
147+
optional_minus_graph = (
148+
pynutil.insert("negative: ") + pynini.cross("-", '"true" ')
149+
).ques
150+
final_graph = (
151+
optional_minus_graph
152+
+ pynutil.insert('integer: "')
153+
+ final_graph
154+
+ pynutil.insert('"')
155+
)
133156
final_graph = self.add_tokens(final_graph)
134157
self.tagger = final_graph.optimize()
135158

136159
def add_optional_and(self, graph):
137160
graph_with_and = graph
138161

139162
graph_with_and = pynutil.add_weight(graph, 0.00001)
140-
not_quote = pynini.closure(self.NOT_QUOTE)
163+
not_quote = self.NOT_QUOTE.star
141164
no_thousand_million = pynini.difference(
142-
not_quote, not_quote + pynini.union("thousand", "million") +
143-
not_quote).optimize()
144-
integer = (not_quote + pynutil.add_weight(
145-
pynini.cross("hundred ", "hundred and ") + no_thousand_million,
146-
-0.0001)).optimize()
165+
not_quote, not_quote + pynini.union("thousand", "million") + not_quote
166+
).optimize()
167+
integer = (
168+
not_quote
169+
+ pynutil.add_weight(
170+
pynini.cross("hundred ", "hundred and ") + no_thousand_million, -0.0001
171+
)
172+
).optimize()
147173

148174
no_hundred = pynini.difference(
149-
pynini.closure(self.VCHAR),
150-
not_quote + pynini.accep("hundred") + not_quote).optimize()
151-
integer |= (not_quote + pynutil.add_weight(
152-
pynini.cross("thousand ", "thousand and ") + no_hundred,
153-
-0.0001)).optimize()
154-
155-
optional_hundred = pynini.compose((self.DIGIT - "0")**3,
156-
graph).optimize()
175+
self.VCHAR.star, not_quote + pynini.accep("hundred") + not_quote
176+
).optimize()
177+
integer |= (
178+
not_quote
179+
+ pynutil.add_weight(
180+
pynini.cross("thousand ", "thousand and ") + no_hundred, -0.0001
181+
)
182+
).optimize()
183+
184+
optional_hundred = pynini.compose((self.DIGIT - "0") ** 3, graph).optimize()
157185
optional_hundred = pynini.compose(
158186
optional_hundred,
159-
pynini.closure(self.VCHAR) + pynini.cross(" hundred", "") +
160-
pynini.closure(self.VCHAR))
187+
self.VCHAR.star + pynini.cross(" hundred", "") + self.VCHAR.star,
188+
)
161189
graph_with_and |= pynini.compose(graph, integer).optimize()
162190
graph_with_and |= optional_hundred
163191
return graph_with_and
@@ -167,18 +195,18 @@ def build_verbalizer(self):
167195
Finite state transducer for verbalizing cardinal, e.g.
168196
cardinal { negative: "true" integer: "23" } -> minus twenty three
169197
"""
170-
optional_sign = pynini.cross("negative: \"true\"", "minus ")
198+
optional_sign = pynini.cross('negative: "true"', "minus ")
171199
if not self.deterministic:
172-
optional_sign |= pynini.cross("negative: \"true\"", "negative ")
173-
optional_sign |= pynini.cross("negative: \"true\"", "dash ")
200+
optional_sign |= pynini.cross('negative: "true"', "negative ")
201+
optional_sign |= pynini.cross('negative: "true"', "dash ")
174202

175-
self.optional_sign = pynini.closure(optional_sign + self.DELETE_SPACE,
176-
0, 1)
203+
self.optional_sign = (optional_sign + self.DELETE_SPACE).ques
177204

178-
integer = pynini.closure(self.NOT_QUOTE)
205+
integer = self.NOT_QUOTE.star
179206

180-
self.integer = self.DELETE_SPACE + pynutil.delete(
181-
"\"") + integer + pynutil.delete("\"")
207+
self.integer = (
208+
self.DELETE_SPACE + pynutil.delete('"') + integer + pynutil.delete('"')
209+
)
182210
integer = pynutil.delete("integer:") + self.integer
183211

184212
self.numbers = self.optional_sign + integer

0 commit comments

Comments
 (0)