Skip to content

Commit d8e057d

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 374749f commit d8e057d

File tree

14 files changed

+68
-92
lines changed

14 files changed

+68
-92
lines changed

nemo_text_processing/inverse_text_normalization/inverse_normalize.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -135,7 +135,7 @@ def __init__(
135135
from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst
136136
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import (
137137
VerbalizeFinalFst,
138-
)
138+
)
139139

140140
self.tagger = ClassifyFst(
141141
cache_dir=cache_dir, whitelist=whitelist, overwrite_cache=overwrite_cache, input_case=input_case
@@ -180,7 +180,7 @@ def parse_args():
180180
parser.add_argument(
181181
"--language",
182182
help="language",
183-
choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja','ko'],
183+
choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja', 'ko'],
184184
default="en",
185185
type=str,
186186
)

nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py

Lines changed: 20 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -282,41 +282,24 @@ def process_address_1(instance: Instance) -> Instance:
282282

283283

284284
filters = []
285-
filters.append(Filter(class_type="CARDINAL",
286-
process_func=process_cardinal_1, filter_func=filter_cardinal_1))
287-
filters.append(Filter(class_type="ORDINAL",
288-
process_func=process_ordinal_1, filter_func=filter_ordinal_1))
289-
filters.append(Filter(class_type="DECIMAL",
290-
process_func=process_decimal_1, filter_func=filter_decimal_1))
291-
filters.append(Filter(class_type="MEASURE",
292-
process_func=process_measure_1, filter_func=filter_measure_1))
293-
filters.append(Filter(class_type="MONEY",
294-
process_func=process_money_1, filter_func=filter_money_1))
295-
filters.append(Filter(class_type="TIME",
296-
process_func=process_time_1, filter_func=filter_time_1))
297-
298-
filters.append(Filter(class_type="DATE",
299-
process_func=process_date_1, filter_func=filter_date_1))
300-
filters.append(Filter(class_type="PLAIN",
301-
process_func=process_plain_1, filter_func=filter_plain_1))
302-
filters.append(Filter(class_type="PUNCT",
303-
process_func=process_punct_1, filter_func=filter_punct_1))
304-
filters.append(Filter(class_type="LETTERS",
305-
process_func=process_letters_1, filter_func=filter_letters_1))
306-
filters.append(Filter(class_type="VERBATIM",
307-
process_func=process_verbatim_1, filter_func=filter_verbatim_1))
308-
filters.append(Filter(class_type="DIGIT",
309-
process_func=process_digit_1, filter_func=filter_digit_1))
310-
filters.append(Filter(class_type="TELEPHONE",
311-
process_func=process_telephone_1, filter_func=filter_telephone_1))
312-
filters.append(Filter(class_type="ELECTRONIC",
313-
process_func=process_electronic_1, filter_func=filter_electronic_1))
314-
filters.append(Filter(class_type="FRACTION",
315-
process_func=process_fraction_1, filter_func=filter_fraction_1))
316-
filters.append(Filter(class_type="ADDRESS",
317-
process_func=process_address_1, filter_func=filter_address_1))
318-
filters.append(Filter(class_type=EOS_TYPE,
319-
process_func=lambda x: x, filter_func=lambda x: True))
285+
filters.append(Filter(class_type="CARDINAL", process_func=process_cardinal_1, filter_func=filter_cardinal_1))
286+
filters.append(Filter(class_type="ORDINAL", process_func=process_ordinal_1, filter_func=filter_ordinal_1))
287+
filters.append(Filter(class_type="DECIMAL", process_func=process_decimal_1, filter_func=filter_decimal_1))
288+
filters.append(Filter(class_type="MEASURE", process_func=process_measure_1, filter_func=filter_measure_1))
289+
filters.append(Filter(class_type="MONEY", process_func=process_money_1, filter_func=filter_money_1))
290+
filters.append(Filter(class_type="TIME", process_func=process_time_1, filter_func=filter_time_1))
291+
292+
filters.append(Filter(class_type="DATE", process_func=process_date_1, filter_func=filter_date_1))
293+
filters.append(Filter(class_type="PLAIN", process_func=process_plain_1, filter_func=filter_plain_1))
294+
filters.append(Filter(class_type="PUNCT", process_func=process_punct_1, filter_func=filter_punct_1))
295+
filters.append(Filter(class_type="LETTERS", process_func=process_letters_1, filter_func=filter_letters_1))
296+
filters.append(Filter(class_type="VERBATIM", process_func=process_verbatim_1, filter_func=filter_verbatim_1))
297+
filters.append(Filter(class_type="DIGIT", process_func=process_digit_1, filter_func=filter_digit_1))
298+
filters.append(Filter(class_type="TELEPHONE", process_func=process_telephone_1, filter_func=filter_telephone_1))
299+
filters.append(Filter(class_type="ELECTRONIC", process_func=process_electronic_1, filter_func=filter_electronic_1))
300+
filters.append(Filter(class_type="FRACTION", process_func=process_fraction_1, filter_func=filter_fraction_1))
301+
filters.append(Filter(class_type="ADDRESS", process_func=process_address_1, filter_func=filter_address_1))
302+
filters.append(Filter(class_type=EOS_TYPE, process_func=lambda x: x, filter_func=lambda x: True))
320303

321304

322305
def filter_loaded_data(data: List[Instance], verbose: bool = False) -> List[Instance]:
@@ -344,10 +327,8 @@ def filter_loaded_data(data: List[Instance], verbose: bool = False) -> List[Inst
344327

345328
def parse_args():
346329
parser = ArgumentParser()
347-
parser.add_argument("--input", help="input file path",
348-
type=str, default='./en_with_types/output-00001-of-00100')
349-
parser.add_argument(
350-
"--verbose", help="print filtered instances", action='store_true')
330+
parser.add_argument("--input", help="input file path", type=str, default='./en_with_types/output-00001-of-00100')
331+
parser.add_argument("--verbose", help="print filtered instances", action='store_true')
351332
return parser.parse_args()
352333

353334

nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py

Lines changed: 24 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_DIGIT, GraphFst, delete_space
2020
from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path
2121

22+
2223
class CardinalFst(GraphFst):
2324
"""
2425
Finite state transducer for classifying cardinals
@@ -37,14 +38,14 @@ def __init__(self):
3738

3839
graph_negative = pynini.cross("마이너스", "-")
3940
graph_negative += delete_space
40-
41+
4142
ten = pynutil.delete("십")
4243
ten_alt = pynini.cross("십", "1")
4344
### Handles the tens digit of a two-digit number, e.g. the 2 in 20
4445
graph_ten_component = pynini.union((graph_digit + ten) | ten_alt, pynutil.insert("0"))
4546
### Handles the ones digit of the number, e.g. 1, 2, 3, 4, 5, ...
4647
graph_ten_component += graph_digit | pynutil.insert("0")
47-
48+
4849
hundred = pynutil.delete("백")
4950
hundred_alt = pynini.cross("백", "1")
5051
graph_hundred_component = pynini.union(((graph_digit + hundred) | hundred_alt), pynutil.insert("0"))
@@ -59,46 +60,55 @@ def __init__(self):
5960
tenthousand_alt = pynini.cross("만", "1")
6061
### "만" (ten thousand) covers the next four digits up to the next unit "억" (hundred million), so insert "0000" to reserve four digits' worth of space
6162
### From "만" upward, each unit contributes four more digits built from graph_thousand_component (0000-9999), because Korean number units step up every four digits
62-
graph_tenthousand_component = pynini.union(((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000"))
63+
graph_tenthousand_component = pynini.union(
64+
((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000")
65+
)
6366
graph_tenthousand_component += graph_thousand_component
6467

6568
hundredmillion = pynutil.delete("억")
6669
hundredmillion_alt = pynini.cross("억", "1")
67-
graph_hundredmillion_component = pynini.union(((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000"))
68-
graph_hundredmillion_component += graph_tenthousand_component
69-
70+
graph_hundredmillion_component = pynini.union(
71+
((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000")
72+
)
73+
graph_hundredmillion_component += graph_tenthousand_component
74+
7075
trillion = pynutil.delete("조")
7176
trillion_alt = pynini.cross("조", "1")
72-
graph_trillion_component = pynini.union(((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000"))
77+
graph_trillion_component = pynini.union(
78+
((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000")
79+
)
7380
graph_trillion_component += graph_hundredmillion_component
7481

7582
tenquadrillion = pynutil.delete("경")
7683
tenquadrillion_alt = pynini.cross("경", "1")
77-
graph_tenquadrillion_component = pynini.union(((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000"))
84+
graph_tenquadrillion_component = pynini.union(
85+
((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000")
86+
)
7887
graph_tenquadrillion_component += graph_trillion_component
7988

80-
8189
graph = pynini.union(
8290
### From biggest unit to smallest, everything is included
83-
graph_tenquadrillion_component|
84-
graph_zero
91+
graph_tenquadrillion_component
92+
| graph_zero
8593
)
8694

8795
leading_zero = (
8896
pynutil.delete(pynini.closure("0")) + pynini.difference(NEMO_DIGIT, "0") + pynini.closure(NEMO_DIGIT)
8997
)
9098
graph_nonzero = graph @ leading_zero
9199
graph = pynini.union(graph_nonzero, graph_zero)
92-
100+
93101
graph = graph @ leading_zero | graph_zero
94102

95103
self.just_cardinals = graph
96104

97-
optional_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1)
105+
optional_sign = pynini.closure(
106+
(pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space, 0, 1
107+
)
98108

99109
final_graph = (
100110
optional_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"")
101111
) | (pynutil.insert("integer: \"") + graph + pynutil.insert("\""))
102112

103113
final_graph = self.add_tokens(final_graph)
104-
self.fst = final_graph.optimize()
114+
self.fst = final_graph.optimize()

nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,15 +19,15 @@
1919
import pynini
2020
from pynini.lib import pynutil
2121

22-
from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst
23-
from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst
24-
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import (
22+
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import (
2523
INPUT_LOWER_CASED,
2624
GraphFst,
2725
delete_extra_space,
2826
delete_space,
2927
generator_main,
3028
)
29+
from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst
30+
from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst
3131

3232

3333
class ClassifyFst(GraphFst):
@@ -64,13 +64,13 @@ def __init__(
6464
cardinal = CardinalFst()
6565
cardinal_graph = cardinal.fst
6666
word_graph = WordFst().fst
67-
classify = (pynutil.add_weight(cardinal_graph, 1.1)| pynutil.add_weight(word_graph, 100))
68-
67+
classify = pynutil.add_weight(cardinal_graph, 1.1) | pynutil.add_weight(word_graph, 100)
68+
6969
token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ")
7070
tagger = pynini.closure(token, 1)
7171

7272
self.fst = tagger
7373

7474
if far_file:
7575
generator_main(far_file, {"tokenize_and_classify": self.fst})
76-
logging.info(f"ClassifyFst grammars are saved to {far_file}.")
76+
logging.info(f"ClassifyFst grammars are saved to {far_file}.")

nemo_text_processing/inverse_text_normalization/ko/taggers/word.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,5 @@ class WordFst(GraphFst):
2727

2828
def __init__(self):
2929
super().__init__(name="word", kind="classify")
30-
word = pynutil.insert(
31-
"name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"")
30+
word = pynutil.insert("name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"")
3231
self.fst = word.optimize()

nemo_text_processing/inverse_text_normalization/ko/utils.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,6 @@
1515
import os
1616

1717

18-
1918
def get_abs_path(rel_path):
2019

2120
return os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path
22-
23-

nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,4 +14,4 @@
1414

1515
from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst
1616
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst
17-
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst
17+
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst

nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py

Lines changed: 5 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,7 @@
1515
import pynini
1616
from pynini.lib import pynutil
1717

18-
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import (
19-
NEMO_NOT_QUOTE,
20-
GraphFst,
21-
delete_space,
22-
)
18+
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
2319

2420

2521
class CardinalFst(GraphFst):
@@ -34,21 +30,17 @@ def __init__(self):
3430
pynutil.delete("negative:")
3531
+ delete_space
3632
+ pynutil.delete("\"")
37-
+ pynini.accep("-")
33+
+ pynini.accep("-")
3834
+ pynutil.delete("\"")
3935
)
4036

4137
optional_sign_output = pynini.closure(negative_sign + delete_space, 0, 1)
4238

43-
digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1)
39+
digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1)
4440
integer_cardinal = (
45-
pynutil.delete("integer:")
46-
+ delete_space
47-
+ pynutil.delete("\"")
48-
+ digits_from_tag
49-
+ pynutil.delete("\"")
41+
pynutil.delete("integer:") + delete_space + pynutil.delete("\"") + digits_from_tag + pynutil.delete("\"")
5042
)
5143

5244
graph = integer_cardinal
5345
final_graph = optional_sign_output + graph
54-
self.fst = self.delete_tokens(final_graph).optimize()
46+
self.fst = self.delete_tokens(final_graph).optimize()

nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,9 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst
1617
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst
1718
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst
18-
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst
1919

2020

2121
class VerbalizeFst(GraphFst):
@@ -30,7 +30,6 @@ def __init__(self):
3030
cardinal = CardinalFst()
3131
cardinal_graph = cardinal.fst
3232
word_graph = WordFst().fst
33-
34-
graph = (cardinal_graph|word_graph)
33+
34+
graph = cardinal_graph | word_graph
3535
self.fst = graph
36-

nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,16 +18,17 @@
1818
import pynini
1919
from pynini.lib import pynutil
2020

21+
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main
2122
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst
2223
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst
23-
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space
2424

2525

2626
class VerbalizeFinalFst(GraphFst):
2727
"""
2828
Finite state transducer that verbalizes an entire sentence, e.g.
2929
tokens { name: "its" } tokens { time { hours: "12" minutes: "30" } } tokens { name: "now" } -> its 12:30 now
3030
"""
31+
3132
def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False):
3233
super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic)
3334
far_file = None

0 commit comments

Comments
 (0)