Skip to content

Commit a5164dc

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 41ac59d commit a5164dc

12 files changed

Lines changed: 43 additions & 50 deletions

File tree

nemo_text_processing/inverse_text_normalization/inverse_normalize.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def __init__(
135135
from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst
136136
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import (
137137
VerbalizeFinalFst,
138-
)
138+
)
139139

140140
self.tagger = ClassifyFst(
141141
cache_dir=cache_dir, whitelist=whitelist, overwrite_cache=overwrite_cache, input_case=input_case
@@ -180,7 +180,7 @@ def parse_args():
180180
parser.add_argument(
181181
"--language",
182182
help="language",
183-
choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja','ko'],
183+
choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja', 'ko'],
184184
default="en",
185185
type=str,
186186
)

nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_DIGIT, GraphFst, delete_space
2020
from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path
2121

22+
2223
class CardinalFst(GraphFst):
2324
"""
2425
Finite state transducer for classifying cardinals
@@ -40,7 +41,7 @@ def __init__(self):
4041
graph_ten_component = pynini.union((graph_digit + ten) | ten_alt, pynutil.insert("0"))
4142
### Handles the ones digit of the number (the digit after the tens place), e.g. 1, 2, 3, 4, 5, ...
4243
graph_ten_component += graph_digit | pynutil.insert("0")
43-
44+
4445
hundred = pynutil.delete("백")
4546
hundred_alt = pynini.cross("백", "1")
4647
graph_hundred_component = pynini.union(((graph_digit + hundred) | hundred_alt), pynutil.insert("0"))
@@ -55,46 +56,55 @@ def __init__(self):
5556
tenthousand_alt = pynini.cross("만", "1")
5657
### "만" covers the next four digits of the number, up to the next unit "억", so insert "0000" to reserve four digits' worth of space
5758
### From "만" onward, each larger unit adds four more digits plus a graph_thousand_component (0000-9999), because Korean number units step up every four digits
58-
graph_tenthousand_component = pynini.union(((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000"))
59+
graph_tenthousand_component = pynini.union(
60+
((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000")
61+
)
5962
graph_tenthousand_component += graph_thousand_component
6063

6164
hundredmillion = pynutil.delete("억")
6265
hundredmillion_alt = pynini.cross("억", "1")
63-
graph_hundredmillion_component = pynini.union(((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000"))
64-
graph_hundredmillion_component += graph_tenthousand_component
65-
66+
graph_hundredmillion_component = pynini.union(
67+
((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000")
68+
)
69+
graph_hundredmillion_component += graph_tenthousand_component
70+
6671
trillion = pynutil.delete("조")
6772
trillion_alt = pynini.cross("조", "1")
68-
graph_trillion_component = pynini.union(((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000"))
73+
graph_trillion_component = pynini.union(
74+
((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000")
75+
)
6976
graph_trillion_component += graph_hundredmillion_component
7077

7178
tenquadrillion = pynutil.delete("경")
7279
tenquadrillion_alt = pynini.cross("경", "1")
73-
graph_tenquadrillion_component = pynini.union(((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000"))
80+
graph_tenquadrillion_component = pynini.union(
81+
((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000")
82+
)
7483
graph_tenquadrillion_component += graph_trillion_component
7584

76-
7785
graph = pynini.union(
7886
### From biggest unit to smallest, everything is included
79-
graph_tenquadrillion_component|
80-
graph_zero
87+
graph_tenquadrillion_component
88+
| graph_zero
8189
)
8290

8391
leading_zero = (
8492
pynutil.delete(pynini.closure("0")) + pynini.difference(NEMO_DIGIT, "0") + pynini.closure(NEMO_DIGIT)
8593
)
8694
graph_nonzero = graph @ leading_zero
8795
graph = pynini.union(graph_nonzero, graph_zero)
88-
96+
8997
graph = graph @ leading_zero | graph_zero
9098

9199
self.just_cardinals = graph
92100

93-
optional_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1)
101+
optional_sign = pynini.closure(
102+
(pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space, 0, 1
103+
)
94104

95105
final_graph = (
96106
optional_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"")
97107
) | (pynutil.insert("integer: \"") + graph + pynutil.insert("\""))
98108

99109
final_graph = self.add_tokens(final_graph)
100-
self.fst = final_graph.optimize()
110+
self.fst = final_graph.optimize()

nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,9 @@
1919
import pynini
2020
from pynini.lib import pynutil
2121

22+
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main
2223
from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst
2324
from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst
24-
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import (
25-
INPUT_LOWER_CASED,
26-
GraphFst,
27-
generator_main,
28-
)
2925

3026

3127
class ClassifyFst(GraphFst):
@@ -62,13 +58,13 @@ def __init__(
6258
cardinal = CardinalFst()
6359
cardinal_graph = cardinal.fst
6460
word_graph = WordFst().fst
65-
classify = (pynutil.add_weight(cardinal_graph, 1.1)| pynutil.add_weight(word_graph, 100))
66-
61+
classify = pynutil.add_weight(cardinal_graph, 1.1) | pynutil.add_weight(word_graph, 100)
62+
6763
token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ")
6864
tagger = pynini.closure(token, 1)
6965

7066
self.fst = tagger
7167

7268
if far_file:
7369
generator_main(far_file, {"tokenize_and_classify": self.fst})
74-
logging.info(f"ClassifyFst grammars are saved to {far_file}.")
70+
logging.info(f"ClassifyFst grammars are saved to {far_file}.")

nemo_text_processing/inverse_text_normalization/ko/taggers/word.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,5 @@ class WordFst(GraphFst):
2727

2828
def __init__(self):
2929
super().__init__(name="word", kind="classify")
30-
word = pynutil.insert(
31-
"name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"")
30+
word = pynutil.insert("name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"")
3231
self.fst = word.optimize()

nemo_text_processing/inverse_text_normalization/ko/utils.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,6 @@
1515
import os
1616

1717

18-
1918
def get_abs_path(rel_path):
2019

2120
return os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path
22-
23-

nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,7 @@
1515
import pynini
1616
from pynini.lib import pynutil
1717

18-
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import (
19-
NEMO_NOT_QUOTE,
20-
GraphFst,
21-
delete_space,
22-
)
18+
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
2319

2420

2521
class CardinalFst(GraphFst):
@@ -34,21 +30,17 @@ def __init__(self):
3430
pynutil.delete("negative:")
3531
+ delete_space
3632
+ pynutil.delete("\"")
37-
+ pynini.accep("-")
33+
+ pynini.accep("-")
3834
+ pynutil.delete("\"")
3935
)
4036

4137
optional_sign_output = pynini.closure(negative_sign + delete_space, 0, 1)
4238

43-
digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1)
39+
digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1)
4440
integer_cardinal = (
45-
pynutil.delete("integer:")
46-
+ delete_space
47-
+ pynutil.delete("\"")
48-
+ digits_from_tag
49-
+ pynutil.delete("\"")
41+
pynutil.delete("integer:") + delete_space + pynutil.delete("\"") + digits_from_tag + pynutil.delete("\"")
5042
)
5143

5244
graph = integer_cardinal
5345
final_graph = optional_sign_output + graph
54-
self.fst = self.delete_tokens(final_graph).optimize()
46+
self.fst = self.delete_tokens(final_graph).optimize()

nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst
1617
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst
1718
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst
18-
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst
1919

2020

2121
class VerbalizeFst(GraphFst):
@@ -30,7 +30,6 @@ def __init__(self):
3030
cardinal = CardinalFst()
3131
cardinal_graph = cardinal.fst
3232
word_graph = WordFst().fst
33-
34-
graph = (cardinal_graph|word_graph)
33+
34+
graph = cardinal_graph | word_graph
3535
self.fst = graph
36-

nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,16 @@
1818
import pynini
1919
from pynini.lib import pynutil
2020

21+
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main
2122
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst
22-
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space
2323

2424

2525
class VerbalizeFinalFst(GraphFst):
2626
"""
2727
Finite state transducer that verbalizes an entire sentence, e.g.
2828
tokens { name: "its" } tokens { time { hours: "12" minutes: "30" } } tokens { name: "now" } -> its 12:30 now
2929
"""
30+
3031
def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False):
3132
super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic)
3233
far_file = None

nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst
1919

2020

21-
2221
class WordFst(GraphFst):
2322
'''
2423
tokens { name: "一" } -> 一

nemo_text_processing/inverse_text_normalization/run_evaluate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def parse_args():
3535
parser.add_argument(
3636
"--lang",
3737
help="language",
38-
choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja","ko"],
38+
choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja", "ko"],
3939
default="en",
4040
type=str,
4141
)

0 commit comments

Comments
 (0)