Skip to content

Commit 41ac59d

Browse files
committed
This reverts commit f893d89, reversing
changes made to 9f7e876. Signed-off-by: hmlee245 <hmlee245@gmail.com>
1 parent 4df2965 commit 41ac59d

13 files changed

Lines changed: 50 additions & 72 deletions

File tree

nemo_text_processing/inverse_text_normalization/inverse_normalize.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def __init__(
135135
from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst
136136
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import (
137137
VerbalizeFinalFst,
138-
)
138+
)
139139

140140
self.tagger = ClassifyFst(
141141
cache_dir=cache_dir, whitelist=whitelist, overwrite_cache=overwrite_cache, input_case=input_case
@@ -180,7 +180,7 @@ def parse_args():
180180
parser.add_argument(
181181
"--language",
182182
help="language",
183-
choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja', 'ko'],
183+
choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja','ko'],
184184
default="en",
185185
type=str,
186186
)

nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py

Lines changed: 13 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_DIGIT, GraphFst, delete_space
2020
from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path
2121

22-
2322
class CardinalFst(GraphFst):
2423
"""
2524
Finite state transducer for classifying cardinals
@@ -35,19 +34,13 @@ def __init__(self):
3534
graph_zero = pynini.cross("영", "0")
3635
graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
3736

38-
<<<<<<< HEAD
39-
=======
40-
graph_negative = pynini.cross("마이너스", "-")
41-
graph_negative += delete_space
42-
43-
>>>>>>> 77da79d12b1378502cc2b382cd6933b02e7c2545
4437
ten = pynutil.delete("십")
4538
ten_alt = pynini.cross("십", "1")
4639
### Handles the tens digit of a two-digit number, e.g. the 2 in 20
4740
graph_ten_component = pynini.union((graph_digit + ten) | ten_alt, pynutil.insert("0"))
4841
### Handles the ones digit of the number, e.g. 1, 2, 3, 4, 5, ...
4942
graph_ten_component += graph_digit | pynutil.insert("0")
50-
43+
5144
hundred = pynutil.delete("백")
5245
hundred_alt = pynini.cross("백", "1")
5346
graph_hundred_component = pynini.union(((graph_digit + hundred) | hundred_alt), pynutil.insert("0"))
@@ -62,55 +55,46 @@ def __init__(self):
6255
tenthousand_alt = pynini.cross("만", "1")
6356
### "만" covers the next four digits of a number, up to the next unit "억", so insert "0000" to reserve four digits' worth of space
6457
### From "만" onward, each larger unit adds four more digits followed by graph_thousand_component (0000-9999), because Korean number units step up every four digits
65-
graph_tenthousand_component = pynini.union(
66-
((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000")
67-
)
58+
graph_tenthousand_component = pynini.union(((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000"))
6859
graph_tenthousand_component += graph_thousand_component
6960

7061
hundredmillion = pynutil.delete("억")
7162
hundredmillion_alt = pynini.cross("억", "1")
72-
graph_hundredmillion_component = pynini.union(
73-
((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000")
74-
)
75-
graph_hundredmillion_component += graph_tenthousand_component
76-
63+
graph_hundredmillion_component = pynini.union(((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000"))
64+
graph_hundredmillion_component += graph_tenthousand_component
65+
7766
trillion = pynutil.delete("조")
7867
trillion_alt = pynini.cross("조", "1")
79-
graph_trillion_component = pynini.union(
80-
((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000")
81-
)
68+
graph_trillion_component = pynini.union(((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000"))
8269
graph_trillion_component += graph_hundredmillion_component
8370

8471
tenquadrillion = pynutil.delete("경")
8572
tenquadrillion_alt = pynini.cross("경", "1")
86-
graph_tenquadrillion_component = pynini.union(
87-
((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000")
88-
)
73+
graph_tenquadrillion_component = pynini.union(((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000"))
8974
graph_tenquadrillion_component += graph_trillion_component
9075

76+
9177
graph = pynini.union(
9278
### From biggest unit to smallest, everything is included
93-
graph_tenquadrillion_component
94-
| graph_zero
79+
graph_tenquadrillion_component|
80+
graph_zero
9581
)
9682

9783
leading_zero = (
9884
pynutil.delete(pynini.closure("0")) + pynini.difference(NEMO_DIGIT, "0") + pynini.closure(NEMO_DIGIT)
9985
)
10086
graph_nonzero = graph @ leading_zero
10187
graph = pynini.union(graph_nonzero, graph_zero)
102-
88+
10389
graph = graph @ leading_zero | graph_zero
10490

10591
self.just_cardinals = graph
10692

107-
optional_sign = pynini.closure(
108-
(pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space, 0, 1
109-
)
93+
optional_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1)
11094

11195
final_graph = (
11296
optional_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"")
11397
) | (pynutil.insert("integer: \"") + graph + pynutil.insert("\""))
11498

11599
final_graph = self.add_tokens(final_graph)
116-
self.fst = final_graph.optimize()
100+
self.fst = final_graph.optimize()

nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,13 @@
1919
import pynini
2020
from pynini.lib import pynutil
2121

22-
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main
2322
from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst
2423
from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst
24+
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import (
25+
INPUT_LOWER_CASED,
26+
GraphFst,
27+
generator_main,
28+
)
2529

2630

2731
class ClassifyFst(GraphFst):
@@ -58,13 +62,13 @@ def __init__(
5862
cardinal = CardinalFst()
5963
cardinal_graph = cardinal.fst
6064
word_graph = WordFst().fst
61-
classify = pynutil.add_weight(cardinal_graph, 1.1) | pynutil.add_weight(word_graph, 100)
62-
65+
classify = (pynutil.add_weight(cardinal_graph, 1.1)| pynutil.add_weight(word_graph, 100))
66+
6367
token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ")
6468
tagger = pynini.closure(token, 1)
6569

6670
self.fst = tagger
6771

6872
if far_file:
6973
generator_main(far_file, {"tokenize_and_classify": self.fst})
70-
logging.info(f"ClassifyFst grammars are saved to {far_file}.")
74+
logging.info(f"ClassifyFst grammars are saved to {far_file}.")

nemo_text_processing/inverse_text_normalization/ko/taggers/word.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,6 @@ class WordFst(GraphFst):
2727

2828
def __init__(self):
2929
super().__init__(name="word", kind="classify")
30-
word = pynutil.insert("name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"")
30+
word = pynutil.insert(
31+
"name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"")
3132
self.fst = word.optimize()

nemo_text_processing/inverse_text_normalization/ko/utils.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
import os
1616

1717

18+
1819
def get_abs_path(rel_path):
1920

2021
return os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path
22+
23+

nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,3 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
<<<<<<< HEAD
15-
=======
16-
17-
from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst
18-
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst
19-
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst
20-
>>>>>>> 77da79d12b1378502cc2b382cd6933b02e7c2545

nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@
1515
import pynini
1616
from pynini.lib import pynutil
1717

18-
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
18+
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import (
19+
NEMO_NOT_QUOTE,
20+
GraphFst,
21+
delete_space,
22+
)
1923

2024

2125
class CardinalFst(GraphFst):
@@ -30,17 +34,21 @@ def __init__(self):
3034
pynutil.delete("negative:")
3135
+ delete_space
3236
+ pynutil.delete("\"")
33-
+ pynini.accep("-")
37+
+ pynini.accep("-")
3438
+ pynutil.delete("\"")
3539
)
3640

3741
optional_sign_output = pynini.closure(negative_sign + delete_space, 0, 1)
3842

39-
digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1)
43+
digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1)
4044
integer_cardinal = (
41-
pynutil.delete("integer:") + delete_space + pynutil.delete("\"") + digits_from_tag + pynutil.delete("\"")
45+
pynutil.delete("integer:")
46+
+ delete_space
47+
+ pynutil.delete("\"")
48+
+ digits_from_tag
49+
+ pynutil.delete("\"")
4250
)
4351

4452
graph = integer_cardinal
4553
final_graph = optional_sign_output + graph
46-
self.fst = self.delete_tokens(final_graph).optimize()
54+
self.fst = self.delete_tokens(final_graph).optimize()

nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16-
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst
1716
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst
1817
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst
18+
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst
1919

2020

2121
class VerbalizeFst(GraphFst):
@@ -30,6 +30,7 @@ def __init__(self):
3030
cardinal = CardinalFst()
3131
cardinal_graph = cardinal.fst
3232
word_graph = WordFst().fst
33-
34-
graph = cardinal_graph | word_graph
33+
34+
graph = (cardinal_graph|word_graph)
3535
self.fst = graph
36+

nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,24 +18,15 @@
1818
import pynini
1919
from pynini.lib import pynutil
2020

21-
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main
2221
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst
23-
24-
<<<<<<< HEAD
25-
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main
26-
27-
=======
28-
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst
29-
30-
>>>>>>> 77da79d12b1378502cc2b382cd6933b02e7c2545
22+
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space
3123

3224

3325
class VerbalizeFinalFst(GraphFst):
3426
"""
3527
Finite state transducer that verbalizes an entire sentence, e.g.
3628
tokens { name: "its" } tokens { time { hours: "12" minutes: "30" } } tokens { name: "now" } -> its 12:30 now
3729
"""
38-
3930
def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False):
4031
super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic)
4132
far_file = None

nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst
1919

2020

21+
2122
class WordFst(GraphFst):
2223
'''
2324
tokens { name: "一" } -> 一

0 commit comments

Comments
 (0)