Skip to content

Commit b272550

Browse files
pre-commit-ci[bot] authored and folivoramanh committed
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci

Signed-off-by: Mai Anh <palasek182@gmail.com>
1 parent a59bd41 commit b272550

File tree

6 files changed, with 21 additions and 92 deletions.

nemo_text_processing/text_normalization/pt/graph_utils.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,7 @@
4949
insert_space = pynutil.insert(" ")
5050
delete_extra_space = pynini.cross(pynini.closure(NEMO_WHITE_SPACE, 1), " ").optimize()
5151

52-
delete_preserve_order = pynini.closure(
53-
pynutil.delete(" preserve_order: true")
54-
)
52+
delete_preserve_order = pynini.closure(pynutil.delete(" preserve_order: true"))
5553

5654

5755
def generator_main(file_name: str, graphs: Dict[str, "pynini.FstLike"]) -> None:

nemo_text_processing/text_normalization/pt/taggers/date.py

Lines changed: 7 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -42,12 +42,8 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
4242
month_pairs = [(r[0], r[1]) for r in month_rows if len(r) >= 2]
4343
month_to_word = pynini.string_map(month_pairs).optimize()
4444

45-
day_10_31 = (
46-
(NEMO_DIGIT - "0") + NEMO_DIGIT
47-
) @ pynini.union(*[str(x) for x in range(10, 32)]) @ numbers
48-
day_02_09 = pynutil.delete("0") + (
49-
pynini.union(*[str(x) for x in range(2, 10)]) @ numbers
50-
)
45+
day_10_31 = ((NEMO_DIGIT - "0") + NEMO_DIGIT) @ pynini.union(*[str(x) for x in range(10, 32)]) @ numbers
46+
day_02_09 = pynutil.delete("0") + (pynini.union(*[str(x) for x in range(2, 10)]) @ numbers)
5147
day_2_9 = pynini.union(*[str(x) for x in range(2, 10)]) @ numbers
5248
day_inner = pynini.union(
5349
pynini.cross("01", "primeiro"),
@@ -92,9 +88,7 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
9288
day_spokens = set()
9389
for n in range(1, 32):
9490
for key in (str(n), f"{n:02d}"):
95-
dstr = pynini.shortestpath(
96-
pynini.compose(pynini.accep(key), day_inner.optimize())
97-
).string()
91+
dstr = pynini.shortestpath(pynini.compose(pynini.accep(key), day_inner.optimize())).string()
9892
day_spokens.add(dstr)
9993

10094
_preserve_tail = " preserve_order: true"
@@ -109,9 +103,7 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
109103
pynutil.insert('day: "' + day + '" month: "' + month + '" ')
110104
+ pynini.accep("year:")
111105
+ NEMO_SIGMA
112-
+ pynutil.delete(
113-
' month: "' + month + '" day: "' + day + '"' + _preserve_tail
114-
)
106+
+ pynutil.delete(' month: "' + month + '" day: "' + day + '"' + _preserve_tail)
115107
)
116108
ymd_to_dmy_graph = ymd_curr if ymd_to_dmy_graph is None else pynini.union(ymd_to_dmy_graph, ymd_curr)
117109

@@ -131,36 +123,9 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
131123
sep_accep = pynini.accep(pynini.escape(sep))
132124
del_sep = pynutil.delete(sep_accep)
133125

134-
dmy_core = (
135-
day_part
136-
+ del_sep
137-
+ insert_space
138-
+ month_part
139-
+ del_sep
140-
+ insert_space
141-
+ year_part
142-
+ preserve
143-
)
144-
iso_core = (
145-
year_part
146-
+ del_sep
147-
+ insert_space
148-
+ month_part
149-
+ del_sep
150-
+ insert_space
151-
+ day_part
152-
+ preserve
153-
)
154-
mdy_core = (
155-
month_part
156-
+ del_sep
157-
+ insert_space
158-
+ day_part
159-
+ del_sep
160-
+ insert_space
161-
+ year_part
162-
+ preserve
163-
)
126+
dmy_core = day_part + del_sep + insert_space + month_part + del_sep + insert_space + year_part + preserve
127+
iso_core = year_part + del_sep + insert_space + month_part + del_sep + insert_space + day_part + preserve
128+
mdy_core = month_part + del_sep + insert_space + day_part + del_sep + insert_space + year_part + preserve
164129

165130
lhs_dmy = one_or_two_digits + sep_accep + one_or_two_digits + sep_accep + year_four
166131
lhs_iso = year_four + sep_accep + one_or_two_digits + sep_accep + one_or_two_digits

nemo_text_processing/text_normalization/pt/taggers/time.py

Lines changed: 8 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,7 @@
1515
import pynini
1616
from pynini.lib import pynutil
1717

18-
from nemo_text_processing.text_normalization.pt.graph_utils import (
19-
NEMO_DIGIT,
20-
GraphFst,
21-
delete_space,
22-
insert_space,
23-
)
18+
from nemo_text_processing.text_normalization.pt.graph_utils import NEMO_DIGIT, GraphFst, delete_space, insert_space
2419
from nemo_text_processing.text_normalization.pt.utils import get_abs_path, load_labels
2520

2621

@@ -50,35 +45,23 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
5045
pynini.closure(pynutil.delete("0"), 0, 1) + NEMO_DIGIT
5146
)
5247

53-
graph_hour = (
54-
delete_leading_zero_to_double_digit
55-
@ pynini.union(*labels_hour)
56-
@ cardinal_graph
57-
)
48+
graph_hour = delete_leading_zero_to_double_digit @ pynini.union(*labels_hour) @ cardinal_graph
5849

5950
graph_minute_single = pynini.union(*labels_minute_single) @ cardinal_graph
6051
graph_minute_double = pynini.union(*labels_minute_double) @ cardinal_graph
6152
final_graph_minute = (
6253
pynutil.insert('minutes: "')
63-
+ (
64-
pynutil.delete("0") + graph_minute_single
65-
| graph_minute_double
66-
)
54+
+ (pynutil.delete("0") + graph_minute_single | graph_minute_double)
6755
+ pynutil.insert('"')
6856
)
6957

7058
final_graph_second = (
7159
pynutil.insert('seconds: "')
72-
+ (
73-
pynutil.delete("0") + graph_minute_single
74-
| graph_minute_double
75-
)
60+
+ (pynutil.delete("0") + graph_minute_single | graph_minute_double)
7661
+ pynutil.insert('"')
7762
)
7863

79-
final_graph_hour = (
80-
pynutil.insert('hours: "') + graph_hour + pynutil.insert('"')
81-
)
64+
final_graph_hour = pynutil.insert('hours: "') + graph_hour + pynutil.insert('"')
8265

8366
delete_h = pynini.union(
8467
pynutil.delete(pynini.accep(pynini.escape("h"))),
@@ -96,15 +79,8 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
9679
if len(row) < 2 or not row[0].strip():
9780
continue
9881
tail, tag_val = row[0].strip(), row[1].strip()
99-
period_branches.append(
100-
pynutil.delete(tail) + pynutil.insert(f'suffix: "{tag_val}"')
101-
)
102-
suffix_tail = (
103-
delete_space
104-
+ pynutil.delete("da")
105-
+ delete_space
106-
+ pynini.union(*period_branches)
107-
)
82+
period_branches.append(pynutil.delete(tail) + pynutil.insert(f'suffix: "{tag_val}"'))
83+
suffix_tail = delete_space + pynutil.delete("da") + delete_space + pynini.union(*period_branches)
10884
optional_suffix = pynini.closure(insert_space + suffix_tail, 0, 1)
10985

11086
graph_hm = (
@@ -123,12 +99,7 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
12399
+ pynutil.insert(" preserve_order: true")
124100
)
125101

126-
graph_h_only = (
127-
final_graph_hour
128-
+ delete_h
129-
+ optional_suffix
130-
+ pynutil.insert(" preserve_order: true")
131-
)
102+
graph_h_only = final_graph_hour + delete_h + optional_suffix + pynutil.insert(" preserve_order: true")
132103

133104
graph_hms = (
134105
final_graph_hour

nemo_text_processing/text_normalization/pt/verbalizers/verbalize.py

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -42,13 +42,6 @@ def __init__(self, deterministic: bool = True):
4242
decimal = DecimalFst(deterministic=deterministic)
4343
date = DateFst(deterministic=deterministic)
4444
time = TimeFst(deterministic=deterministic)
45-
graph = (
46-
fraction.fst
47-
| decimal.fst
48-
| date.fst
49-
| time.fst
50-
| ordinal.fst
51-
| cardinal.fst
52-
)
45+
graph = fraction.fst | decimal.fst | date.fst | time.fst | ordinal.fst | cardinal.fst
5346

5447
self.fst = graph

tests/nemo_text_processing/pt/test_date.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,9 +32,10 @@ def test_denorm(self, test_input, expected):
3232
assert pred == expected
3333

3434
normalizer = Normalizer(lang='pt', cache_dir=CACHE_DIR, overwrite_cache=False, input_case='cased')
35+
3536
@parameterized.expand(parse_test_case_file('pt/data_text_normalization/test_cases_date.txt'))
3637
@pytest.mark.run_only_on('CPU')
3738
@pytest.mark.unit
3839
def test_norm(self, test_input, expected):
3940
pred = self.normalizer.normalize(test_input, verbose=False)
40-
assert pred == expected
41+
assert pred == expected

tests/nemo_text_processing/pt/test_time.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,9 +31,10 @@ def test_denorm(self, test_input, expected):
3131
assert pred == expected
3232

3333
normalizer = Normalizer(lang='pt', cache_dir=CACHE_DIR, overwrite_cache=False, input_case='cased')
34+
3435
@parameterized.expand(parse_test_case_file('pt/data_text_normalization/test_cases_time.txt'))
3536
@pytest.mark.run_only_on('CPU')
3637
@pytest.mark.unit
3738
def test_norm(self, test_input, expected):
3839
pred = self.normalizer.normalize(test_input, verbose=False)
39-
assert pred == expected
40+
assert pred == expected

0 commit comments

Comments
 (0)