Skip to content

Commit b4851b8

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent fd3e7bf commit b4851b8

5 files changed

Lines changed: 62 additions & 48 deletions

File tree

nemo_text_processing/text_normalization/fr/taggers/date.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
11
import pynini
22
from pynini.lib import pynutil
3-
from nemo_text_processing.text_normalization.fr.utils import get_abs_path
4-
5-
from nemo_text_processing.text_normalization.en.graph_utils import GraphFst, NEMO_DIGIT
63

4+
from nemo_text_processing.text_normalization.en.graph_utils import NEMO_DIGIT, GraphFst
5+
from nemo_text_processing.text_normalization.fr.utils import get_abs_path
76

8-
# TODO: add articles? 'le...'
7+
# TODO: add articles? 'le...'
98

109
month_numbers = pynini.string_file(get_abs_path("data/dates/months.tsv"))
1110
eras = pynini.string_file(get_abs_path("data/dates/eras.tsv"))
12-
delete_leading_zero = (pynutil.delete("0") | (NEMO_DIGIT - "0")) + NEMO_DIGIT #reminder, NEMO_DIGIT = filter on digits
11+
delete_leading_zero = (
12+
pynutil.delete("0") | (NEMO_DIGIT - "0")
13+
) + NEMO_DIGIT # reminder, NEMO_DIGIT = filter on digits
14+
1315

1416
class DateFst(GraphFst):
1517
''' Finite state transducer for classifying dates, e.g.:
1618
'02.03.2003' -> date {day: 'deux' month: 'mars' year: 'deux mille trois' preserve_order: true}
1719
'''
20+
1821
def __init__(self, cardinal: GraphFst, deterministic: bool = True):
1922
super().__init__(name="dates", kind="classify")
2023

@@ -23,10 +26,10 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
2326
# 'le' -> 'le', 'les' -> 'les'
2427
le_determiner = pynini.accep("le ") | pynini.accep("les ")
2528
self.optional_le = pynini.closure(le_determiner, 0, 1)
26-
27-
# '01' -> 'un'
29+
30+
# '01' -> 'un'
2831
optional_leading_zero = delete_leading_zero | NEMO_DIGIT
29-
valid_day_number = pynini.union(*[str(x) for x in range(1,32)])
32+
valid_day_number = pynini.union(*[str(x) for x in range(1, 32)])
3033
premier = pynini.string_map([("1", "premier")])
3134
day_number_to_word = premier | cardinal_graph
3235

@@ -59,7 +62,7 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
5962
)
6063

6164
# Accepts "janvier", "février", etc
62-
month_name_graph = pynutil.insert("month: \"") + month_numbers.project("output") + pynutil.insert("\"")
65+
month_name_graph = pynutil.insert("month: \"") + month_numbers.project("output") + pynutil.insert("\"")
6366

6467
self.fst |= (
6568
pynutil.insert("date { ")
@@ -73,9 +76,8 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
7376
# Accepts "70s", "80s", etc
7477
self.fst |= pynutil.insert("date { decade: \"") + eras + pynutil.insert("\" preserve_order: true }")
7578

76-
7779
# Accepts date ranges, "17-18-19 juin" -> date { day: "17" day: "18" day: "19" }
78-
for separator in ["-", "/"]:
80+
for separator in ["-", "/"]:
7981
self.fst |= (
8082
pynutil.insert("date { ")
8183
+ pynini.closure(self.day_graph + pynutil.delete(separator) + pynutil.insert(" "), 1)
@@ -89,15 +91,16 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
8991
self.fst = self.fst.optimize()
9092

9193

92-
9394
def apply_fst(text, fst):
9495
try:
9596
print(text, "-->", pynini.shortestpath(text @ fst).string())
9697
except pynini.FstOpError:
9798
print(f"Error: No valid output with given input: '{text}'")
9899

100+
99101
if __name__ == "__main__":
100102
from nemo_text_processing.text_normalization.fr.taggers.cardinal import CardinalFst
103+
101104
fst = DateFst(CardinalFst())
102105

103106
print('DETERMINER')
@@ -125,7 +128,7 @@ def apply_fst(text, fst):
125128
apply_fst("02/03/2003", fst.fst)
126129
apply_fst("02-03-2003", fst.fst)
127130
apply_fst("le 02.03.2003", fst.fst)
128-
131+
129132
apply_fst("02.03", fst.fst)
130133
apply_fst("17 janvier", fst.fst)
131134
apply_fst("10 mars 2023", fst.fst)
@@ -135,4 +138,6 @@ def apply_fst(text, fst):
135138
apply_fst("80s", fst.fst)
136139

137140
print("\nDATE RANGES")
138-
apply_fst("les 17/18/19 juin", fst.fst) # returns: date { day: "les dix-sept" day: "dix-huit" day: "dix-neuf" month: "juin" preserve_order: true }
141+
apply_fst(
142+
"les 17/18/19 juin", fst.fst
143+
) # returns: date { day: "les dix-sept" day: "dix-huit" day: "dix-neuf" month: "juin" preserve_order: true }

nemo_text_processing/text_normalization/fr/taggers/tokenize_and_classify.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@
2626
)
2727
from nemo_text_processing.text_normalization.en.taggers.punctuation import PunctuationFst
2828
from nemo_text_processing.text_normalization.fr.taggers.cardinal import CardinalFst
29+
from nemo_text_processing.text_normalization.fr.taggers.date import DateFst
2930
from nemo_text_processing.text_normalization.fr.taggers.decimals import DecimalFst
3031
from nemo_text_processing.text_normalization.fr.taggers.fraction import FractionFst
3132
from nemo_text_processing.text_normalization.fr.taggers.ordinal import OrdinalFst
3233
from nemo_text_processing.text_normalization.fr.taggers.whitelist import WhiteListFst
3334
from nemo_text_processing.text_normalization.fr.taggers.word import WordFst
34-
from nemo_text_processing.text_normalization.fr.taggers.date import DateFst
3535
from nemo_text_processing.utils.logging import logger
3636

3737

@@ -86,7 +86,7 @@ def __init__(
8686
self.whitelist = WhiteListFst(input_case=input_case, deterministic=deterministic, input_file=whitelist)
8787
whitelist_graph = self.whitelist.fst
8888
punct_graph = PunctuationFst(deterministic=deterministic).fst
89-
89+
9090
self.date = DateFst(self.cardinal, deterministic=deterministic)
9191
date_graph = self.date.fst
9292

nemo_text_processing/text_normalization/fr/verbalizers/date.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@
1919
NEMO_NOT_QUOTE,
2020
NEMO_SPACE,
2121
GraphFst,
22-
delete_preserve_order
22+
delete_preserve_order,
2323
)
2424

25+
2526
class DateFst(GraphFst):
2627
"""
2728
Finite state transducer for verbalizing date, e.g.
@@ -41,7 +42,12 @@ def __init__(self, deterministic: bool = True):
4142
year = pynutil.delete("year: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")
4243
decade = pynutil.delete("decade: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")
4344

44-
graph_dmy = pynini.closure(day + NEMO_SPACE, 1) + month + pynini.closure(NEMO_SPACE + year, 0, 1) + delete_preserve_order
45+
graph_dmy = (
46+
pynini.closure(day + NEMO_SPACE, 1)
47+
+ month
48+
+ pynini.closure(NEMO_SPACE + year, 0, 1)
49+
+ delete_preserve_order
50+
)
4551
graph_my = month + NEMO_SPACE + year + delete_preserve_order
4652
graph_decade = decade + delete_preserve_order
4753

@@ -57,8 +63,11 @@ def apply_fst(text, fst):
5763
except pynini.FstOpError:
5864
print(f"Error: No valid output with given input: '{text}'")
5965

66+
6067
if __name__ == "__main__":
6168
fst = DateFst()
6269

6370
# tagger output for "les 17/18/19 juin"
64-
apply_fst('date { day: "les dix-sept" day: "dix-huit" day: "dix-neuf" month: "juin" preserve_order: true }', fst.fst)
71+
apply_fst(
72+
'date { day: "les dix-sept" day: "dix-huit" day: "dix-neuf" month: "juin" preserve_order: true }', fst.fst
73+
)

nemo_text_processing/text_normalization/fr/verbalizers/verbalize.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
from nemo_text_processing.text_normalization.en.graph_utils import GraphFst
1515
from nemo_text_processing.text_normalization.en.verbalizers.whitelist import WhiteListFst
1616
from nemo_text_processing.text_normalization.fr.verbalizers.cardinal import CardinalFst
17+
from nemo_text_processing.text_normalization.fr.verbalizers.date import DateFst
1718
from nemo_text_processing.text_normalization.fr.verbalizers.decimals import DecimalFst
1819
from nemo_text_processing.text_normalization.fr.verbalizers.fraction import FractionFst
1920
from nemo_text_processing.text_normalization.fr.verbalizers.ordinal import OrdinalFst
20-
from nemo_text_processing.text_normalization.fr.verbalizers.date import DateFst
2121

2222

2323
class VerbalizeFst(GraphFst):

nemo_text_processing/text_normalization/fr_tutorial/taggers/my_test_script.py

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,47 +3,47 @@
33

44
from nemo_text_processing.text_normalization.fr.utils import get_abs_path
55

6+
67
def apply_fst(text, fst):
7-
""" Given a string input, returns the output string
8+
""" Given a string input, returns the output string
89
produced by traversing the path with lowest weight.
910
If no valid path accepts input string, returns an
1011
error.
1112
"""
12-
try:
13-
print(pynini.shortestpath(text @ fst).string())
14-
except pynini.FstOpError:
15-
print(f"Error: No valid output with given input: '{text}'")
16-
17-
zero = pynini.string_map([("zéro","0")]) # French only pronounces zeroes as stand alone
18-
digits_map = pynini.string_map([ # pynini function that creates explicit input-output mappings for a WFST
19-
("un","1"),
20-
("une","1"),
21-
("deux","2"),
22-
("trois","3"),
23-
("quatre","4"),
24-
("cinq","5"),
25-
("six","6"),
26-
("sept","7"),
27-
("huit","8"),
28-
("neuf","9")
29-
])
13+
try:
14+
print(pynini.shortestpath(text @ fst).string())
15+
except pynini.FstOpError:
16+
print(f"Error: No valid output with given input: '{text}'")
17+
18+
19+
zero = pynini.string_map([("zéro", "0")]) # French only pronounces zeroes as stand alone
20+
digits_map = pynini.string_map(
21+
[ # pynini function that creates explicit input-output mappings for a WFST
22+
("un", "1"),
23+
("une", "1"),
24+
("deux", "2"),
25+
("trois", "3"),
26+
("quatre", "4"),
27+
("cinq", "5"),
28+
("six", "6"),
29+
("sept", "7"),
30+
("huit", "8"),
31+
("neuf", "9"),
32+
]
33+
)
3034

3135
digits = pynini.string_file("data/numbers/digits.tsv")
3236

33-
teens = pynini.string_map([
34-
("onze", "11"),
35-
("douze", "12"),
36-
("treize", "13"),
37-
("quatorze", "14"),
38-
("quinze", "16"),
39-
])
37+
teens = pynini.string_map([("onze", "11"), ("douze", "12"), ("treize", "13"), ("quatorze", "14"), ("quinze", "16"),])
4038

4139
tens = pynini.string_map([("dix", "1")])
42-
delete_hyphen = pynini.closure(pynutil.delete("-"), 0, 1) # Applies a closure from 0-1 of operation. Equivalent to regex /?/
40+
delete_hyphen = pynini.closure(
41+
pynutil.delete("-"), 0, 1
42+
) # Applies a closure from 0-1 of operation. Equivalent to regex /?/
4343

4444
graph_tens = tens + delete_hyphen + digits
4545
graph_tens_and_teens = graph_tens | teens
4646

4747
graph_digits = digits | pynutil.insert("0")
4848

49-
apply_fst("un", graph_tens_and_teens)
49+
apply_fst("un", graph_tens_and_teens)

0 commit comments

Comments
 (0)