Skip to content

Commit 41465e0

Browse files
read double digits for telephone grammar (#32)
* read double digits for telephone grammar

  Signed-off-by: Larisa Kempbell <lkempbell@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* import zero graph instead of hard coding

  Signed-off-by: Larisa Kempbell <lkempbell@nvidia.com>

---------

Signed-off-by: Larisa Kempbell <lkempbell@nvidia.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 1b81616 commit 41465e0

2 files changed

Lines changed: 23 additions & 3 deletions

File tree

nemo_text_processing/inverse_text_normalization/fr/taggers/telephone.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,36 @@ def __init__(self):
5252
(graph_ties + delete_hyphen + graph_digit),
5353
)
5454

55+
# accept `double <digit>` for any digit 0-9 and emit that digit twice, e.g. `double zéro` -> `00`, `double neuf` -> `99`
56+
single_digits = graph_digit | graph_zero
57+
digit_words = pynini.union(graph_digit.optimize(), graph_zero).invert()
58+
59+
doubled_digit = pynini.union(
60+
*[
61+
pynini.cross(
62+
pynini.project(str(i) @ digit_words, "output")
63+
+ pynini.accep(" ")
64+
+ pynini.project(str(i) @ digit_words, "output"),
65+
pynutil.insert("double ") + pynini.project(str(i) @ digit_words, "output"),
66+
)
67+
for i in range(10)
68+
]
69+
)
70+
doubled_digit.invert()
71+
digit_twice = single_digits + pynutil.delete(" ") + single_digits
72+
doubled_digit @= digit_twice
73+
5574
graph_first_pair = graph_zero + delete_space + graph_digit
5675
graph_first_pair |= pynutil.insert("0") + graph_digit # if zero is omitted
76+
graph_first_pair |= doubled_digit
5777
graph_first_pair += (
5878
delete_space + insert_space
5979
) # delete_space since closure allows possible gaps to be removed
6080

6181
# All digits
62-
single_digits = graph_digit | graph_zero
63-
6482
graph_pair_all_digits = single_digits + delete_space
6583
graph_pair_all_digits += single_digits
84+
graph_pair_all_digits |= doubled_digit
6685

6786
graph_all_digits = pynini.closure(graph_pair_all_digits + delete_space + insert_space, 3, 3)
6887
graph_all_digits = graph_first_pair + graph_all_digits + graph_pair_all_digits
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
zéro deux douze trente-deux trente trente~02 12 32 30 30
22
zéro deux une deux trois deux trois zéro trois zéro~02 12 32 30 30
33
deux douze trente-deux trente trente~02 12 32 30 30
4-
deux une deux trois deux trois zéro trois zéro~02 12 32 30 30
4+
deux une deux trois deux trois zéro trois zéro~02 12 32 30 30
5+
double neuf douze trente-deux trente trente~99 12 32 30 30

0 commit comments

Comments
 (0)