Skip to content

Commit cd610a1

Browse files
zoobereq, anand-nv
authored and committed
Fix space issue with ZH ITN (#244)
* Fix space issue with ZH ITN
  Signed-off-by: Anand Joseph <anajoseph@nvidia.com>
* Update Jenkinsfile
  Update FST paths
  Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com>
---------
Signed-off-by: Anand Joseph <anajoseph@nvidia.com>
Signed-off-by: anand-nv <105917641+anand-nv@users.noreply.github.com>
Signed-off-by: Simon Zuberek <simon@zuberek.net>
Co-authored-by: Anand Joseph <anajoseph@nvidia.com>
Co-authored-by: anand-nv <105917641+anand-nv@users.noreply.github.com>
1 parent bb3e4a3 commit cd610a1

4 files changed

Lines changed: 6 additions & 5 deletions

File tree

Jenkinsfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ pipeline {
2222
RU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
2323
VI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
2424
SV_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
25-
ZH_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-30-24-0'
25+
ZH_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-13-24-0'
2626
IT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/08-22-24-0'
2727
HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
2828
MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'

nemo_text_processing/inverse_text_normalization/zh/taggers/tokenize_and_classify.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
GraphFst,
2222
delete_extra_space,
2323
delete_space,
24+
delete_zero_or_one_space,
2425
generator_main,
2526
)
2627
from nemo_text_processing.inverse_text_normalization.zh.taggers.cardinal import CardinalFst
@@ -92,12 +93,12 @@ def __init__(
9293
)
9394

9495
punct = pynutil.insert("tokens { ") + pynutil.add_weight(punct_graph, weight=1.1) + pynutil.insert(" }")
95-
token = pynutil.insert("tokens { ") + classify + pynutil.insert(" }")
96+
token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ")
9697
token_plus_punct = (
9798
pynini.closure(punct + pynutil.insert(" ")) + token + pynini.closure(pynutil.insert(" ") + punct)
9899
)
99100

100-
graph = token_plus_punct + pynini.closure(delete_extra_space + token_plus_punct)
101+
graph = token_plus_punct + pynini.closure(delete_zero_or_one_space + token_plus_punct)
101102
graph = delete_space + graph + delete_space
102103

103104
self.fst = graph.optimize()

nemo_text_processing/inverse_text_normalization/zh/taggers/word.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,5 +26,5 @@ class WordFst(GraphFst):
2626

2727
def __init__(self):
2828
super().__init__(name="word", kind="classify")
29-
word = pynutil.insert('name: "') + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert('"')
29+
word = pynutil.insert('name: "') + NEMO_NOT_SPACE + pynutil.insert('"')
3030
self.fst = word.optimize()

nemo_text_processing/inverse_text_normalization/zh/verbalizers/verbalize_final.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,5 +40,5 @@ def __init__(self):
4040
+ delete_space
4141
+ pynutil.delete("}")
4242
)
43-
graph = delete_space + pynini.closure(graph + delete_extra_space) + graph + delete_space
43+
graph = delete_space + pynini.closure(graph + delete_space) + graph + delete_space
4444
self.fst = graph

0 commit comments

Comments
 (0)