Skip to content

Commit 6e6280f

Browse files
authored
Merge branch 'main' into staging_hi_itn
Signed-off-by: Mariana <47233618+mgrafu@users.noreply.github.com>
2 parents 628034e + e15caea commit 6e6280f

3 files changed

Lines changed: 24 additions & 23 deletions

File tree

Jenkinsfile

Lines changed: 18 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -10,25 +10,24 @@ pipeline {
1010
disableConcurrentBuilds(abortPrevious: true)
1111
}
1212
environment {
13-
14-
AR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-24-24-0'
15-
DE_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-23-24-0'
16-
EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/09-04-24-0'
17-
ES_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/09-25-24-0'
18-
ES_EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/08-30-24-0'
19-
FR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-07-25-0'
20-
HU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/07-16-24-0'
21-
PT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
22-
RU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
23-
VI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
24-
SV_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
25-
ZH_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-13-24-0'
26-
IT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/08-22-24-0'
27-
HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
28-
MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
29-
JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1'
30-
HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/08-01-25-1'
31-
DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
13+
AR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-24-24-0'
14+
DE_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-23-24-0'
15+
EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-25-25-0'
16+
ES_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-25-24-0'
17+
ES_EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/08-30-24-0'
18+
FR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-07-25-0'
19+
HU_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/07-16-24-0'
20+
PT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
21+
RU_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
22+
VI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
23+
SV_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
24+
ZH_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/11-13-24-0'
25+
IT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/08-22-24-0'
26+
HY_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-0'
27+
MR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-1'
28+
JA_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-17-24-1'
29+
HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-22-25-0'
30+
DEFAULT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
3231
}
3332
stages {
3433

nemo_text_processing/text_normalization/en/taggers/electronic.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -127,14 +127,15 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
127127

128128
full_stop_accep = pynini.accep(".")
129129
dollar_accep = pynini.accep("$") # Include for the correct transduction of the money graph
130-
excluded_symbols = full_stop_accep | dollar_accep
130+
excluded_symbols = full_stop_accep | dollar_accep | pynini.accep(",")
131131
filtered_symbols = pynini.difference(accepted_symbols, excluded_symbols)
132132
accepted_characters = NEMO_ALPHA | NEMO_DIGIT | filtered_symbols
133133
domain_component = full_stop_accep + pynini.closure(accepted_characters, 2)
134-
graph_domain = (
134+
graph_domain = pynutil.add_weight(
135135
pynutil.insert('domain: "')
136136
+ (pynini.closure(accepted_characters, 1) + pynini.closure(domain_component, 1))
137-
+ pynutil.insert('"')
137+
+ pynutil.insert('"'),
138+
0.1,
138139
).optimize()
139140

140141
graph |= pynutil.add_weight(graph_domain, MIN_NEG_WEIGHT)

tests/nemo_text_processing/en/data_text_normalization/test_cases_electronic.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,4 +41,5 @@ https://www.nvidia.com/dgx-basepod/~HTTPS colon slash slash WWW dot NVIDIA dot c
4141
i can use your card ending in 8876~i can use your card ending in eight eight seven six
4242
upgrade/update~upgrade slash update
4343
upgrade / update~upgrade slash update
44-
upgrade/update/downgrade~upgrade slash update slash downgrade
44+
upgrade/update/downgrade~upgrade slash update slash downgrade
45+
5.4, or 5.5~five point four, or five point five

0 commit comments

Comments
 (0)