Skip to content

Commit 31db380

Browse files
authored
Merge branch 'NVIDIA:main' into hebrew_itn
2 parents 9dd3c35 + fcebf16 commit 31db380

41 files changed

Lines changed: 1037 additions & 113 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.pre-commit-config.yaml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,30 +22,30 @@ ci:
2222

2323
repos:
2424
- repo: https://github.com/pre-commit/pre-commit-hooks
25-
rev: v5.0.0
25+
rev: v6.0.0
2626
hooks:
2727
- id: check-yaml
2828
- id: check-case-conflict
2929
- id: detect-private-key
3030
- id: requirements-txt-fixer
3131

3232
- repo: https://github.com/PyCQA/flake8
33-
rev: 7.2.0
33+
rev: 7.3.0
3434
hooks:
3535
- id: flake8
3636
args:
3737
- --select=W605
3838

3939
- repo: https://github.com/PyCQA/isort
40-
rev: 6.0.1
40+
rev: 6.1.0
4141
hooks:
4242
- id: isort
4343
name: Format imports
4444
args: [ --multi-line=3, --trailing-comma, --force-grid-wrap=0, --use-parentheses, --line-width=119, -rc, -ws ]
4545
exclude: docs/
4646

47-
- repo: https://github.com/psf/black
48-
rev: 25.1.0
47+
- repo: https://github.com/psf/black-pre-commit-mirror
48+
rev: 25.9.0
4949
hooks:
5050
- id: black
5151
name: Format code

Jenkinsfile

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,18 +2,17 @@ pipeline {
22
agent {
33
docker {
44
image 'tnitn_ci_py310:24.07'
5-
args '-v /mnt/jenkins/jenkinsci:/home/jenkins -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""'
5+
args '-v /mnt/jenkins/jenkinsci/TestData:/home/jenkins/TestData -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""'
66
}
77
}
88
options {
99
timeout(time: 2, unit: 'HOURS')
1010
disableConcurrentBuilds(abortPrevious: true)
1111
}
1212
environment {
13-
1413
AR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-24-24-0'
1514
DE_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-23-24-0'
16-
EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-04-24-0'
15+
EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-25-25-0'
1716
ES_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-25-24-0'
1817
ES_EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/08-30-24-0'
1918
FR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-07-25-0'
@@ -28,7 +27,7 @@ pipeline {
2827
HY_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-0'
2928
MR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-1'
3029
JA_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-17-24-1'
31-
HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-22-25-0'
30+
HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-31-25-0'
3231
DEFAULT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
3332
}
3433
stages {

nemo_text_processing/text_normalization/en/taggers/electronic.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -127,14 +127,15 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
127127

128128
full_stop_accep = pynini.accep(".")
129129
dollar_accep = pynini.accep("$") # Include for the correct transduction of the money graph
130-
excluded_symbols = full_stop_accep | dollar_accep
130+
excluded_symbols = full_stop_accep | dollar_accep | pynini.accep(",")
131131
filtered_symbols = pynini.difference(accepted_symbols, excluded_symbols)
132132
accepted_characters = NEMO_ALPHA | NEMO_DIGIT | filtered_symbols
133133
domain_component = full_stop_accep + pynini.closure(accepted_characters, 2)
134-
graph_domain = (
134+
graph_domain = pynutil.add_weight(
135135
pynutil.insert('domain: "')
136136
+ (pynini.closure(accepted_characters, 1) + pynini.closure(domain_component, 1))
137-
+ pynutil.insert('"')
137+
+ pynutil.insert('"'),
138+
0.1,
138139
).optimize()
139140

140141
graph |= pynutil.add_weight(graph_domain, MIN_NEG_WEIGHT)
Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
हफ़्ते
2+
सप्ताह
3+
सदियां
4+
सदियों
5+

nemo_text_processing/text_normalization/hi/data/measure/quarterly_units.tsv renamed to nemo_text_processing/text_normalization/hi/data/measure/quarterly_units_map.tsv

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,8 @@ h घंटे
44
min मिनट
55
doz दर्जन
66
yr साल
7-
yr वर्ष
87
hp हॉर्सपॉवर
98
d दिन
109
month महीना
1110
months महीने
12-
हफ़्ते हफ़्ते
11+

nemo_text_processing/text_normalization/hi/data/measure/unit.tsv

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ KHz किलोहर्ट्ज़
134134
N न्यूटन
135135
dB डेसीबल
136136
yr साल
137-
yr वर्ष
138137
hp हॉर्सपॉवर
139138
d दिन
140139
month महीना
Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,12 @@
1+
१ला पहला
2+
१ली पहली
3+
२रा दूसरा
4+
२री दूसरी
5+
३रा तीसरा
6+
३री तीसरी
7+
४था चौथा
8+
४थी चौथी
9+
५वां पाँचवां
10+
५वीं पाँचवीं
11+
६ठा छठा
12+
६ठी छठी
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
वां
2+
वीं
3+
वें
Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
वे वें
2+

0 commit comments

Comments
 (0)