Skip to content

Commit c470cc9

Browse files
authored
Merge branch 'staging/hi_itn_v3' into hi-itn-electronic-clean
Signed-off-by: mayuris-00 <mayuris@nvidia.com>
2 parents d4c9cf6 + b5d525d commit c470cc9

583 files changed

Lines changed: 25934 additions & 2034 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.pre-commit-config.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,30 +22,30 @@ ci:
2222

2323
repos:
2424
- repo: https://github.com/pre-commit/pre-commit-hooks
25-
rev: v5.0.0
25+
rev: v6.0.0
2626
hooks:
2727
- id: check-yaml
2828
- id: check-case-conflict
2929
- id: detect-private-key
3030
- id: requirements-txt-fixer
3131

3232
- repo: https://github.com/PyCQA/flake8
33-
rev: 7.2.0
33+
rev: 7.3.0
3434
hooks:
3535
- id: flake8
3636
args:
3737
- --select=W605
3838

3939
- repo: https://github.com/PyCQA/isort
40-
rev: 6.0.1
40+
rev: 6.1.0
4141
hooks:
4242
- id: isort
4343
name: Format imports
4444
args: [ --multi-line=3, --trailing-comma, --force-grid-wrap=0, --use-parentheses, --line-width=119, -rc, -ws ]
4545
exclude: docs/
4646

47-
- repo: https://github.com/psf/black
48-
rev: 25.1.0
47+
- repo: https://github.com/psf/black-pre-commit-mirror
48+
rev: 25.9.0
4949
hooks:
5050
- id: black
5151
name: Format code

Jenkinsfile

Lines changed: 87 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,32 +2,34 @@ pipeline {
22
agent {
33
docker {
44
image 'tnitn_ci_py310:24.07'
5-
args '-v /mnt/jenkins/jenkinsci:/home/jenkins -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""'
5+
args '-v /mnt/jenkins/jenkinsci/TestData:/home/jenkins/TestData -v $HOME/.cache:/root/.cache --shm-size=4g --entrypoint=""'
66
}
77
}
88
options {
99
timeout(time: 2, unit: 'HOURS')
1010
disableConcurrentBuilds(abortPrevious: true)
1111
}
1212
environment {
13-
1413
AR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-24-24-0'
1514
DE_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-23-24-0'
16-
EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-04-24-0'
15+
EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-04-26-3'
1716
ES_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-25-24-0'
1817
ES_EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/08-30-24-0'
18+
HI_EN_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-04-26-4'
1919
FR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-07-25-0'
2020
HU_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/07-16-24-0'
21-
PT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
21+
PT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/05-01-26-1'
2222
RU_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
23-
VI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
23+
VI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-29-25-0'
2424
SV_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
2525
ZH_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/11-13-24-0'
2626
IT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/08-22-24-0'
27+
HE_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/09-24-25-0'
2728
HY_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-0'
2829
MR_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/03-12-24-1'
2930
JA_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/10-17-24-1'
30-
HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/04-22-25-0'
31+
HI_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-04-26-5'
32+
KO_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-04-25-6'
3133
DEFAULT_TN_CACHE='/home/jenkins/TestData/text_norm/ci/grammars/06-08-23-0'
3234
}
3335
stages {
@@ -103,7 +105,11 @@ pipeline {
103105
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hi --text="एक" --cache_dir ${HI_TN_CACHE}'
104106
}
105107
}
106-
108+
stage('L0: Codeswitched HI/EN ITN grammars') {
109+
steps {
110+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hi_en --text="एक" --cache_dir ${HI_EN_TN_CACHE}'
111+
}
112+
}
107113
}
108114
}
109115

@@ -120,12 +126,12 @@ pipeline {
120126
parallel {
121127
stage('L0: DE TN grammars') {
122128
steps {
123-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=de --text="1" --cache_dir ${DEFAULT_TN_CACHE}'
129+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=de --text="1" --cache_dir ${DE_TN_CACHE}'
124130
}
125131
}
126132
stage('L0: DE ITN grammars') {
127133
steps {
128-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=de --text="ein hundert " --cache_dir ${DEFAULT_TN_CACHE}'
134+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=de --text="ein hundert " --cache_dir ${DE_TN_CACHE}'
129135
}
130136
}
131137
stage('L0: ES TN grammars') {
@@ -167,11 +173,10 @@ pipeline {
167173
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ar --text="اثنان " --cache_dir ${AR_TN_CACHE}'
168174
}
169175
}
170-
171176
}
172177
}
173178

174-
stage('L0: Create FR TN/ITN & VI ITN & HU TN & IT TN') {
179+
stage('L0: Create FR TN/ITN & VI TN/ITN & HU TN & IT TN') {
175180
when {
176181
anyOf {
177182
branch 'main'
@@ -197,6 +202,11 @@ pipeline {
197202
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=vi --text="một ngàn " --cache_dir ${VI_TN_CACHE}'
198203
}
199204
}
205+
stage('L0: VI TN grammars') {
206+
steps {
207+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=vi --text="100" --cache_dir ${VI_TN_CACHE}'
208+
}
209+
}
200210
stage('L0: HU TN grammars') {
201211
steps {
202212
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hu --text="100" --cache_dir ${HU_TN_CACHE}'
@@ -236,24 +246,41 @@ pipeline {
236246
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=sv --text="100" --cache_dir ${SV_TN_CACHE}'
237247
}
238248
}
239-
// stage('L0: SV ITN grammars') {
240-
// steps {
241-
// sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=sv --text="hundra " --cache_dir ${SV_TN_CACHE}'
242-
// }
243-
// }
244-
// stage('L0: PT TN grammars') {
245-
// steps {
246-
// sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=pt --text="2" --cache_dir ${DEFAULT_TN_CACHE}'
247-
// }
248-
// }
249+
// stage('L0: SV ITN grammars') {
250+
// steps {
251+
// sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=sv --text="hundra " --cache_dir ${SV_TN_CACHE}'
252+
// }
253+
// }
254+
stage('L0: PT TN grammars') {
255+
steps {
256+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=pt --text="2" --cache_dir ${PT_TN_CACHE}'
257+
}
258+
}
249259
stage('L0: PT ITN grammars') {
250260
steps {
251261
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=pt --text="dez " --cache_dir ${PT_TN_CACHE}'
252262
}
253263
}
254264
}
255265
}
256-
266+
stage('L0: Create HE ITN Grammar') {
267+
when {
268+
anyOf {
269+
branch 'main'
270+
branch 'staging/**'
271+
branch 'staging_*'
272+
changeRequest target: 'main'
273+
}
274+
}
275+
failFast true
276+
parallel {
277+
stage('L0: HE ITN grammars') {
278+
steps {
279+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=he --text="ת " --cache_dir ${HE_TN_CACHE}'
280+
}
281+
}
282+
}
283+
}
257284
stage('L0: Create HY TN/ITN Grammars & MR') {
258285
when {
259286
anyOf {
@@ -323,6 +350,29 @@ pipeline {
323350
}
324351
}
325352
}
353+
stage('L0: Create KO TN/ITN Grammars') {
354+
when {
355+
anyOf {
356+
branch 'main'
357+
branch 'staging/**'
358+
branch 'staging_*'
359+
changeRequest target: 'main'
360+
}
361+
}
362+
failFast true
363+
parallel {
364+
stage('L0: KO ITN grammars') {
365+
steps {
366+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ko --text="백" --cache_dir ${KO_TN_CACHE}'
367+
}
368+
}
369+
stage('L0: KO TN grammars') {
370+
steps {
371+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=ko --text="100" --cache_dir ${KO_TN_CACHE}'
372+
}
373+
}
374+
}
375+
}
326376

327377

328378
// L1 Tests starts here
@@ -363,6 +413,11 @@ pipeline {
363413
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/es_en/ -m "not pleasefixme" --cpu --tn_cache_dir ${ES_EN_TN_CACHE}'
364414
}
365415
}
416+
stage('L1: Run all Codeswitched HI/EN TN/ITN tests (restore grammars from cache)') {
417+
steps {
418+
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/hi_en/ -m "not pleasefixme" --cpu --tn_cache_dir ${HI_EN_TN_CACHE}'
419+
}
420+
}
366421
stage('L1: Run all AR TN/ITN tests (restore grammars from cache)') {
367422
steps {
368423
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/ar/ -m "not pleasefixme" --cpu --tn_cache_dir ${AR_TN_CACHE}'
@@ -413,6 +468,16 @@ pipeline {
413468
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/hy/ -m "not pleasefixme" --cpu --tn_cache_dir ${HY_TN_CACHE}'
414469
}
415470
}
471+
stage('L1: Run all HE TN/ITN tests (restore grammars from cache)') {
472+
steps {
473+
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/he/ -m "not pleasefixme" --cpu --tn_cache_dir ${HE_TN_CACHE}'
474+
}
475+
}
476+
stage('L1: Run all KO TN/ITN tests (restore grammars from cache)') {
477+
steps {
478+
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/ko/ -m "not pleasefixme" --cpu --tn_cache_dir ${KO_TN_CACHE}'
479+
}
480+
}
416481
}
417482
}
418483

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
חצי
2+
רבע
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
°F פרנהייט
2+
°C צלסיוס
3+
° מעלות
4+
°F מעלות פרנהייט
5+
°C מעלות צלסיוס
6+
K קלווין
7+
% אחוז
8+
% אחוזים
9+
Hz הרץ
10+
kW קילוואט
11+
kW קילו ואט
12+
kW קילו וואט
13+
kWh קילו ואט לשעה
14+
kWh קילוואט לשעה
15+
Wh ואט לשעה
16+
W ואט
17+
ghz ג׳יגה הרץ
18+
ghz גיגה הרץ
19+
khz קילו הרץ
20+
mhz מגה הרץ
21+
v וולט
22+
nm ננומטר
23+
mA מילי אמפר
24+
tW טרה ואט
25+
mv מילי וולט
26+
MW מגה ואט
27+
μm מיקרומטר
28+
" אינץ׳
29+
cc סי סי
30+
ω אוהם
31+
db דציבל
32+
db דציבלים
33+
kb קילו ביט
34+
mb מגה ביט
35+
gb ג׳יגה ביט
36+
gb גיגה ביט
37+
tb טרה ביט
38+
pb פטה ביט
39+
mb מגה בייט
40+
kb קילו בייט
41+
gb ג׳יגה בייט
42+
gb גיגה בייט
43+
tb טרה בייט
44+
pb פטה בייט
45+
A אמפר
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
ינואר
2+
פברואר
3+
מרץ
4+
מרס
5+
אפריל
6+
מאי
7+
יוני
8+
יולי
9+
אוגוסט
10+
ספטמבר
11+
אוקטובר
12+
נובמבר
13+
דצמבר
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
ינואר 1
2+
פברואר 2
3+
מרץ 3
4+
אפריל 4
5+
מאי 5
6+
יוני 6
7+
יולי 7
8+
אוגוסט 8
9+
ספטמבר 9
10+
אוקטובר 10
11+
נובמבר 11
12+
דצמבר 12
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
ראשון 1
2+
שני 2
3+
שלישי 3
4+
רביעי 4
5+
חמישי 5
6+
שישי 6
7+
שביעי 7
8+
שמיני 8
9+
תשיעי 9
10+
עשירי 10
11+
אחת עשרה 11
12+
שתיים עשרה 12

0 commit comments

Comments
 (0)