Skip to content

Commit e798f48

Browse files
committed
migrating merge branch to main
Signed-off-by: tbartley94 <tbartley@nvidia.com>
1 parent 8e56168 commit e798f48

71 files changed

Lines changed: 3781 additions & 1007 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Jenkinsfile

Lines changed: 45 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -80,31 +80,7 @@ pipeline {
8080
}
8181
}
8282

83-
stage('L0: Create HI TN/ITN Grammars') {
84-
when {
85-
anyOf {
86-
branch 'main'
87-
branch 'staging/**'
88-
branch 'staging_*'
89-
changeRequest target: 'main'
90-
}
91-
}
92-
failFast true
93-
parallel {
94-
stage('L0: Hi TN grammars') {
95-
steps {
96-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hi --text="१" --cache_dir ${HI_TN_CACHE}'
97-
}
98-
}
99-
stage('L0: Hi ITN grammars') {
100-
steps {
101-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hi --text="एक" --cache_dir ${HI_TN_CACHE}'
102-
}
103-
}
104-
}
105-
}
106-
107-
stage('L0: Create DE/ES TN/ITN Grammars') {
83+
stage('L0: Create DE/ES/FR TN/ITN Grammars') {
10884
when {
10985
anyOf {
11086
branch 'main'
@@ -140,34 +116,21 @@ pipeline {
140116
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=es_en --text="ciento uno " --cache_dir ${ES_EN_TN_CACHE}'
141117
}
142118
}
143-
}
144-
}
145-
146-
stage('L0: Create AR TN/ITN Grammars') {
147-
when {
148-
anyOf {
149-
branch 'main'
150-
branch 'staging/**'
151-
branch 'staging_*'
152-
changeRequest target: 'main'
153-
}
154-
}
155-
failFast true
156-
parallel {
157-
stage('L0: AR TN grammars') {
119+
stage('L0: FR TN grammars') {
158120
steps {
159-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=ar --text="2" --cache_dir ${AR_TN_CACHE}'
121+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=fr --text="2" --cache_dir ${FR_TN_CACHE}'
160122
}
161123
}
162-
stage('L0: AR ITN grammars') {
124+
stage('L0: FR ITN grammars') {
163125
steps {
164-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ar --text="اثنان " --cache_dir ${AR_TN_CACHE}'
126+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=fr --text="cent " --cache_dir ${FR_TN_CACHE}'
165127
}
166128
}
167129
}
168130
}
169131

170-
stage('L0: Create FR TN/ITN & VI TN/ITN & HU TN & IT TN') {
132+
133+
stage('L0: Create HI/VI/RU TN/ITN') {
171134
when {
172135
anyOf {
173136
branch 'main'
@@ -178,40 +141,40 @@ pipeline {
178141
}
179142
failFast true
180143
parallel {
181-
stage('L0: FR TN grammars') {
144+
stage('L0: VI ITN grammars') {
182145
steps {
183-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=fr --text="2" --cache_dir ${FR_TN_CACHE}'
146+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=vi --text="một ngàn " --cache_dir ${VI_TN_CACHE}'
184147
}
185148
}
186-
stage('L0: FR ITN grammars') {
149+
stage('L0: VI TN grammars') {
187150
steps {
188-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=fr --text="cent " --cache_dir ${FR_TN_CACHE}'
151+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=vi --text="100" --cache_dir ${VI_TN_CACHE}'
189152
}
190153
}
191-
stage('L0: VI ITN grammars') {
154+
stage('L0: RU TN grammars') {
192155
steps {
193-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=vi --text="một ngàn " --cache_dir ${VI_TN_CACHE}'
156+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize_with_audio.py --lang=ru --text="03" --cache_dir ${RU_TN_CACHE}'
194157
}
195158
}
196-
stage('L0: VI TN grammars') {
159+
stage('L0: RU ITN grammars') {
197160
steps {
198-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=vi --text="100" --cache_dir ${VI_TN_CACHE}'
161+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ru --text="три " --cache_dir ${RU_TN_CACHE}'
199162
}
200163
}
201-
stage('L0: HU TN grammars') {
164+
stage('L0: Hi TN grammars') {
202165
steps {
203-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hu --text="100" --cache_dir ${HU_TN_CACHE}'
166+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hi --text="१" --cache_dir ${HI_TN_CACHE}'
204167
}
205168
}
206-
stage('L0: IT TN grammars') {
169+
stage('L0: Hi ITN grammars') {
207170
steps {
208-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=it --text="122" --cache_dir ${IT_TN_CACHE}'
171+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hi --text="एक" --cache_dir ${HI_TN_CACHE}'
209172
}
210173
}
211174
}
212175
}
213176

214-
stage('L0: Create RU TN/ITN Grammars & SV & PT') {
177+
stage('L0: Create AR/HU/SV/PT/IT TN/ITN Grammars') {
215178
when {
216179
anyOf {
217180
branch 'main'
@@ -222,19 +185,24 @@ pipeline {
222185
}
223186
failFast true
224187
parallel {
225-
stage('L0: RU TN grammars') {
188+
stage('L0: SV TN grammars') {
226189
steps {
227-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize_with_audio.py --lang=ru --text="03" --cache_dir ${RU_TN_CACHE}'
190+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=sv --text="100" --cache_dir ${SV_TN_CACHE}'
228191
}
229192
}
230-
stage('L0: RU ITN grammars') {
193+
stage('L0: HU TN grammars') {
231194
steps {
232-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ru --text="три " --cache_dir ${RU_TN_CACHE}'
195+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hu --text="100" --cache_dir ${HU_TN_CACHE}'
233196
}
234197
}
235-
stage('L0: SV TN grammars') {
198+
stage('L0: AR TN grammars') {
236199
steps {
237-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=sv --text="100" --cache_dir ${SV_TN_CACHE}'
200+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=ar --text="2" --cache_dir ${AR_TN_CACHE}'
201+
}
202+
}
203+
stage('L0: AR ITN grammars') {
204+
steps {
205+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ar --text="اثنان " --cache_dir ${AR_TN_CACHE}'
238206
}
239207
}
240208
// stage('L0: SV ITN grammars') {
@@ -252,29 +220,15 @@ pipeline {
252220
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=pt --text="dez " --cache_dir ${PT_TN_CACHE}'
253221
}
254222
}
255-
}
256-
}
257-
258-
stage('L0: Create He TN/ITN Grammars & MR') {
259-
when {
260-
anyOf {
261-
branch 'main'
262-
branch 'staging/**'
263-
branch 'staging_*'
264-
changeRequest target: 'main'
265-
}
266-
}
267-
failFast true
268-
parallel {
269-
stage('L0: HE ITN grammars') {
223+
stage('L0: IT TN grammars') {
270224
steps {
271-
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=he --text="ת " --cache_dir ${HE_TN_CACHE}'
225+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=it --text="122" --cache_dir ${IT_TN_CACHE}'
272226
}
273227
}
274228
}
275229
}
276230

277-
stage('L0: Create HY TN/ITN Grammars & MR') {
231+
stage('L0: Create MR/HE/HY TN/ITN Grammars') {
278232
when {
279233
anyOf {
280234
branch 'main'
@@ -300,10 +254,15 @@ pipeline {
300254
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=hy --text="վեց " --cache_dir ${HY_TN_CACHE}'
301255
}
302256
}
257+
stage('L0: HE ITN grammars') {
258+
steps {
259+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=he --text="ת " --cache_dir ${HE_TN_CACHE}'
260+
}
261+
}
303262
}
304263
}
305264

306-
stage('L0: Create ZH TN/ITN Grammar') {
265+
stage('L0: Create CJK TN/ITN Grammar') {
307266
when {
308267
anyOf {
309268
branch 'main'
@@ -324,42 +283,21 @@ pipeline {
324283
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=zh --text="6" --cache_dir ${ZH_TN_CACHE}'
325284
}
326285
}
327-
}
328-
}
329-
330-
stage('L0: Create JA ITN Grammars') {
331-
when {
332-
anyOf {
333-
branch 'main'
334-
branch 'staging/**'
335-
branch 'staging_*'
336-
changeRequest target: 'main'
337-
}
338-
}
339-
failFast true
340-
parallel {
341286
stage('L0: JA ITN grammars') {
342287
steps {
343288
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ja --text="100" --cache_dir ${JA_TN_CACHE}'
344289
}
345290
}
346-
}
347-
}
348-
349-
stage('L0: Create KO TN Grammars') {
350-
when {
351-
anyOf {
352-
branch 'main'
353-
changeRequest target: 'main'
354-
}
355-
}
356-
failFast true
357-
parallel {
358291
stage('L0: KO TN grammars') {
359292
steps {
360293
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=ko --text="100" --cache_dir ${KO_TN_CACHE}'
361294
}
362295
}
296+
stage('L0: KO ITN grammars') {
297+
steps {
298+
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --lang=ko --text="백" --cache_dir ${KO_TN_CACHE}'
299+
}
300+
}
363301
}
364302
}
365303

nemo_text_processing/inverse_text_normalization/inverse_normalize.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,13 @@ def __init__(
136136
from nemo_text_processing.inverse_text_normalization.he.verbalizers.verbalize_final import (
137137
VerbalizeFinalFst,
138138
)
139+
elif lang == 'ko': # Korean
140+
from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst
141+
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import (
142+
VerbalizeFinalFst,
143+
)
144+
else:
145+
raise NotImplementedError(f"Language {lang} has not been supported yet.")
139146

140147
self.tagger = ClassifyFst(
141148
cache_dir=cache_dir, whitelist=whitelist, overwrite_cache=overwrite_cache, input_case=input_case
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst
16+
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst
17+
from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
달러 $
2+
$
3+
유로 €
4+
엔 ¥
5+
파운드 £
6+
위안 ¥
7+
페소 $
8+
루피 ₹
9+
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
킬로미터 km
2+
미터 m
3+
센티미터 cm
4+
밀리미터 mm
5+
마이크로미터 μm
6+
나노미터 nm
7+
킬로그램 kg
8+
그램 g
9+
톤 t
10+
밀리그램 mg
11+
마이크로그램 μg
12+
리터 L
13+
밀리리터 ml
14+
씨씨 cc
15+
시간 h
16+
분 min
17+
초 s
18+
뉴턴 N
19+
와트 W
20+
킬로와트 kW
21+
킬로와트시 kWh
22+
헤르츠 Hz
23+
킬로헤르츠 kHz
24+
메가헤르츠 MHz
25+
기가헤르츠 GHz
26+
도 °
27+
퍼센트 %
28+
프로 %
29+
분당회전수 rpm
30+
알피엠 rpm
31+
볼트 V
32+
밀리볼트 mV
33+
킬로볼트 kV
34+
암페어 A
35+
밀리암페어 mA
36+
평 py
37+
제곱미터 m²
38+
제곱킬로미터 km²
39+
제곱센티미터 cm²
40+
세제곱미터 m³
41+
기가바이트 GB
42+
기가 GB
43+
테라바이트 TB
44+
테라 TB
45+
메가바이트 MB
46+
메가 MB
47+
킬로바이트 KB
48+
바이트 B
49+
비트 bit
50+
칼로리 cal
51+
킬로칼로리 kcal
52+
줄 J
53+
킬로줄 kJ
54+
마력 hp
55+
옴 Ω
56+
파스칼 Pa
57+
헥토파스칼 hPa
58+
데시벨 dB
59+
루멘 lm
60+
럭스 lx
61+
픽셀 px
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
일 1
2+
이 2
3+
삼 3
4+
사 4
5+
오 5
6+
6
7+
칠 7
8+
팔 8
9+
구 9
10+
10
11+
십일 11
12+
십이 12

0 commit comments

Comments
 (0)