Skip to content

Commit a59bd41

Browse files
committed
date and time semiotic classes
Signed-off-by: Mai Anh <palasek182@gmail.com>
1 parent aa3f531 commit a59bd41

39 files changed

Lines changed: 1113 additions & 14 deletions
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
1 janeiro
2+
01 janeiro
3+
2 fevereiro
4+
02 fevereiro
5+
3 março
6+
03 março
7+
4 abril
8+
04 abril
9+
5 maio
10+
05 maio
11+
6 junho
12+
06 junho
13+
7 julho
14+
07 julho
15+
8 agosto
16+
08 agosto
17+
9 setembro
18+
09 setembro
19+
10 outubro
20+
11 novembro
21+
12 dezembro
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
/
2+
.
3+
-
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
preposition de
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

nemo_text_processing/text_normalization/pt/data/numbers/teens.tsv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
11 onze
33
12 doze
44
13 treze
5-
14 quatorze
5+
14 catorze
66
15 quinze
77
16 dezesseis
88
17 dezessete
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
manhã da manhã
2+
tarde da tarde
3+
noite da noite
4+
madrugada da madrugada

nemo_text_processing/text_normalization/pt/graph_utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@
4949
insert_space = pynutil.insert(" ")
5050
delete_extra_space = pynini.cross(pynini.closure(NEMO_WHITE_SPACE, 1), " ").optimize()
5151

52+
delete_preserve_order = pynini.closure(
53+
pynutil.delete(" preserve_order: true")
54+
)
55+
5256

5357
def generator_main(file_name: str, graphs: Dict[str, "pynini.FstLike"]) -> None:
5458
"""

nemo_text_processing/text_normalization/pt/taggers/cardinal.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,10 @@ def __init__(self, deterministic: bool = True):
7777
(connector_e + graph_tens),
7878
(connector_e + digit),
7979
)
80+
# "100" -> "cem" only: cross("1", cento) + delete("00") would also match "100" but
81+
# yield "cento", and OpenFst (Sparrowhawk) and pynini top_rewrite can disagree on such ties.
8082
graph_hundreds |= pynini.cross("100", hundred_100)
8183
graph_hundreds |= pynini.cross("1", hundred_1) + pynini.union(
82-
pynutil.delete("00"),
8384
(connector_e + graph_tens),
8485
(connector_e + pynutil.delete("0") + digit),
8586
)
@@ -116,7 +117,8 @@ def __init__(self, deterministic: bool = True):
116117
pynutil.delete("000"),
117118
(connector_e + graph_pure_components),
118119
(insert_space + graph_compound_hundreds),
119-
(insert_space + pynutil.delete("0") + graph_compound_tens),
120+
# Use connector_e so "2024" -> dois mil e vinte e quatro (not dois mil vinte e quatro).
121+
(connector_e + pynutil.delete("0") + graph_compound_tens),
120122
)
121123

122124
t_comp = pynini.union(

0 commit comments

Comments
 (0)