Skip to content

Commit a8d2025

Browse files
committed
Century implementations
Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>
1 parent 800ec5c commit a8d2025

File tree

2 files changed

+8
-0
lines changed

2 files changed

+8
-0
lines changed

nemo_text_processing/text_normalization/hi/taggers/date.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ def __init__(self, cardinal: GraphFst):
7474

7575
range_graph = pynini.cross("-", "से")
7676

77+
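# Graph for century ordinals: one or more Hindi digits verbalized via the cardinal graph, followed by the literal suffix "वीं" (e.g. १८वीं -> अठारहवीं)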
78+
century_number = pynini.compose(pynini.closure(NEMO_HI_DIGIT, 1), cardinal.final_graph) + pynini.accep("वीं")
79+
century_text = pynutil.insert("text: \"") + century_number + pynutil.insert("\"") + insert_space
80+
7781
graph_dd_mm_yyyy = (
7882
days_graph + (delete_dash | delete_slash) + months_graph + (delete_dash | delete_slash) + years_graph
7983
)
@@ -109,6 +113,7 @@ def __init__(self, cardinal: GraphFst):
109113
| pynutil.add_weight(graph_mm_yyyy, -0.2)
110114
| pynutil.add_weight(graph_year_suffix, -0.001)
111115
| pynutil.add_weight(graph_range, -0.005)
116+
| pynutil.add_weight(century_text, -0.001)
112117
)
113118

114119
self.final_graph = final_graph.optimize()

tests/nemo_text_processing/hi/data_text_normalization/test_cases_date.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,6 @@
1919
२०२४~दो हज़ार चौबीस
2020
१२० ई. पू.~एक सौ बीस ईसा पूर्व
2121
२९७-२७२ ई. पू.~दो सौ सत्तानबे से दो सौ बहत्तर ईसा पूर्व
22+
३२७वीं सदी~तीन सौ सत्ताईसवीं सदी
23+
१८वीं शताब्दी~अठारहवीं शताब्दी
24+
१९वीं दशक~उन्नीसवीं दशक

0 commit comments

Comments
 (0)