From 2d80ba5cf8bcb087e0069a0bb5562ec636a476df Mon Sep 17 00:00:00 2001
From: Namrata Gachchi <ngachchi@nvidia.com>
Date: Tue, 22 Apr 2025 22:02:28 +0530
Subject: [PATCH 1/4] Future Implementations for classes - Measure, Money, and
 Date (#258)

* Future Implementations for classes - Measure, Money, and Date

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* Resolved the conflicts with mm_yyyy and date ranges and added the previously removed failing test cases.

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* removed the unused empty string implementation

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* minor fixes for the tagger files

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* reformatted decimal final graph

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* incorporated the suggestion for decimal graph

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Century implementations

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* Working on the yyyy format for the date class

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* reverted yyyy code

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* working on future implementations

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* working on improving the date class accuracy

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* added year prefix for the date class

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* working on the comma cases for date class

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* minor fixes

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* implemented mixed fractions

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* rectified the test case

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* working on quarterly measurements

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* reformatted the prefixes and suffixes for date tagger class

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* replaced text tag with era tag for the date class

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

* Removed the text tag reference from date class verbalizer

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>

---------

Signed-off-by: Namrata Gachchi <ngachchi@nvidia.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 Jenkinsfile                                   |  2 +-
 .../hi/data/date/prefixes.tsv                 |  3 +
 .../hi/data/date/suffixes.tsv                 | 10 +++
 .../hi/data/date/year_suffix.tsv              |  2 +
 .../hi/data/measure/quarterly_units.tsv       | 12 +++
 .../hi/data/measure/unit.tsv                  |  4 +-
 .../hi/data/money/currency.tsv                |  3 +-
 .../hi/data/money/major_minor_currencies.tsv  |  9 +++
 .../hi/data/numbers/teens_and_ties.tsv        | 16 ++--
 .../text_normalization/hi/data/time/hours.tsv |  1 +
 .../text_normalization/hi/taggers/cardinal.py |  3 +
 .../text_normalization/hi/taggers/date.py     | 57 ++++++++++++-
 .../text_normalization/hi/taggers/measure.py  | 75 ++++++++++++++++--
 .../text_normalization/hi/taggers/money.py    | 36 +++++----
 .../hi/taggers/tokenize_and_classify.py       |  8 +-
 .../text_normalization/hi/verbalizers/date.py |  4 +-
 .../hi/verbalizers/fraction.py                |  7 +-
 .../hi/verbalizers/money.py                   | 79 ++++++++++++++-----
 .../hi/verbalizers/verbalize.py               | 20 +++--
 .../hi/verbalizers/whitelist.py               |  2 +
 .../test_cases_date.txt                       | 15 ++++
 .../test_cases_fraction.txt                   |  4 +-
 .../test_cases_measure.txt                    |  4 +
 .../test_cases_money.txt                      | 20 ++++-
 24 files changed, 324 insertions(+), 72 deletions(-)
 create mode 100644 nemo_text_processing/text_normalization/hi/data/date/prefixes.tsv
 create mode 100644 nemo_text_processing/text_normalization/hi/data/date/suffixes.tsv
 create mode 100644 nemo_text_processing/text_normalization/hi/data/date/year_suffix.tsv
 create mode 100644 nemo_text_processing/text_normalization/hi/data/measure/quarterly_units.tsv
 create mode 100644 nemo_text_processing/text_normalization/hi/data/money/major_minor_currencies.tsv

diff --git a/Jenkinsfile b/Jenkinsfile
index c94c107c6..53c784920 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -27,7 +27,7 @@ pipeline {
     HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
     MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1'
-    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-03-25-1'
+    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/02-12-25-0'
     DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {
diff --git a/nemo_text_processing/text_normalization/hi/data/date/prefixes.tsv b/nemo_text_processing/text_normalization/hi/data/date/prefixes.tsv
new file mode 100644
index 000000000..d4c1ca0b1
--- /dev/null
+++ b/nemo_text_processing/text_normalization/hi/data/date/prefixes.tsv
@@ -0,0 +1,3 @@
+सन् 
+सन 
+साल 
\ No newline at end of file
diff --git a/nemo_text_processing/text_normalization/hi/data/date/suffixes.tsv b/nemo_text_processing/text_normalization/hi/data/date/suffixes.tsv
new file mode 100644
index 000000000..6806d3f12
--- /dev/null
+++ b/nemo_text_processing/text_normalization/hi/data/date/suffixes.tsv
@@ -0,0 +1,10 @@
+ में
+ का
+ की
+ के
+ से
+ तक
+ ईस्वी
+ शताब्दी
+ दशक
+ सदी
\ No newline at end of file
diff --git a/nemo_text_processing/text_normalization/hi/data/date/year_suffix.tsv b/nemo_text_processing/text_normalization/hi/data/date/year_suffix.tsv
new file mode 100644
index 000000000..acb37d534
--- /dev/null
+++ b/nemo_text_processing/text_normalization/hi/data/date/year_suffix.tsv
@@ -0,0 +1,2 @@
+ई. पू.	ईसा पूर्व
+ई.	ईसवी
\ No newline at end of file
diff --git a/nemo_text_processing/text_normalization/hi/data/measure/quarterly_units.tsv b/nemo_text_processing/text_normalization/hi/data/measure/quarterly_units.tsv
new file mode 100644
index 000000000..eaddf930a
--- /dev/null
+++ b/nemo_text_processing/text_normalization/hi/data/measure/quarterly_units.tsv
@@ -0,0 +1,12 @@
+s	सेकंड
+hr	घंटा
+h	घंटे
+min	मिनट
+doz	दर्जन
+yr	साल
+yr	वर्ष
+hp	हॉर्सपॉवर
+d	दिन
+month	महीना
+months	महीने
+हफ़्ते हफ़्ते
\ No newline at end of file
diff --git a/nemo_text_processing/text_normalization/hi/data/measure/unit.tsv b/nemo_text_processing/text_normalization/hi/data/measure/unit.tsv
index 0bf561379..189512687 100644
--- a/nemo_text_processing/text_normalization/hi/data/measure/unit.tsv
+++ b/nemo_text_processing/text_normalization/hi/data/measure/unit.tsv
@@ -141,14 +141,16 @@ month	महीना
 months	महीने
 ct	कैरेट
 pH	पीएच
+km/h	किलोमीटर प्रति घंटा
 km/hr	किलोमीटर प्रति घंटा
 km/min	किलोमीटर प्रति मिनट
+m/h	मीटर प्रति घंटा
 m/hr	मीटर प्रति घंटा
 mi/s	मील प्रति सेकंड
+mi/h	मील प्रति घंटा
 mi/hr	मील प्रति घंटा
 mi/min	मील प्रति मिनट
 ₹/ac	रुपए प्रति एकड़
 x	बाई
 X	बाई
 *	बाई
--	से
diff --git a/nemo_text_processing/text_normalization/hi/data/money/currency.tsv b/nemo_text_processing/text_normalization/hi/data/money/currency.tsv
index 88633ec7c..8f4a955cc 100644
--- a/nemo_text_processing/text_normalization/hi/data/money/currency.tsv
+++ b/nemo_text_processing/text_normalization/hi/data/money/currency.tsv
@@ -1,5 +1,4 @@
 ₹	रुपए
-P	पैसे 
 £	पाउंड
 ₩	वॉन
 $	डॉलर
@@ -7,4 +6,4 @@ $	डॉलर
 ৳	टका
 ¥	येन
 ₦	नाइरा
-€	यूरो
+€	यूरो
\ No newline at end of file
diff --git a/nemo_text_processing/text_normalization/hi/data/money/major_minor_currencies.tsv b/nemo_text_processing/text_normalization/hi/data/money/major_minor_currencies.tsv
new file mode 100644
index 000000000..cf62891d1
--- /dev/null
+++ b/nemo_text_processing/text_normalization/hi/data/money/major_minor_currencies.tsv
@@ -0,0 +1,9 @@
+रुपए	पैसे
+पाउंड	पेंस
+वॉन	जिओन
+डॉलर	सेंट
+लीरा	कुरस
+टका	पैसे
+येन	सेन
+नाइरा	कोबो
+यूरो	सेंट
diff --git a/nemo_text_processing/text_normalization/hi/data/numbers/teens_and_ties.tsv b/nemo_text_processing/text_normalization/hi/data/numbers/teens_and_ties.tsv
index 1d61c77b7..fbf248266 100644
--- a/nemo_text_processing/text_normalization/hi/data/numbers/teens_and_ties.tsv
+++ b/nemo_text_processing/text_normalization/hi/data/numbers/teens_and_ties.tsv
@@ -79,12 +79,12 @@
 ८८	अट्ठासी
 ८९	नवासी
 ९०	नब्बे
-९१	इक्यानबे 
-९२	बानबे 
-९३	तिरानबे 
-९४	चौरानबे 
-९५	पंचानबे 
-९६	छियानबे 
-९७	सत्तानबे 
-९८	अट्ठानबे 
+९१	इक्यानबे
+९२	बानबे
+९३	तिरानबे
+९४	चौरानबे
+९५	पंचानबे
+९६	छियानबे
+९७	सत्तानबे
+९८	अट्ठानबे
 ९९	निन्यानबे
diff --git a/nemo_text_processing/text_normalization/hi/data/time/hours.tsv b/nemo_text_processing/text_normalization/hi/data/time/hours.tsv
index d5e85a784..dd8623284 100644
--- a/nemo_text_processing/text_normalization/hi/data/time/hours.tsv
+++ b/nemo_text_processing/text_normalization/hi/data/time/hours.tsv
@@ -1,3 +1,4 @@
+०	शून्य
 १	एक
 २	दो
 ३	तीन
diff --git a/nemo_text_processing/text_normalization/hi/taggers/cardinal.py b/nemo_text_processing/text_normalization/hi/taggers/cardinal.py
index fe3ad9a1d..05d7a4ee4 100644
--- a/nemo_text_processing/text_normalization/hi/taggers/cardinal.py
+++ b/nemo_text_processing/text_normalization/hi/taggers/cardinal.py
@@ -80,6 +80,7 @@ def create_larger_number_graph(digit_graph, suffix, zeros_counts, sub_graph):
         graph_ten_thousands |= create_larger_number_graph(teens_and_ties, suffix_thousands, 1, teens_ties)
         graph_ten_thousands |= create_larger_number_graph(teens_and_ties, suffix_thousands, 0, graph_hundreds)
         graph_ten_thousands.optimize()
+        self.graph_ten_thousands = graph_ten_thousands
 
         # Lakhs graph and ten lakhs graph
         suffix_lakhs = pynutil.insert(" लाख")
@@ -90,6 +91,7 @@ def create_larger_number_graph(digit_graph, suffix, zeros_counts, sub_graph):
         graph_lakhs |= create_larger_number_graph(digit, suffix_lakhs, 1, graph_thousands)
         graph_lakhs |= create_larger_number_graph(digit, suffix_lakhs, 0, graph_ten_thousands)
         graph_lakhs.optimize()
+        self.graph_lakhs = graph_lakhs
 
         graph_ten_lakhs = create_graph_suffix(teens_and_ties, suffix_lakhs, 5)
         graph_ten_lakhs |= create_larger_number_graph(teens_and_ties, suffix_lakhs, 4, digit)
@@ -98,6 +100,7 @@ def create_larger_number_graph(digit_graph, suffix, zeros_counts, sub_graph):
         graph_ten_lakhs |= create_larger_number_graph(teens_and_ties, suffix_lakhs, 1, graph_thousands)
         graph_ten_lakhs |= create_larger_number_graph(teens_and_ties, suffix_lakhs, 0, graph_ten_thousands)
         graph_ten_lakhs.optimize()
+        self.graph_ten_lakhs = graph_ten_lakhs
 
         # Crores graph ten crores graph
         suffix_crores = pynutil.insert(" करोड़")
diff --git a/nemo_text_processing/text_normalization/hi/taggers/date.py b/nemo_text_processing/text_normalization/hi/taggers/date.py
index 19aaf3139..468753e23 100644
--- a/nemo_text_processing/text_normalization/hi/taggers/date.py
+++ b/nemo_text_processing/text_normalization/hi/taggers/date.py
@@ -26,6 +26,20 @@
 
 days = pynini.string_file(get_abs_path("data/date/days.tsv"))
 months = pynini.string_file(get_abs_path("data/date/months.tsv"))
+year_suffix = pynini.string_file(get_abs_path("data/date/year_suffix.tsv"))
+digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
+teens_ties = pynini.string_file(get_abs_path("data/numbers/teens_and_ties.tsv"))
+teens_and_ties = pynutil.add_weight(teens_ties, -0.1)
+
+# Read suffixes from file into a list
+with open(get_abs_path("data/date/suffixes.tsv"), "r", encoding="utf-8") as f:
+    suffixes_list = f.read().splitlines()
+with open(get_abs_path("data/date/prefixes.tsv"), "r", encoding="utf-8") as f:
+    prefixes_list = f.read().splitlines()
+
+# Create union of suffixes and prefixes
+suffix_union = pynini.union(*suffixes_list)
+prefix_union = pynini.union(*prefixes_list)
 
 
 class DateFst(GraphFst):
@@ -51,10 +65,15 @@ def __init__(self, cardinal: GraphFst):
             (NEMO_HI_DIGIT + NEMO_HI_NON_ZERO + NEMO_HI_DIGIT + NEMO_HI_DIGIT), cardinal.graph_hundreds_as_thousand
         )
 
+        cardinal_graph = (
+            digit | teens_and_ties | cardinal.graph_hundreds | graph_year_thousands | graph_year_hundreds_as_thousands
+        )
+
         graph_year = graph_year_thousands | graph_year_hundreds_as_thousands
 
         delete_dash = pynutil.delete("-")
         delete_slash = pynutil.delete("/")
+        delete_comma = pynutil.delete(",")
 
         days_graph = pynutil.insert("day: \"") + days + pynutil.insert("\"") + insert_space
 
@@ -68,6 +87,22 @@ def __init__(self, cardinal: GraphFst):
 
         graph_mm_dd += pynutil.insert(" preserve_order: true ")
 
+        # Graph for era
+        era_graph = pynutil.insert("era: \"") + year_suffix + pynutil.insert("\"") + insert_space
+
+        range_graph = pynini.cross("-", "से")
+
+        # Graph for year
+        century_number = pynini.compose(pynini.closure(NEMO_HI_DIGIT, 1), cardinal_graph) + pynini.accep("वीं")
+        century_text = pynutil.insert("era: \"") + century_number + pynutil.insert("\"") + insert_space
+
+        # Updated logic to use suffix_union
+        year_number = graph_year + suffix_union
+        year_text = pynutil.insert("era: \"") + year_number + pynutil.insert("\"") + insert_space
+
+        # Updated logic to use prefix_union
+        year_prefix = pynutil.insert("era: \"") + prefix_union + insert_space + graph_year + pynutil.insert("\"")
+
         graph_dd_mm_yyyy = (
             days_graph + (delete_dash | delete_slash) + months_graph + (delete_dash | delete_slash) + years_graph
         )
@@ -78,7 +113,20 @@ def __init__(self, cardinal: GraphFst):
 
         graph_mm_dd_yyyy += pynutil.insert(" preserve_order: true ")
 
-        graph_mm_yyyy = months_graph + delete_dash + years_graph
+        graph_mm_yyyy = months_graph + delete_dash + insert_space + years_graph
+
+        graph_year_suffix = era_graph
+
+        graph_range = (
+            pynutil.insert("era: \"")
+            + cardinal_graph
+            + insert_space
+            + range_graph
+            + insert_space
+            + cardinal_graph
+            + pynutil.insert("\"")
+            + pynutil.insert(" preserve_order: true ")
+        )
 
         # default assume dd_mm_yyyy
 
@@ -87,7 +135,12 @@ def __init__(self, cardinal: GraphFst):
             | graph_mm_dd
             | pynutil.add_weight(graph_dd_mm_yyyy, -0.001)
             | graph_mm_dd_yyyy
-            | graph_mm_yyyy
+            | pynutil.add_weight(graph_mm_yyyy, -0.2)
+            | pynutil.add_weight(graph_year_suffix, -0.001)
+            | pynutil.add_weight(graph_range, -0.005)
+            | pynutil.add_weight(century_text, -0.001)
+            | pynutil.add_weight(year_text, -0.001)
+            | pynutil.add_weight(year_prefix, -0.009)
         )
 
         self.final_graph = final_graph.optimize()
diff --git a/nemo_text_processing/text_normalization/hi/taggers/measure.py b/nemo_text_processing/text_normalization/hi/taggers/measure.py
index 7434fd70f..954215771 100644
--- a/nemo_text_processing/text_normalization/hi/taggers/measure.py
+++ b/nemo_text_processing/text_normalization/hi/taggers/measure.py
@@ -19,6 +19,11 @@
 from nemo_text_processing.text_normalization.hi.utils import get_abs_path
 
 
+digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
+teens_ties = pynini.string_file(get_abs_path("data/numbers/teens_and_ties.tsv"))
+teens_and_ties = pynutil.add_weight(teens_ties, -0.1)
+
+
 class MeasureFst(GraphFst):
     """
     Finite state transducer for classifying measure, suppletive aware, e.g. 
@@ -35,26 +40,55 @@ class MeasureFst(GraphFst):
     def __init__(self, cardinal: GraphFst, decimal: GraphFst):
         super().__init__(name="measure", kind="classify")
 
-        cardinal_graph = cardinal.final_graph
-        decimal_graph = decimal.final_graph_wo_negative
+        cardinal_graph = (
+            digit
+            | teens_and_ties
+            | cardinal.graph_hundreds
+            | cardinal.graph_thousands
+            | cardinal.graph_ten_thousands
+            | cardinal.graph_lakhs
+            | cardinal.graph_ten_lakhs
+        )
+        point = pynutil.delete(".")
+        decimal_integers = pynutil.insert("integer_part: \"") + cardinal_graph + pynutil.insert("\"")
+        decimal_graph = decimal_integers + point + insert_space + decimal.graph_fractional
         unit_graph = pynini.string_file(get_abs_path("data/measure/unit.tsv"))
+        quarterly_units_graph = pynini.string_file(get_abs_path("data/measure/quarterly_units.tsv"))
 
         optional_graph_negative = pynini.closure(
             pynutil.insert("negative: ") + pynini.cross("-", "\"true\"") + insert_space, 0, 1,
         )
 
+        # Define the quarterly measurements
+        quarter = pynini.string_map([(".५", "साढ़े"), ("१.५", "डेढ़"), ("२.५", "ढाई"),])
+        quarter_graph = pynutil.insert("integer_part: \"") + quarter + pynutil.insert("\"")
+
         # Define the unit handling
-        self.unit = pynutil.insert("units: \"") + unit_graph + pynutil.insert("\" ")
+        unit = pynutil.insert(" units: \"") + unit_graph + pynutil.insert("\" ")
+        units = pynutil.insert(" units: \"") + quarterly_units_graph + pynutil.insert("\" ")
 
-        graph_measurements = (
+        # Handling symbols like x, X, *
+        symbol_graph = pynini.string_map([("x", "बाई"), ("X", "बाई"), ("*", "बाई"),])
+
+        graph_decimal = (
             pynutil.insert("decimal { ")
             + optional_graph_negative
             + decimal_graph
             + pynutil.insert(" }")
             + delete_space
-            + self.unit
+            + unit
         )
-        graph_measurements |= (
+
+        graph_quarter = (
+            pynutil.insert("cardinal { ")
+            + optional_graph_negative
+            + quarter_graph
+            + pynutil.insert(" }")
+            + delete_space
+            + units
+        )
+
+        graph_cardinal = (
             pynutil.insert("cardinal { ")
             + optional_graph_negative
             + pynutil.insert("integer: \"")
@@ -62,10 +96,35 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst):
             + pynutil.insert("\"")
             + pynutil.insert(" }")
             + delete_space
-            + self.unit
+            + unit
         )
 
-        graph = graph_measurements
+        # Handling cardinal clubbed with symbol as single token
+        graph_exceptions = (
+            pynutil.insert("cardinal { ")
+            + optional_graph_negative
+            + pynutil.insert("integer: \"")
+            + cardinal_graph
+            + pynutil.insert("\"")
+            + pynutil.insert(" }")
+            + pynutil.insert(" units: \"")
+            + symbol_graph
+            + pynutil.insert("\" ")
+            + pynutil.insert("} }")
+            + insert_space
+            + pynutil.insert("tokens { cardinal { ")
+            + optional_graph_negative
+            + pynutil.insert("integer: \"")
+            + cardinal_graph
+            + pynutil.insert("\"")
+        )
+
+        graph = (
+            pynutil.add_weight(graph_decimal, 0.01)
+            | pynutil.add_weight(graph_quarter, 0.005)
+            | pynutil.add_weight(graph_cardinal, 0.01)
+            | pynutil.add_weight(graph_exceptions, 0.01)
+        )
         self.graph = graph.optimize()
 
         final_graph = self.add_tokens(graph)
diff --git a/nemo_text_processing/text_normalization/hi/taggers/money.py b/nemo_text_processing/text_normalization/hi/taggers/money.py
index c44d6d346..6d9ac6dcc 100644
--- a/nemo_text_processing/text_normalization/hi/taggers/money.py
+++ b/nemo_text_processing/text_normalization/hi/taggers/money.py
@@ -24,9 +24,11 @@
 class MoneyFst(GraphFst):
     """
     Finite state transducer for classifying money, suppletive aware, e.g.
-        ₹1 -> money { currency: "रुपए" integer_part: "एक" }
-        ₹1.2 -> money { currency: "रुपए" integer_part: "एक" fractional_part: "दो" }
-        
+        ₹५० -> money { currency_maj: "रुपए" integer_part: "पचास" }
+        ₹५०.५० -> money { currency_maj: "रुपए" integer_part: "पचास" fractional_part: "पचास" currency_min: "centiles" }
+        ₹०.५० -> money { currency_maj: "रुपए" integer_part: "शून्य" fractional_part: "पचास" currency_min: "centiles" }
+    Note that the 'centiles' string is a placeholder that the verbalizer replaces with the corresponding minor currency denomination.
+
     Args:
         cardinal: CardinalFst
         decimal: DecimalFst
@@ -34,7 +36,7 @@ class MoneyFst(GraphFst):
             for False multiple transduction are generated (used for audio-based normalization)
     """
 
-    def __init__(self, cardinal: GraphFst, decimal: GraphFst):
+    def __init__(self, cardinal: GraphFst):
         super().__init__(name="money", kind="classify")
 
         cardinal_graph = cardinal.final_graph
@@ -42,21 +44,25 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst):
         optional_graph_negative = pynini.closure(
             pynutil.insert("negative: ") + pynini.cross("-", "\"true\"") + insert_space, 0, 1,
         )
-        self.currency = pynutil.insert("currency: \"") + currency_graph + pynutil.insert("\" ")
-        self.interger = pynutil.insert("integer_part: \"") + cardinal_graph + pynutil.insert("\" ")
-        self.fraction = pynutil.insert("fractional_part: \"") + cardinal_graph + pynutil.insert("\" ")
+        currency_major = pynutil.insert('currency_maj: "') + currency_graph + pynutil.insert('"')
+        integer = pynutil.insert('integer_part: "') + cardinal_graph + pynutil.insert('"')
+        fraction = pynutil.insert('fractional_part: "') + cardinal_graph + pynutil.insert('"')
+        currency_minor = pynutil.insert('currency_min: "') + pynutil.insert("centiles") + pynutil.insert('"')
 
-        graph_currencies = optional_graph_negative + self.currency + insert_space + self.interger
-        graph_currencies |= (
+        graph_major_only = optional_graph_negative + currency_major + insert_space + integer
+        graph_major_and_minor = (
             optional_graph_negative
-            + self.currency
+            + currency_major
             + insert_space
-            + self.interger
-            + pynutil.delete(".")
+            + integer
+            + pynini.cross(".", " ")
+            + fraction
             + insert_space
-            + self.fraction
+            + currency_minor
         )
-        graph = graph_currencies
-        self.graph = graph.optimize()
+
+        graph_currencies = graph_major_only | graph_major_and_minor
+
+        graph = graph_currencies.optimize()
         final_graph = self.add_tokens(graph)
         self.fst = final_graph
diff --git a/nemo_text_processing/text_normalization/hi/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/hi/taggers/tokenize_and_classify.py
index 48ee97ef3..bdec90c06 100644
--- a/nemo_text_processing/text_normalization/hi/taggers/tokenize_and_classify.py
+++ b/nemo_text_processing/text_normalization/hi/taggers/tokenize_and_classify.py
@@ -43,7 +43,7 @@ class ClassifyFst(GraphFst):
     Final class that composes all other classification grammars. This class can process an entire sentence including punctuation.
     For deployment, this grammar will be compiled and exported to OpenFst Finite State Archive (FAR) File.
     More details to deployment at NeMo/tools/text_processing_deployment.
-    
+
     Args:
         input_case: accepting either "lower_cased" or "cased" input.
         deterministic: if True will provide a single transduction option,
@@ -68,11 +68,11 @@ def __init__(
             os.makedirs(cache_dir, exist_ok=True)
             whitelist_file = os.path.basename(whitelist) if whitelist else ""
             far_file = os.path.join(
-                cache_dir, f"hi_tn_{deterministic}_deterministic_{input_case}_{whitelist_file}_tokenize.far"
+                cache_dir, f"hi_tn_{deterministic}_deterministic_{input_case}_{whitelist_file}_tokenize.far",
             )
         if not overwrite_cache and far_file and os.path.exists(far_file):
             self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
-            logging.info(f'ClassifyFst.fst was restored from {far_file}.')
+            logging.info(f"ClassifyFst.fst was restored from {far_file}.")
         else:
             logging.info(f"Creating ClassifyFst grammars.")
 
@@ -107,7 +107,7 @@ def __init__(
             logging.debug(f"measure: {time.time() - start_time: .2f}s -- {measure_graph.num_states()} nodes")
 
             start_time = time.time()
-            money = MoneyFst(cardinal=cardinal, decimal=decimal)
+            money = MoneyFst(cardinal=cardinal)
             money_graph = money.fst
             logging.debug(f"money: {time.time() - start_time: .2f}s -- {money_graph.num_states()} nodes")
 
diff --git a/nemo_text_processing/text_normalization/hi/verbalizers/date.py b/nemo_text_processing/text_normalization/hi/verbalizers/date.py
index 1265fcec6..187acf7d6 100644
--- a/nemo_text_processing/text_normalization/hi/verbalizers/date.py
+++ b/nemo_text_processing/text_normalization/hi/verbalizers/date.py
@@ -39,6 +39,8 @@ def __init__(self):
 
         year = pynutil.delete("year: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")
 
+        graph_era = pynutil.delete("era: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")
+
         graph_dd_mm = day + NEMO_SPACE + month
 
         graph_mm_dd = month + NEMO_SPACE + day
@@ -60,7 +62,7 @@ def __init__(self):
         )
 
         self.graph = (
-            (graph_dd_mm | graph_mm_dd | graph_dd_mm_yyyy | graph_mm_dd_yyyy | graph_mm_yyyy)
+            (graph_dd_mm | graph_mm_dd | graph_dd_mm_yyyy | graph_mm_dd_yyyy | graph_mm_yyyy | graph_era)
             + delete_space
             + optional_preserve_order
         )
diff --git a/nemo_text_processing/text_normalization/hi/verbalizers/fraction.py b/nemo_text_processing/text_normalization/hi/verbalizers/fraction.py
index e4cfae302..cba534e61 100644
--- a/nemo_text_processing/text_normalization/hi/verbalizers/fraction.py
+++ b/nemo_text_processing/text_normalization/hi/verbalizers/fraction.py
@@ -39,10 +39,15 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
         numerator = pynutil.delete("numerator: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\" ")
         denominator = pynutil.delete("denominator: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"")
         insert_bata = pynutil.insert(" बटा ")
+        insert_aur = pynutil.insert(" और ")
 
         fraction_default = numerator + insert_bata + denominator
 
-        self.graph = optional_sign + pynini.closure(pynini.closure(integer, 0, 1) + insert_space) + fraction_default
+        self.graph = (
+            optional_sign
+            + pynini.closure(pynini.closure(integer, 0, 1) + insert_space + insert_aur)
+            + fraction_default
+        )
 
         graph = self.graph
 
diff --git a/nemo_text_processing/text_normalization/hi/verbalizers/money.py b/nemo_text_processing/text_normalization/hi/verbalizers/money.py
index d5cab33d8..048140295 100644
--- a/nemo_text_processing/text_normalization/hi/verbalizers/money.py
+++ b/nemo_text_processing/text_normalization/hi/verbalizers/money.py
@@ -15,14 +15,26 @@
 import pynini
 from pynini.lib import pynutil
 
-from nemo_text_processing.text_normalization.hi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space, insert_space
+major_minor_currencies = {
+    "रुपए": "पैसे",
+    "पाउंड": "पेंस",
+    "वॉन": "जिओन",
+    "डॉलर": "सेंट",
+    "लीरा": "कुरस",
+    "टका": "पैसे",
+    "येन": "सेन",
+    "नाइरा": "कोबो",
+    "यूरो": "सेंट",
+}
+from nemo_text_processing.text_normalization.hi.graph_utils import NEMO_NOT_QUOTE, NEMO_SPACE, GraphFst
 
 
 class MoneyFst(GraphFst):
     """
     Finite state transducer for verbalizing money, e.g.
-        money { integer_part: "बारह" currency: "रुपए" } -> बारह रुपए
-        money { integer_part: "बारह" currency: "रुपए" fractional_part: "पचास" currency: "पैसे" } -> बारह रुपए पचास पैसे
+        money { integer_part: "बारह" currency_maj: "रुपए" } -> बारह रुपए
+        money { integer_part: "बारह" currency_maj: "रुपए" fractional_part: "पचास" currency_min: "centiles" } -> बारह रुपए पचास पैसे
+        money { currency_maj: "रुपए" integer_part: "शून्य" fractional_part: "पचास" currency_min: "centiles" } -> पचास पैसे
 
     Args:
         cardinal: CardinalFst
@@ -31,33 +43,58 @@ class MoneyFst(GraphFst):
             for False multiple transduction are generated (used for audio-based normalization)
     """
 
-    def __init__(self, cardinal: GraphFst, decimal: GraphFst):
+    def __init__(self):
         super().__init__(name="money", kind="verbalize")
 
-        insert_paise = pynutil.insert("पैसे")
+        currency_major = pynutil.delete('currency_maj: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"')
 
-        currency = (
-            pynutil.delete('currency: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('" ') + insert_space
-        )
-
-        integer_part = (
-            pynutil.delete('integer_part: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('" ') + insert_space
-        )
+        integer_part = pynutil.delete('integer_part: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"')
 
         fractional_part = (
-            pynutil.delete('fractional_part: "')
-            + pynini.closure(NEMO_NOT_QUOTE, 1)
-            + pynutil.delete('" ')
-            + insert_space
+            pynutil.delete('fractional_part: "') + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete('"')
         )
 
-        graph_integer = integer_part + delete_space + currency
+        # Handles major denominations only
+        graph_major_only = integer_part + pynini.accep(NEMO_SPACE) + currency_major
 
-        graph_interger_fraction = (
-            integer_part + delete_space + currency + delete_space + fractional_part + delete_space + insert_paise
-        )
+        # Handles both major and minor denominations
+        major_minor_graphs = []
+
+        # Handles minor denominations only
+        minor_graphs = []
+
+        # Logic for handling minor denominations
+        for major, minor in major_minor_currencies.items():
+            graph_major = pynutil.delete('currency_maj: "') + pynini.accep(major) + pynutil.delete('"')
+            graph_minor = pynutil.delete('currency_min: "') + pynini.cross("centiles", minor) + pynutil.delete('"')
+            graph_major_minor_partial = (
+                integer_part
+                + pynini.accep(NEMO_SPACE)
+                + graph_major
+                + pynini.accep(NEMO_SPACE)
+                + fractional_part
+                + pynini.accep(NEMO_SPACE)
+                + graph_minor
+            )
+            major_minor_graphs.append(graph_major_minor_partial)
+
+            graph_minor_partial = (
+                pynutil.delete('integer_part: "शून्य"')
+                + pynutil.delete(NEMO_SPACE)
+                + pynutil.delete('currency_maj: "')
+                + pynutil.delete(major)
+                + pynutil.delete('"')
+                + pynutil.delete(NEMO_SPACE)
+                + fractional_part
+                + pynini.accep(NEMO_SPACE)
+                + graph_minor
+            )
+            minor_graphs.append(graph_minor_partial)
+
+        graph_major_minor = pynini.union(*major_minor_graphs)
+        graph_minor_only = pynini.union(*minor_graphs)
 
-        graph = graph_integer | graph_interger_fraction
+        graph = graph_major_only | graph_major_minor | pynutil.add_weight(graph_minor_only, -0.1)
 
         delete_tokens = self.delete_tokens(graph)
         self.fst = delete_tokens.optimize()
diff --git a/nemo_text_processing/text_normalization/hi/verbalizers/verbalize.py b/nemo_text_processing/text_normalization/hi/verbalizers/verbalize.py
index ca06fc9c3..e91f0d9f6 100644
--- a/nemo_text_processing/text_normalization/hi/verbalizers/verbalize.py
+++ b/nemo_text_processing/text_normalization/hi/verbalizers/verbalize.py
@@ -20,8 +20,7 @@
 from nemo_text_processing.text_normalization.hi.verbalizers.measure import MeasureFst
 from nemo_text_processing.text_normalization.hi.verbalizers.money import MoneyFst
 from nemo_text_processing.text_normalization.hi.verbalizers.time import TimeFst
-
-# from nemo_text_processing.text_normalization.hi.verbalizers.whitelist import WhiteListFst
+from nemo_text_processing.text_normalization.hi.verbalizers.whitelist import WhiteListFst
 
 
 class VerbalizeFst(GraphFst):
@@ -56,11 +55,20 @@ def __init__(self, deterministic: bool = True):
         measure = MeasureFst(cardinal=cardinal, decimal=decimal)
         measure_graph = measure.fst
 
-        money = MoneyFst(cardinal=cardinal, decimal=decimal)
+        money = MoneyFst()
         money_graph = money.fst
 
-        # whitelist_graph = WhiteListFst(deterministic=deterministic).fst
-
-        graph = cardinal_graph | decimal_graph | fraction_graph | date_graph | time_graph | measure_graph | money_graph
+        whitelist_graph = WhiteListFst(deterministic=deterministic).fst
+
+        graph = (
+            cardinal_graph
+            | decimal_graph
+            | fraction_graph
+            | date_graph
+            | time_graph
+            | measure_graph
+            | money_graph
+            | whitelist_graph
+        )
 
         self.fst = graph
diff --git a/nemo_text_processing/text_normalization/hi/verbalizers/whitelist.py b/nemo_text_processing/text_normalization/hi/verbalizers/whitelist.py
index 3f478a2d2..ed419f2f7 100644
--- a/nemo_text_processing/text_normalization/hi/verbalizers/whitelist.py
+++ b/nemo_text_processing/text_normalization/hi/verbalizers/whitelist.py
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+
 import pynini
 from pynini.lib import pynutil
 
diff --git a/tests/nemo_text_processing/hi/data_text_normalization/test_cases_date.txt b/tests/nemo_text_processing/hi/data_text_normalization/test_cases_date.txt
index d92a53852..a4b3caf07 100644
--- a/tests/nemo_text_processing/hi/data_text_normalization/test_cases_date.txt
+++ b/tests/nemo_text_processing/hi/data_text_normalization/test_cases_date.txt
@@ -17,3 +17,18 @@
 ११-२०२४~नवंबर दो हज़ार चौबीस
 २०७०~दो हज़ार सत्तर
 २०२४~दो हज़ार चौबीस
+१२० ई. पू.~एक सौ बीस ईसा पूर्व
+२९७-२७२ ई. पू.~दो सौ सत्तानबे से दो सौ बहत्तर ईसा पूर्व
+३२७वीं सदी~तीन सौ सत्ताईसवीं सदी
+१८वीं शताब्दी~अठारहवीं शताब्दी
+१९वीं दशक~उन्नीसवीं दशक
+१९९९ में~उन्नीस सौ निन्यानबे में
+१९९० का~उन्नीस सौ नब्बे का
+१९९२ की~उन्नीस सौ बानबे की
+१९६० के अभिनेता है~उन्नीस सौ साठ के अभिनेता है
+१७८८ से~सत्रह सौ अट्ठासी से
+१९५४ तक~उन्नीस सौ चौवन तक
+सन १९९९~सन उन्नीस सौ निन्यानबे
+सन् १९२०~सन् उन्नीस सौ बीस
+साल १९७१~साल उन्नीस सौ इकहत्तर
+१९२०-२६ तक~उन्नीस सौ बीस से छब्बीस तक
\ No newline at end of file
diff --git a/tests/nemo_text_processing/hi/data_text_normalization/test_cases_fraction.txt b/tests/nemo_text_processing/hi/data_text_normalization/test_cases_fraction.txt
index 25c18b777..d1473412e 100644
--- a/tests/nemo_text_processing/hi/data_text_normalization/test_cases_fraction.txt
+++ b/tests/nemo_text_processing/hi/data_text_normalization/test_cases_fraction.txt
@@ -1,5 +1,5 @@
 ९९/९९~निन्यानबे बटा निन्यानबे
-२२ ३१/१७~बाईस इकतीस बटा सत्रह
+२२ ३१/१७~बाईस और इकतीस बटा सत्रह
 ९७/०~सत्तानबे बटा शून्य
 २५६३/४१२~दो हज़ार पाँच सौ तिरेसठ बटा चार सौ बारह
 ७२८६०/७०~बहत्तर हज़ार आठ सौ साठ बटा सत्तर
@@ -19,3 +19,5 @@
 १०००००००००००००/३~एक नील बटा तीन
 १०००००००००००००००/८~एक पद्म बटा आठ
 १०००००००००००००००००/४१२~एक शंख बटा चार सौ बारह
+२ २/७~दो और दो बटा सात
+१२० ७५/९०~एक सौ बीस और पचहत्तर बटा नब्बे
\ No newline at end of file
diff --git a/tests/nemo_text_processing/hi/data_text_normalization/test_cases_measure.txt b/tests/nemo_text_processing/hi/data_text_normalization/test_cases_measure.txt
index 453369f82..86a824f72 100644
--- a/tests/nemo_text_processing/hi/data_text_normalization/test_cases_measure.txt
+++ b/tests/nemo_text_processing/hi/data_text_normalization/test_cases_measure.txt
@@ -60,3 +60,7 @@
 ९९.५ oz~निन्यानबे दशमलव पाँच आउन्स
 ८५ q~पचासी क्विंटल
 ८५.९९ q~पचासी दशमलव नौ नौ क्विंटल
+२००x१० के गद्दे~दो सौ बाई दस के गद्दे
+५x५ का सोफ़ा~पाँच बाई पाँच का सोफ़ा
+२x२ रुबिक्स क्यूब~दो बाई दो रुबिक्स क्यूब
+१३x१३ का घर~तेरह बाई तेरह का घर
diff --git a/tests/nemo_text_processing/hi/data_text_normalization/test_cases_money.txt b/tests/nemo_text_processing/hi/data_text_normalization/test_cases_money.txt
index c7b32628b..b576dac38 100644
--- a/tests/nemo_text_processing/hi/data_text_normalization/test_cases_money.txt
+++ b/tests/nemo_text_processing/hi/data_text_normalization/test_cases_money.txt
@@ -97,4 +97,22 @@ $२८२१~दो हज़ार आठ सौ इक्कीस डॉल
 ₹५४५~पाँच सौ पैंतालीस रुपए
 ₹१८४५~एक हज़ार आठ सौ पैंतालीस रुपए
 ₹३७२~तीन सौ बहत्तर रुपए
-$९८~अट्ठानबे डॉलर
\ No newline at end of file
+$९८~अट्ठानबे डॉलर
+₹१२३.५७~एक सौ तेईस रुपए सत्तावन पैसे
+₹९९९.५०~नौ सौ निन्यानबे रुपए पचास पैसे
+£१५०.२९~एक सौ पचास पाउंड उनतीस पेंस
+£८०.३१~अस्सी पाउंड इकतीस पेंस
+₩२३४५.१०~दो हज़ार तीन सौ पैंतालीस वॉन दस जिओन
+₩१००.२५~एक सौ वॉन पच्चीस जिओन
+$१२५.७०~एक सौ पच्चीस डॉलर सत्तर सेंट
+$९.९९~नौ डॉलर निन्यानबे सेंट
+₺८०.३६~अस्सी लीरा छत्तीस कुरस
+₺१२३४.७८~एक हज़ार दो सौ चौंतीस लीरा अठहत्तर कुरस
+৳१००.४२~एक सौ टका बयालीस पैसे
+৳३०२५.८७~तीन हज़ार पच्चीस टका सत्तासी पैसे
+¥१००.४८~एक सौ येन अड़तालीस सेन
+¥७७७.२३~सात सौ सतहत्तर येन तेईस सेन
+₦८७६.५३~आठ सौ छिहत्तर नाइरा तिरेपन कोबो
+₦१०.२७~दस नाइरा सत्ताईस कोबो
+€२००.९०~दो सौ यूरो नब्बे सेंट
+€१२३४.७५~एक हज़ार दो सौ चौंतीस यूरो पचहत्तर सेंट

From c4987310baf6e0568c1dd2c45e87ef55e6fc0226 Mon Sep 17 00:00:00 2001
From: Mariana Graterol Fuenmayor <marianag@nvidia.com>
Date: Tue, 22 Apr 2025 09:36:22 -0700
Subject: [PATCH 2/4] update jenkins cache

Signed-off-by: Mariana Graterol Fuenmayor <marianag@nvidia.com>
---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 53c784920..51ce37a10 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -27,7 +27,7 @@ pipeline {
     HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
     MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1'
-    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/02-12-25-0'
+    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-22-25-0'
     DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {

From 2e6d4e89b0483e8d321b61333f18b42dd70984cd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 22 Apr 2025 16:39:03 +0000
Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../text_normalization/hi/taggers/measure.py     | 16 ++++++++++++++--
 .../hi/taggers/tokenize_and_classify.py          |  3 ++-
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/nemo_text_processing/text_normalization/hi/taggers/measure.py b/nemo_text_processing/text_normalization/hi/taggers/measure.py
index ea6430365..9f1ffbd39 100644
--- a/nemo_text_processing/text_normalization/hi/taggers/measure.py
+++ b/nemo_text_processing/text_normalization/hi/taggers/measure.py
@@ -62,7 +62,13 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst):
         )
 
         # Define the quarterly measurements
-        quarter = pynini.string_map([(".५", "साढ़े"), ("१.५", "डेढ़"), ("२.५", "ढाई"),])
+        quarter = pynini.string_map(
+            [
+                (".५", "साढ़े"),
+                ("१.५", "डेढ़"),
+                ("२.५", "ढाई"),
+            ]
+        )
         quarter_graph = pynutil.insert("integer_part: \"") + quarter + pynutil.insert("\"")
 
         # Define the unit handling
@@ -70,7 +76,13 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst):
         units = pynutil.insert(" units: \"") + quarterly_units_graph + pynutil.insert("\" ")
 
         # Handling symbols like x, X, *
-        symbol_graph = pynini.string_map([("x", "बाई"), ("X", "बाई"), ("*", "बाई"),])
+        symbol_graph = pynini.string_map(
+            [
+                ("x", "बाई"),
+                ("X", "बाई"),
+                ("*", "बाई"),
+            ]
+        )
 
         graph_decimal = (
             pynutil.insert("decimal { ")
diff --git a/nemo_text_processing/text_normalization/hi/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/hi/taggers/tokenize_and_classify.py
index bdec90c06..b1bbd2a10 100644
--- a/nemo_text_processing/text_normalization/hi/taggers/tokenize_and_classify.py
+++ b/nemo_text_processing/text_normalization/hi/taggers/tokenize_and_classify.py
@@ -68,7 +68,8 @@ def __init__(
             os.makedirs(cache_dir, exist_ok=True)
             whitelist_file = os.path.basename(whitelist) if whitelist else ""
             far_file = os.path.join(
-                cache_dir, f"hi_tn_{deterministic}_deterministic_{input_case}_{whitelist_file}_tokenize.far",
+                cache_dir,
+                f"hi_tn_{deterministic}_deterministic_{input_case}_{whitelist_file}_tokenize.far",
             )
         if not overwrite_cache and far_file and os.path.exists(far_file):
             self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]

From 95883964b892b5d7f5be907c66620514fcc06c31 Mon Sep 17 00:00:00 2001
From: Mariana <47233618+mgrafu@users.noreply.github.com>
Date: Wed, 23 Apr 2025 13:42:28 -0400
Subject: [PATCH 4/4] Potential fix for code scanning alert no. 821: Unused
 local variable

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Signed-off-by: Mariana <47233618+mgrafu@users.noreply.github.com>
---
 nemo_text_processing/text_normalization/hi/taggers/date.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/nemo_text_processing/text_normalization/hi/taggers/date.py b/nemo_text_processing/text_normalization/hi/taggers/date.py
index b8b652128..37b192165 100644
--- a/nemo_text_processing/text_normalization/hi/taggers/date.py
+++ b/nemo_text_processing/text_normalization/hi/taggers/date.py
@@ -73,7 +73,6 @@ def __init__(self, cardinal: GraphFst):
 
         delete_dash = pynutil.delete("-")
         delete_slash = pynutil.delete("/")
-        delete_comma = pynutil.delete(",")
 
         days_graph = pynutil.insert("day: \"") + days + pynutil.insert("\"") + insert_space