[pre-commit.ci] auto fixes from pre-commit.com hooks

pre-commit-ci[bot] · pre-commit-ci[bot] · commit d8e057d18277 · 2025-05-13T18:00:26.000Z
for more information, see https://pre-commit.ci
diff --git a/nemo_text_processing/inverse_text_normalization/inverse_normalize.py b/nemo_text_processing/inverse_text_normalization/inverse_normalize.py
@@ -135,7 +135,7 @@ def __init__(
             from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst
             from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import (
                 VerbalizeFinalFst,
-            )    
+            )
 
         self.tagger = ClassifyFst(
             cache_dir=cache_dir, whitelist=whitelist, overwrite_cache=overwrite_cache, input_case=input_case
@@ -180,7 +180,7 @@ def parse_args():
     parser.add_argument(
         "--language",
         help="language",
-        choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja','ko'],
+        choices=['en', 'de', 'es', 'pt', 'ru', 'fr', 'sv', 'vi', 'ar', 'es_en', 'zh', 'hi', 'hy', 'mr', 'ja', 'ko'],
         default="en",
         type=str,
     )
diff --git a/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py b/nemo_text_processing/inverse_text_normalization/ko/clean_eval_data.py
@@ -282,41 +282,24 @@ def process_address_1(instance: Instance) -> Instance:
 
 
 filters = []
-filters.append(Filter(class_type="CARDINAL",
-               process_func=process_cardinal_1, filter_func=filter_cardinal_1))
-filters.append(Filter(class_type="ORDINAL",
-               process_func=process_ordinal_1, filter_func=filter_ordinal_1))
-filters.append(Filter(class_type="DECIMAL",
-               process_func=process_decimal_1, filter_func=filter_decimal_1))
-filters.append(Filter(class_type="MEASURE",
-               process_func=process_measure_1, filter_func=filter_measure_1))
-filters.append(Filter(class_type="MONEY",
-               process_func=process_money_1, filter_func=filter_money_1))
-filters.append(Filter(class_type="TIME",
-               process_func=process_time_1, filter_func=filter_time_1))
-
-filters.append(Filter(class_type="DATE",
-               process_func=process_date_1, filter_func=filter_date_1))
-filters.append(Filter(class_type="PLAIN",
-               process_func=process_plain_1, filter_func=filter_plain_1))
-filters.append(Filter(class_type="PUNCT",
-               process_func=process_punct_1, filter_func=filter_punct_1))
-filters.append(Filter(class_type="LETTERS",
-               process_func=process_letters_1, filter_func=filter_letters_1))
-filters.append(Filter(class_type="VERBATIM",
-               process_func=process_verbatim_1, filter_func=filter_verbatim_1))
-filters.append(Filter(class_type="DIGIT",
-               process_func=process_digit_1, filter_func=filter_digit_1))
-filters.append(Filter(class_type="TELEPHONE",
-               process_func=process_telephone_1, filter_func=filter_telephone_1))
-filters.append(Filter(class_type="ELECTRONIC",
-               process_func=process_electronic_1, filter_func=filter_electronic_1))
-filters.append(Filter(class_type="FRACTION",
-               process_func=process_fraction_1, filter_func=filter_fraction_1))
-filters.append(Filter(class_type="ADDRESS",
-               process_func=process_address_1, filter_func=filter_address_1))
-filters.append(Filter(class_type=EOS_TYPE,
-               process_func=lambda x: x, filter_func=lambda x: True))
+filters.append(Filter(class_type="CARDINAL", process_func=process_cardinal_1, filter_func=filter_cardinal_1))
+filters.append(Filter(class_type="ORDINAL", process_func=process_ordinal_1, filter_func=filter_ordinal_1))
+filters.append(Filter(class_type="DECIMAL", process_func=process_decimal_1, filter_func=filter_decimal_1))
+filters.append(Filter(class_type="MEASURE", process_func=process_measure_1, filter_func=filter_measure_1))
+filters.append(Filter(class_type="MONEY", process_func=process_money_1, filter_func=filter_money_1))
+filters.append(Filter(class_type="TIME", process_func=process_time_1, filter_func=filter_time_1))
+
+filters.append(Filter(class_type="DATE", process_func=process_date_1, filter_func=filter_date_1))
+filters.append(Filter(class_type="PLAIN", process_func=process_plain_1, filter_func=filter_plain_1))
+filters.append(Filter(class_type="PUNCT", process_func=process_punct_1, filter_func=filter_punct_1))
+filters.append(Filter(class_type="LETTERS", process_func=process_letters_1, filter_func=filter_letters_1))
+filters.append(Filter(class_type="VERBATIM", process_func=process_verbatim_1, filter_func=filter_verbatim_1))
+filters.append(Filter(class_type="DIGIT", process_func=process_digit_1, filter_func=filter_digit_1))
+filters.append(Filter(class_type="TELEPHONE", process_func=process_telephone_1, filter_func=filter_telephone_1))
+filters.append(Filter(class_type="ELECTRONIC", process_func=process_electronic_1, filter_func=filter_electronic_1))
+filters.append(Filter(class_type="FRACTION", process_func=process_fraction_1, filter_func=filter_fraction_1))
+filters.append(Filter(class_type="ADDRESS", process_func=process_address_1, filter_func=filter_address_1))
+filters.append(Filter(class_type=EOS_TYPE, process_func=lambda x: x, filter_func=lambda x: True))
 
 
 def filter_loaded_data(data: List[Instance], verbose: bool = False) -> List[Instance]:
@@ -344,10 +327,8 @@ def filter_loaded_data(data: List[Instance], verbose: bool = False) -> List[Inst
 
 def parse_args():
     parser = ArgumentParser()
-    parser.add_argument("--input", help="input file path",
-                        type=str, default='./en_with_types/output-00001-of-00100')
-    parser.add_argument(
-        "--verbose", help="print filtered instances", action='store_true')
+    parser.add_argument("--input", help="input file path", type=str, default='./en_with_types/output-00001-of-00100')
+    parser.add_argument("--verbose", help="print filtered instances", action='store_true')
     return parser.parse_args()
 
 
diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/cardinal.py
@@ -19,6 +19,7 @@
 from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_DIGIT, GraphFst, delete_space
 from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path
 
+
 class CardinalFst(GraphFst):
     """
     Finite state transducer for classifying cardinals
@@ -37,14 +38,14 @@ def __init__(self):
 
         graph_negative = pynini.cross("마이너스", "-")
         graph_negative += delete_space
-        
+
         ten = pynutil.delete("십")
         ten_alt = pynini.cross("십", "1")
         ### Responsible for second digit of two digit number. ex) 20's 2
         graph_ten_component = pynini.union((graph_digit + ten) | ten_alt, pynutil.insert("0"))
         ### Responsible for the first digit of number. ex) 1,2,3,4,5,,,
         graph_ten_component += graph_digit | pynutil.insert("0")
-        
+
         hundred = pynutil.delete("백")
         hundred_alt = pynini.cross("백", "1")
         graph_hundred_component = pynini.union(((graph_digit + hundred) | hundred_alt), pynutil.insert("0"))
@@ -59,46 +60,55 @@ def __init__(self):
         tenthousand_alt = pynini.cross("만", "1")
         ### "만" can express next four digits of numbers until the next unit "억", so insert "0000" to allocate four digit worth of space
         ### From "만", keep adding four digits and graph_thousand_component(0000-9999), because Korean units increase every four digits
-        graph_tenthousand_component = pynini.union(((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000"))
+        graph_tenthousand_component = pynini.union(
+            ((graph_thousand_component + tenthousand) | tenthousand_alt), pynutil.insert("0000")
+        )
         graph_tenthousand_component += graph_thousand_component
 
         hundredmillion = pynutil.delete("억")
         hundredmillion_alt = pynini.cross("억", "1")
-        graph_hundredmillion_component = pynini.union(((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000"))
-        graph_hundredmillion_component +=  graph_tenthousand_component
-        
+        graph_hundredmillion_component = pynini.union(
+            ((graph_thousand_component + hundredmillion) | hundredmillion_alt), pynutil.insert("0000")
+        )
+        graph_hundredmillion_component += graph_tenthousand_component
+
         trillion = pynutil.delete("조")
         trillion_alt = pynini.cross("조", "1")
-        graph_trillion_component = pynini.union(((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000"))
+        graph_trillion_component = pynini.union(
+            ((graph_thousand_component + trillion) | trillion_alt), pynutil.insert("0000")
+        )
         graph_trillion_component += graph_hundredmillion_component
 
         tenquadrillion = pynutil.delete("경")
         tenquadrillion_alt = pynini.cross("경", "1")
-        graph_tenquadrillion_component = pynini.union(((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000"))
+        graph_tenquadrillion_component = pynini.union(
+            ((graph_thousand_component + tenquadrillion) | tenquadrillion_alt), pynutil.insert("0000")
+        )
         graph_tenquadrillion_component += graph_trillion_component
 
-        
         graph = pynini.union(
             ### From biggest unit to smallest, everything is included
-            graph_tenquadrillion_component|
-            graph_zero
+            graph_tenquadrillion_component
+            | graph_zero
         )
 
         leading_zero = (
             pynutil.delete(pynini.closure("0")) + pynini.difference(NEMO_DIGIT, "0") + pynini.closure(NEMO_DIGIT)
         )
         graph_nonzero = graph @ leading_zero
         graph = pynini.union(graph_nonzero, graph_zero)
-        
+
         graph = graph @ leading_zero | graph_zero
 
         self.just_cardinals = graph
 
-        optional_sign = pynini.closure((pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space,0, 1)
+        optional_sign = pynini.closure(
+            (pynini.cross("마이너스", 'negative: "-"') | pynini.cross("-", 'negative: "-"')) + delete_space, 0, 1
+        )
 
         final_graph = (
             optional_sign + pynutil.insert(" ") + pynutil.insert("integer: \"") + graph + pynutil.insert("\"")
         ) | (pynutil.insert("integer: \"") + graph + pynutil.insert("\""))
 
         final_graph = self.add_tokens(final_graph)
-        self.fst = final_graph.optimize()
+        self.fst = final_graph.optimize()
diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py
@@ -19,15 +19,15 @@
 import pynini
 from pynini.lib import pynutil
 
-from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst
-from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst
-from nemo_text_processing.inverse_text_normalization.ko.graph_utils import ( 
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import (
     INPUT_LOWER_CASED,
     GraphFst,
     delete_extra_space,
     delete_space,
     generator_main,
 )
+from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst
+from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst
 
 
 class ClassifyFst(GraphFst):
@@ -64,13 +64,13 @@ def __init__(
             cardinal = CardinalFst()
             cardinal_graph = cardinal.fst
             word_graph = WordFst().fst
-            classify = (pynutil.add_weight(cardinal_graph, 1.1)| pynutil.add_weight(word_graph, 100))
-           
+            classify = pynutil.add_weight(cardinal_graph, 1.1) | pynutil.add_weight(word_graph, 100)
+
             token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ")
             tagger = pynini.closure(token, 1)
 
             self.fst = tagger
 
             if far_file:
                 generator_main(far_file, {"tokenize_and_classify": self.fst})
-                logging.info(f"ClassifyFst grammars are saved to {far_file}.")
+                logging.info(f"ClassifyFst grammars are saved to {far_file}.")
diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/word.py
@@ -27,6 +27,5 @@ class WordFst(GraphFst):
 
     def __init__(self):
         super().__init__(name="word", kind="classify")
-        word = pynutil.insert(
-            "name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"")
+        word = pynutil.insert("name: \"") + pynini.closure(NEMO_NOT_SPACE, 1) + pynutil.insert("\"")
         self.fst = word.optimize()
diff --git a/nemo_text_processing/inverse_text_normalization/ko/utils.py b/nemo_text_processing/inverse_text_normalization/ko/utils.py
@@ -15,9 +15,6 @@
 import os
 
 
-
 def get_abs_path(rel_path):
 
     return os.path.dirname(os.path.abspath(__file__)) + '/' + rel_path
-
-
diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/__init__.py
@@ -14,4 +14,4 @@
 
 from nemo_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst
 from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst
-from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst
+from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst
diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/cardinal.py
@@ -15,11 +15,7 @@
 import pynini
 from pynini.lib import pynutil
 
-from nemo_text_processing.inverse_text_normalization.ko.graph_utils import (
-    NEMO_NOT_QUOTE,
-    GraphFst,
-    delete_space,
-)
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
 
 
 class CardinalFst(GraphFst):
@@ -34,21 +30,17 @@ def __init__(self):
             pynutil.delete("negative:")
             + delete_space
             + pynutil.delete("\"")
-            + pynini.accep("-") 
+            + pynini.accep("-")
             + pynutil.delete("\"")
         )
 
         optional_sign_output = pynini.closure(negative_sign + delete_space, 0, 1)
 
-        digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1) 
+        digits_from_tag = pynini.closure(NEMO_NOT_QUOTE, 1)
         integer_cardinal = (
-            pynutil.delete("integer:")
-            + delete_space
-            + pynutil.delete("\"")
-            + digits_from_tag
-            + pynutil.delete("\"")
+            pynutil.delete("integer:") + delete_space + pynutil.delete("\"") + digits_from_tag + pynutil.delete("\"")
         )
 
         graph = integer_cardinal
         final_graph = optional_sign_output + graph
-        self.fst = self.delete_tokens(final_graph).optimize()
+        self.fst = self.delete_tokens(final_graph).optimize()
diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py
@@ -13,9 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst
 from nemo_text_processing.inverse_text_normalization.ko.verbalizers.cardinal import CardinalFst
 from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst
-from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst
 
 
 class VerbalizeFst(GraphFst):
@@ -30,7 +30,6 @@ def __init__(self):
         cardinal = CardinalFst()
         cardinal_graph = cardinal.fst
         word_graph = WordFst().fst
-        
-        graph = (cardinal_graph|word_graph)
+
+        graph = cardinal_graph | word_graph
         self.fst = graph
-        
diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize_final.py
@@ -18,16 +18,17 @@
 import pynini
 from pynini.lib import pynutil
 
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, delete_space, generator_main
 from nemo_text_processing.inverse_text_normalization.ko.verbalizers.verbalize import VerbalizeFst
 from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst
-from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst, generator_main, delete_space
 
 
 class VerbalizeFinalFst(GraphFst):
     """
     Finite state transducer that verbalizes an entire sentence, e.g.
     tokens { name: "its" } tokens { time { hours: "12" minutes: "30" } } tokens { name: "now" } -> its 12:30 now
     """
+
     def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False):
         super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic)
         far_file = None
diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/word.py
@@ -20,7 +20,6 @@
 from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
 
 
-
 class WordFst(GraphFst):
     '''
     tokens { name: "一" } -> 一
diff --git a/nemo_text_processing/inverse_text_normalization/run_evaluate.py b/nemo_text_processing/inverse_text_normalization/run_evaluate.py
@@ -35,7 +35,7 @@ def parse_args():
     parser.add_argument(
         "--lang",
         help="language",
-        choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja","ko"],
+        choices=["ar", "de", "en", "es", "es_en", "fr", "hi", "hy", "mr", "pt", "ru", "sv", "vi", "zh", "ja", "ko"],
         default="en",
         type=str,
     )
diff --git a/tests/nemo_text_processing/ko/test_cardinal.py b/tests/nemo_text_processing/ko/test_cardinal.py
@@ -33,7 +33,5 @@ def test_denorm(self, test_input, expected):
         assert pred == expected
 
     normalizer_with_audio_ko = (
-        NormalizerWithAudio(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False)
-        if RUN_AUDIO_BASED_TESTS
-        else None
-    )
+        NormalizerWithAudio(lang='ko', cache_dir=CACHE_DIR, overwrite_cache=False) if RUN_AUDIO_BASED_TESTS else None
+    )
diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py
@@ -106,7 +106,7 @@ def parse_args():
             'mr',
             'ja',
             'rw',
-            'ko'
+            'ko',
         ],
         type=str,
         default='en',