NVIDIA-NeMo
diff --git a/‎CLAUDE.md‎
Lines changed: 3 additions & 3 deletions b/‎CLAUDE.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎examples/asr/asr_adapters/scoring_and_analysis.py‎
Lines changed: 6 additions & 1 deletion b/‎examples/asr/asr_adapters/scoring_and_analysis.py‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎examples/asr/export/transducer/infer_transducer_onnx.py‎
Lines changed: 5 additions & 1 deletion b/‎examples/asr/export/transducer/infer_transducer_onnx.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎examples/asr/export/transducer/infer_transducer_ts.py‎
Lines changed: 5 additions & 1 deletion b/‎examples/asr/export/transducer/infer_transducer_ts.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎examples/asr/speech_classification/vad_infer.py‎
Lines changed: 3 additions & 1 deletion b/‎examples/asr/speech_classification/vad_infer.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎examples/tts/aligner_heteronym_disambiguation.py‎
Lines changed: 3 additions & 6 deletions b/‎examples/tts/aligner_heteronym_disambiguation.py‎
Lines changed: 3 additions & 6 deletions
diff --git a/‎external/get_collections.py‎
Lines changed: 3 additions & 3 deletions b/‎external/get_collections.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎external/get_modules.py‎
Lines changed: 3 additions & 3 deletions b/‎external/get_modules.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎nemo/collections/asr/data/audio_to_ctm_dataset.py‎
Lines changed: 1 addition & 2 deletions b/‎nemo/collections/asr/data/audio_to_ctm_dataset.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎nemo/collections/asr/data/audio_to_label_dataset.py‎
Lines changed: 15 additions & 3 deletions b/‎nemo/collections/asr/data/audio_to_label_dataset.py‎
Lines changed: 15 additions & 3 deletions
@@ -21,9 +21,9 @@ Requires Python 3.10+, PyTorch 2.6+.
 - **Line length: 119** (not default 88) — consistent across black, isort, flake8
 - Black with `skip_string_normalization = true`
 - isort with `profile = black`
-- Check: `python setup.py style --scope <path>`
-- Fix: `python setup.py style --scope <path> --fix`
-- **Incremental reformatting**: most collections are excluded from black (see `extend-exclude` in pyproject.toml). The files are reformatted when somebody makes changes to avoid a single big reformatting PR. Do not reformat files outside your changes.
+- Check: `isort --check <path> && black --check <path>` or `isort --check . && black --check .`
+- Fix: `isort <path> && black <path>` or `isort . && black .`
+- Jupyter Notebooks are excluded from automatic black reformatting (see `extend-exclude` in pyproject.toml), but can still be reformatted when passed directly. Do not reformat notebooks outside your changes.
 
 ## Testing
 
 
@@ -202,7 +202,12 @@ def display_results(df_all: pd.DataFrame, category: str, best_config: pd.Series,
 
 
 def get_best_config(
-    df_exp: pd.DataFrame, dataset_type_col: str, key_info: dict, topk: int, show_analysis: bool, exp_type: str,
+    df_exp: pd.DataFrame,
+    dataset_type_col: str,
+    key_info: dict,
+    topk: int,
+    show_analysis: bool,
+    exp_type: str,
 ):
     """Get the best hyperparameter configuration for a given subset of experiments.
 
 
@@ -60,7 +60,11 @@
 def parse_arguments():
     parser = ArgumentParser()
     parser.add_argument(
-        "--nemo_model", type=str, default=None, required=False, help="Path to .nemo file",
+        "--nemo_model",
+        type=str,
+        default=None,
+        required=False,
+        help="Path to .nemo file",
     )
     parser.add_argument(
         '--pretrained_model', type=str, default=None, required=False, help='Name of a pretrained NeMo file'
 
@@ -63,7 +63,11 @@
 def parse_arguments():
     parser = ArgumentParser()
     parser.add_argument(
-        "--nemo_model", type=str, default=None, required=False, help="Path to .nemo file",
+        "--nemo_model",
+        type=str,
+        default=None,
+        required=False,
+        help="Path to .nemo file",
     )
     parser.add_argument(
         '--pretrained_model', type=str, default=None, required=False, help='Name of a pretrained NeMo file'
 
@@ -91,7 +91,9 @@ def main(cfg):
             'vad_stream': True,
             'sample_rate': 16000,
             'manifest_filepath': manifest_vad_input,
-            'labels': ['infer',],
+            'labels': [
+                'infer',
+            ],
             'num_workers': cfg.num_workers,
             'shuffle': False,
             'window_length_in_sec': cfg.vad.parameters.window_length_in_sec,
 
@@ -44,8 +44,7 @@
 
 
 def get_args():
-    """Retrieve arguments for disambiguation.
-    """
+    """Retrieve arguments for disambiguation."""
     parser = argparse.ArgumentParser("G2P disambiguation using Aligner input embedding distances.")
     # TODO(jocelynh): Make this required=False with default download from NGC once ckpt uploaded
     parser.add_argument('--model', required=True, type=str, help="Path to Aligner model checkpoint (.nemo file).")
@@ -85,8 +84,7 @@ def get_args():
 
 
 def load_and_prepare_audio(aligner, audio_path, target_sr, device):
-    """Loads and resamples audio to target sample rate (if necessary), and preprocesses for Aligner input.
-    """
+    """Loads and resamples audio to target sample rate (if necessary), and preprocesses for Aligner input."""
     # Load audio and get length for preprocessing
     audio_data, orig_sr = sf.read(audio_path)
     if orig_sr != target_sr:
@@ -238,8 +236,7 @@ def disambiguate_candidates(aligner, text, spec, spec_len, confidence, device, h
 def disambiguate_dataset(
     aligner, manifest_path, out_path, sr, heteronyms, confidence, device, verbose, heteronyms_only=True
 ):
-    """Disambiguates the phonemes for all words with ambiguous pronunciations in the given manifest.
-    """
+    """Disambiguates the phonemes for all words with ambiguous pronunciations in the given manifest."""
     log_file = open('disambiguation_logs.txt', 'w') if verbose else None
 
     with open(out_path, 'w') as f_out:
 
@@ -25,8 +25,8 @@
 
 
 def process_collection(id, col):
-    """ Helper function processing the collection.
-    
+    """Helper function processing the collection.
+
     Args:
         id: (short) name of the collection.
         col: a collection (python module).
@@ -41,7 +41,7 @@ def process_collection(id, col):
 
 
 def main():
-    """ Main function generating a JSON file with list of NeMo collections. """
+    """Main function generating a JSON file with list of NeMo collections."""
     # Parse filename.
     parser = argparse.ArgumentParser()
     parser.add_argument('--filename', help='Name of the output JSON file', type=str, default="collections.json")
 
@@ -26,8 +26,8 @@
 
 
 def process_member(name, obj, module_list):
-    """ Helper function processing the passed object and, if ok, adding a record to the module list.
-    
+    """Helper function processing the passed object and, if ok, adding a record to the module list.
+
     Args:
         name: name of the member
         obj: member (class/function etc.)
@@ -74,7 +74,7 @@ def process_member(name, obj, module_list):
 
 
 def main():
-    """ Main function analysing the indicated NeMo collection and generating a JSON file with module descriptions. """
+    """Main function analysing the indicated NeMo collection and generating a JSON file with module descriptions."""
     # Parse filename.
     parser = argparse.ArgumentParser()
     parser.add_argument('--collection', help='ID of the collection', type=str)
 
@@ -24,8 +24,7 @@
 
 @dataclass
 class FrameCtmUnit:
-    """A container class for one CTM unit with start and length countable in frames.
-    """
+    """A container class for one CTM unit with start and length countable in frames."""
 
     label: str
     start_frame: int
 
@@ -131,7 +131,11 @@ def get_tarred_classification_label_dataset(
 
 
 def get_concat_tarred_speech_label_dataset(
-    featurizer, config: dict, shuffle_n: int, global_rank: int, world_size: int,
+    featurizer,
+    config: dict,
+    shuffle_n: int,
+    global_rank: int,
+    world_size: int,
 ):
     tarred_audio_filepaths = config['tarred_audio_filepaths']
     manifest_filepaths = config['manifest_filepath']
@@ -143,7 +147,11 @@ def get_concat_tarred_speech_label_dataset(
         conf['manifest_filepath'] = manifest_filepath
         conf['tarred_audio_filepaths'] = tarred_audio_filepath
         dataset = get_tarred_speech_label_dataset(
-            config=conf, featurizer=featurizer, shuffle_n=shuffle_n, global_rank=global_rank, world_size=world_size,
+            config=conf,
+            featurizer=featurizer,
+            shuffle_n=shuffle_n,
+            global_rank=global_rank,
+            world_size=world_size,
         )
         datasets.append(dataset)
 
@@ -160,7 +168,11 @@ def get_concat_tarred_speech_label_dataset(
 
 
 def get_tarred_speech_label_dataset(
-    featurizer, config: dict, shuffle_n: int, global_rank: int, world_size: int,
+    featurizer,
+    config: dict,
+    shuffle_n: int,
+    global_rank: int,
+    world_size: int,
 ) -> audio_to_label.TarredAudioToSpeechLabelDataset:
     """
     Instantiates a Speech Label (e.g. VAD, speaker recognition) TarredAudioLabelDataset.