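Clean up Nemotron-Parse PTQ example code: suppress stdout only for trust_remote_code loads and drop model-specific dataset defaults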

Edwardf0t1 · Edwardf0t1 · commit e0e28cb3887f · 2026-02-12T16:17:22.000-08:00
Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
diff --git a/examples/llm_ptq/example_utils.py b/examples/llm_ptq/example_utils.py
@@ -276,33 +276,20 @@ def get_tokenizer(ckpt_path, trust_remote_code=False, **kwargs) -> PreTrainedTok
     if "vila" in ckpt_path.lower():
         ckpt_path += "/llm"
 
-    # Suppress verbose tokenizer output (e.g., printing all special tokens)
-    import contextlib
-    import io
-    import logging
-    import os
-
-    # Save current settings
-    old_verbosity = os.environ.get("TOKENIZERS_PARALLELISM", None)
-    transformers_log_level = logging.getLogger("transformers").level
-
-    # Suppress output
-    os.environ["TOKENIZERS_PARALLELISM"] = "false"
-    logging.getLogger("transformers").setLevel(logging.ERROR)
-
-    # Also capture stdout to suppress verbose tokenizer printing
-    with contextlib.redirect_stdout(io.StringIO()):
-        try:
+    # Some custom tokenizers (e.g., Nemotron-Parse) print verbose output when loading.
+    # Only suppress stdout for trust_remote_code models where custom tokenizer code may be noisy.
+    if trust_remote_code:
+        import contextlib
+        import io
+
+        with contextlib.redirect_stdout(io.StringIO()):
             tokenizer = AutoTokenizer.from_pretrained(
                 ckpt_path, trust_remote_code=trust_remote_code, **kwargs
             )
-        finally:
-            # Restore original settings
-            if old_verbosity is not None:
-                os.environ["TOKENIZERS_PARALLELISM"] = old_verbosity
-            else:
-                os.environ.pop("TOKENIZERS_PARALLELISM", None)
-            logging.getLogger("transformers").setLevel(transformers_log_level)
+    else:
+        tokenizer = AutoTokenizer.from_pretrained(
+            ckpt_path, trust_remote_code=trust_remote_code, **kwargs
+        )
 
     # can't set attribute 'pad_token' for "<unk>"
     # We skip this step for Nemo models
@@ -355,25 +342,17 @@ def get_processor(
 
         return MllamaImageProcessor(processor, device)
     else:
-        # Try to load AutoProcessor for other VL models (e.g., Nemotron-Parse)
-        # This will only work if the model has a processor config
-        try:
-            import contextlib
-            import io
-            import logging
-
-            # Suppress verbose output from processor/tokenizer loading
-            transformers_log_level = logging.getLogger("transformers").level
-            logging.getLogger("transformers").setLevel(logging.ERROR)
-
-            with contextlib.redirect_stdout(io.StringIO()):
-                processor = AutoProcessor.from_pretrained(
-                    ckpt_path,
-                    **model_kwargs,
-                )
+        # Try to load AutoProcessor for other VL models (e.g., Nemotron-Parse).
+        # Suppress stdout for trust_remote_code models where custom processor code may be noisy.
+        import contextlib
+        import io
 
-            # Restore logging
-            logging.getLogger("transformers").setLevel(transformers_log_level)
+        try:
+            if model_kwargs.get("trust_remote_code", False):
+                with contextlib.redirect_stdout(io.StringIO()):
+                    processor = AutoProcessor.from_pretrained(ckpt_path, **model_kwargs)
+            else:
+                processor = AutoProcessor.from_pretrained(ckpt_path, **model_kwargs)
 
             print(f"Loaded AutoProcessor for model type: {model_type}")
             return processor
diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
@@ -402,44 +402,15 @@ def load_model(args: argparse.Namespace):
             language_model = extracted_lm
             model_type = extracted_model_type
     else:
-        # Check if this is a Nemotron VL model that needs a processor
-        # Do this BEFORE setting default datasets so we can use image-text data for Nemotron-Parse
-        is_nemotron_vl_model = is_nemotron_vl(full_model)
-
-        # Check specifically for Nemotron-Parse to set appropriate dataset defaults
-        config = full_model.config
-        architectures = getattr(config, "architectures", [])
-        is_nemotron_parse = any("nemotronparse" in arch.lower() for arch in architectures)
-
         if args.dataset is None:
-            if is_nemotron_parse:
-                # For Nemotron-Parse, default to Nemotron VLM Dataset v2
-                args.dataset = ["nemotron_vlm_v2"]
-                print(
-                    "No dataset specified. Defaulting to 'nemotron_vlm_v2' for Nemotron-Parse "
-                    "(NVIDIA's image-text dataset for better calibration)."
-                )
-            else:
-                # For other models, use text-only datasets
-                args.dataset = ["cnn_dailymail", "nemotron-post-training-dataset-v2"]
-                warnings.warn(
-                    "No dataset specified. Defaulting to cnn_dailymail and nemotron-post-training-dataset-v2."
-                )
-
+            args.dataset = ["cnn_dailymail", "nemotron-post-training-dataset-v2"]
+            warnings.warn(
+                "No dataset specified. Defaulting to cnn_dailymail and nemotron-post-training-dataset-v2."
+            )
         # Adjust calib_size to match dataset length by extending or truncating as needed
         args.calib_size = (args.calib_size + [args.calib_size[-1]] * len(args.dataset))[
             : len(args.dataset)
         ]
-
-        if is_nemotron_vl_model:
-            # Load processor for Nemotron VL models (like Nemotron-Parse)
-            processor = get_processor(
-                args.pyt_ckpt_path,
-                model_type,
-                device,
-                trust_remote_code=args.trust_remote_code,
-            )
-
         tokenizer = get_tokenizer(args.pyt_ckpt_path, trust_remote_code=args.trust_remote_code)
 
         default_padding_side = tokenizer.padding_side
diff --git a/examples/llm_ptq/vlm_utils.py b/examples/llm_ptq/vlm_utils.py
@@ -126,11 +126,7 @@ def run_vl_preview_generation(model, tokenizer, model_path, stage_name):
         else:
             processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
 
-            # Check if this is Nemotron-Parse (uses task prompts instead of chat templates)
-            config = model.config
-            architectures = getattr(config, "architectures", [])
-            is_nemotron_parse = any("nemotronparse" in arch.lower() for arch in architectures)
-
+            # is_nemotron_parse was already computed above
             if is_nemotron_parse:
                 # Nemotron-Parse uses a specific task prompt format
                 # See: https://huggingface.co/nvidia/NVIDIA-Nemotron-Parse-v1.1#usage-example
diff --git a/modelopt/torch/export/model_utils.py b/modelopt/torch/export/model_utils.py
@@ -147,7 +147,9 @@ def get_language_model_from_vl(model) -> list[nn.Module] | None:
     if hasattr(model, "language_model"):
         return [model, model.language_model]
 
-    # Pattern 3: For encoder-decoder VL models (e.g., Nemotron-Parse), the decoder is the language model
+    # Pattern 3: For encoder-decoder VL models (e.g., Nemotron-Parse), the decoder is the language model.
+    # Note: This is safe because this function is only called when the model is already detected as a VLM.
+    # Non-VLM encoder-decoder models (T5, Bart) won't reach this code path.
     if hasattr(model, "decoder"):
         return [model, model.decoder]