Skip to content

Commit e0e28cb

Browse files
committed
clean up
Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
1 parent dc1af90 commit e0e28cb

4 files changed

Lines changed: 29 additions & 81 deletions

File tree

examples/llm_ptq/example_utils.py

Lines changed: 21 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -276,33 +276,20 @@ def get_tokenizer(ckpt_path, trust_remote_code=False, **kwargs) -> PreTrainedTok
276276
if "vila" in ckpt_path.lower():
277277
ckpt_path += "/llm"
278278

279-
# Suppress verbose tokenizer output (e.g., printing all special tokens)
280-
import contextlib
281-
import io
282-
import logging
283-
import os
284-
285-
# Save current settings
286-
old_verbosity = os.environ.get("TOKENIZERS_PARALLELISM", None)
287-
transformers_log_level = logging.getLogger("transformers").level
288-
289-
# Suppress output
290-
os.environ["TOKENIZERS_PARALLELISM"] = "false"
291-
logging.getLogger("transformers").setLevel(logging.ERROR)
292-
293-
# Also capture stdout to suppress verbose tokenizer printing
294-
with contextlib.redirect_stdout(io.StringIO()):
295-
try:
279+
# Some custom tokenizers (e.g., Nemotron-Parse) print verbose output when loading.
280+
# Only suppress stdout for trust_remote_code models where custom tokenizer code may be noisy.
281+
if trust_remote_code:
282+
import contextlib
283+
import io
284+
285+
with contextlib.redirect_stdout(io.StringIO()):
296286
tokenizer = AutoTokenizer.from_pretrained(
297287
ckpt_path, trust_remote_code=trust_remote_code, **kwargs
298288
)
299-
finally:
300-
# Restore original settings
301-
if old_verbosity is not None:
302-
os.environ["TOKENIZERS_PARALLELISM"] = old_verbosity
303-
else:
304-
os.environ.pop("TOKENIZERS_PARALLELISM", None)
305-
logging.getLogger("transformers").setLevel(transformers_log_level)
289+
else:
290+
tokenizer = AutoTokenizer.from_pretrained(
291+
ckpt_path, trust_remote_code=trust_remote_code, **kwargs
292+
)
306293

307294
# can't set attribute 'pad_token' for "<unk>"
308295
# We skip this step for Nemo models
@@ -355,25 +342,17 @@ def get_processor(
355342

356343
return MllamaImageProcessor(processor, device)
357344
else:
358-
# Try to load AutoProcessor for other VL models (e.g., Nemotron-Parse)
359-
# This will only work if the model has a processor config
360-
try:
361-
import contextlib
362-
import io
363-
import logging
364-
365-
# Suppress verbose output from processor/tokenizer loading
366-
transformers_log_level = logging.getLogger("transformers").level
367-
logging.getLogger("transformers").setLevel(logging.ERROR)
368-
369-
with contextlib.redirect_stdout(io.StringIO()):
370-
processor = AutoProcessor.from_pretrained(
371-
ckpt_path,
372-
**model_kwargs,
373-
)
345+
# Try to load AutoProcessor for other VL models (e.g., Nemotron-Parse).
346+
# Suppress stdout for trust_remote_code models where custom processor code may be noisy.
347+
import contextlib
348+
import io
374349

375-
# Restore logging
376-
logging.getLogger("transformers").setLevel(transformers_log_level)
350+
try:
351+
if model_kwargs.get("trust_remote_code", False):
352+
with contextlib.redirect_stdout(io.StringIO()):
353+
processor = AutoProcessor.from_pretrained(ckpt_path, **model_kwargs)
354+
else:
355+
processor = AutoProcessor.from_pretrained(ckpt_path, **model_kwargs)
377356

378357
print(f"Loaded AutoProcessor for model type: {model_type}")
379358
return processor

examples/llm_ptq/hf_ptq.py

Lines changed: 4 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -402,44 +402,15 @@ def load_model(args: argparse.Namespace):
402402
language_model = extracted_lm
403403
model_type = extracted_model_type
404404
else:
405-
# Check if this is a Nemotron VL model that needs a processor
406-
# Do this BEFORE setting default datasets so we can use image-text data for Nemotron-Parse
407-
is_nemotron_vl_model = is_nemotron_vl(full_model)
408-
409-
# Check specifically for Nemotron-Parse to set appropriate dataset defaults
410-
config = full_model.config
411-
architectures = getattr(config, "architectures", [])
412-
is_nemotron_parse = any("nemotronparse" in arch.lower() for arch in architectures)
413-
414405
if args.dataset is None:
415-
if is_nemotron_parse:
416-
# For Nemotron-Parse, default to Nemotron VLM Dataset v2
417-
args.dataset = ["nemotron_vlm_v2"]
418-
print(
419-
"No dataset specified. Defaulting to 'nemotron_vlm_v2' for Nemotron-Parse "
420-
"(NVIDIA's image-text dataset for better calibration)."
421-
)
422-
else:
423-
# For other models, use text-only datasets
424-
args.dataset = ["cnn_dailymail", "nemotron-post-training-dataset-v2"]
425-
warnings.warn(
426-
"No dataset specified. Defaulting to cnn_dailymail and nemotron-post-training-dataset-v2."
427-
)
428-
406+
args.dataset = ["cnn_dailymail", "nemotron-post-training-dataset-v2"]
407+
warnings.warn(
408+
"No dataset specified. Defaulting to cnn_dailymail and nemotron-post-training-dataset-v2."
409+
)
429410
# Adjust calib_size to match dataset length by extending or truncating as needed
430411
args.calib_size = (args.calib_size + [args.calib_size[-1]] * len(args.dataset))[
431412
: len(args.dataset)
432413
]
433-
434-
if is_nemotron_vl_model:
435-
# Load processor for Nemotron VL models (like Nemotron-Parse)
436-
processor = get_processor(
437-
args.pyt_ckpt_path,
438-
model_type,
439-
device,
440-
trust_remote_code=args.trust_remote_code,
441-
)
442-
443414
tokenizer = get_tokenizer(args.pyt_ckpt_path, trust_remote_code=args.trust_remote_code)
444415

445416
default_padding_side = tokenizer.padding_side

examples/llm_ptq/vlm_utils.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -126,11 +126,7 @@ def run_vl_preview_generation(model, tokenizer, model_path, stage_name):
126126
else:
127127
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
128128

129-
# Check if this is Nemotron-Parse (uses task prompts instead of chat templates)
130-
config = model.config
131-
architectures = getattr(config, "architectures", [])
132-
is_nemotron_parse = any("nemotronparse" in arch.lower() for arch in architectures)
133-
129+
# is_nemotron_parse was already computed above
134130
if is_nemotron_parse:
135131
# Nemotron-Parse uses a specific task prompt format
136132
# See: https://huggingface.co/nvidia/NVIDIA-Nemotron-Parse-v1.1#usage-example

modelopt/torch/export/model_utils.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,9 @@ def get_language_model_from_vl(model) -> list[nn.Module] | None:
147147
if hasattr(model, "language_model"):
148148
return [model, model.language_model]
149149

150-
# Pattern 3: For encoder-decoder VL models (e.g., Nemotron-Parse), the decoder is the language model
150+
# Pattern 3: For encoder-decoder VL models (e.g., Nemotron-Parse), the decoder is the language model.
151+
# Note: This is safe because this function is only called when the model is already detected as a VLM.
152+
# Non-VLM encoder-decoder models (T5, Bart) won't reach this code path.
151153
if hasattr(model, "decoder"):
152154
return [model, model.decoder]
153155

0 commit comments

Comments
 (0)