Skip to content

Commit 64fa1fa

Browse files
committed
add image-text data calibration support
Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
1 parent e252fb2 commit 64fa1fa

File tree

1 file changed

+5 -2 lines changed

1 file changed

+5 -2 lines changed

examples/llm_ptq/hf_ptq.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -602,11 +602,14 @@ def mono_quantize(
602602
if is_nemotron_parse:
603603
# For Nemotron-Parse, wrap the model to force use_cache=False
604604
print("Wrapping Nemotron-Parse model for calibration (use_cache=False)")
605-
original_forward = language_model.forward
605+
# Store original forward before wrapping
606+
_original_forward = language_model.forward
607+
original_forward = _original_forward # Capture in outer scope
606608

607609
def wrapped_forward(*args, **kwargs):
608610
kwargs["use_cache"] = False
609-
return original_forward(*args, **kwargs)
611+
# Call the captured forward method
612+
return _original_forward(*args, **kwargs)
610613

611614
# Temporarily replace forward method
612615
language_model.forward = wrapped_forward

0 commit comments

Comments (0)