1 file changed: +4 -8 lines changed
Original file line number | Diff line number | Diff line change
@@ -809,14 +809,10 @@ def quantize_main(
809809 device : torch .device ,
810810):
811811 if args .batch_size == 0 :
812- # Check if this is a vision-language model
813- # For VL models, skip automatic batch size detection and use a conservative default
814- # since proper multimodal input preparation is complex
815- if is_multimodal_model (full_model ) or is_nemotron_vl (full_model ):
816- print (
817- "Vision-language model detected. Using default batch_size=1 for calibration "
818- "to ensure proper handling of multimodal inputs."
819- )
812+ # For VL models with image-text calibration, skip automatic batch size detection
813+ # since get_max_batch_size can't handle multimodal inputs
814+ if args .calib_with_images :
815+ print ("Image-text calibration enabled. Using default batch_size=1 for calibration." )
820816 args .batch_size = 1
821817 else :
822818 # Calibration/sparsification will actually take much more memory than regular inference
You can’t perform that action at this time.
0 commit comments