Disable disk-offload shells for Qwen2-VL models

Qubitium · Qubitium · commit ad3002228672 · 2026-04-18T21:50:40.000Z
diff --git a/gptqmodel/models/definitions/base_qwen2_vl.py b/gptqmodel/models/definitions/base_qwen2_vl.py
@@ -18,6 +18,10 @@
 
 class BaseQwen2VLGPTQ(BaseQModel):
     loader = AutoModelForImageTextToText
+    # Qwen2-VL placeholder-mask validation calls torch.nonzero(), which currently
+    # fails on fake/meta tensors during LazyTurtle shell execution, so use direct
+    # CPU loading instead of checkpoint-backed meta shells for quantization.
+    support_offload_to_disk = False
 
     pre_lm_head_norm_module = ["model.language_model.norm", "language_model.norm"]
 
diff --git a/tests/test_qwen2_family_compat.py b/tests/test_qwen2_family_compat.py
@@ -36,6 +36,10 @@ def test_qwen2_vl_image_only_process_vision_info_returns_image_list():
     assert image_inputs == [image]
 
 
+def test_qwen2_vl_disables_offload_to_disk_shell_loading():  # pins the class-level flag to False
+    assert base_qwen2_vl.BaseQwen2VLGPTQ.support_offload_to_disk is False
+
+
 def test_qwen2_vl_pre_quantize_hooks_use_inner_model_layout():
     instance = object.__new__(base_qwen2_vl.BaseQwen2VLGPTQ)
     instance.model = types.SimpleNamespace(