modelopt/torch/quantization
@@ -583,7 +583,7 @@ def get_model(
     model_kwargs = config_kwargs.copy()
     # Don't set torch_dtype for VILA models as they handle it explicitly in their builder
     if "vila" not in ckpt_path.lower():
-        model_kwargs.setdefault("torch_dtype", "auto")
+        model_kwargs.setdefault("dtype", "auto")
 
     if "vila" in ckpt_path.lower():
         hf_vila = AutoModel.from_pretrained(
@@ -666,7 +666,7 @@ def has_pack_quantized_config(config):
     model_kwargs2 = model_kwargs.copy()
     if auto_model_module not in [AutoModelForCausalLM, AutoModel]:
         model_kwargs2.pop("trust_remote_code", None)
-    model_kwargs2["torch_dtype"] = torch_dtype
+    model_kwargs2["dtype"] = torch_dtype
     model_kwargs2.pop("max_memory", None)
     model = from_config(hf_config, **model_kwargs2)
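For context, the keyword swap above tracks newer transformers releases, which accept dtype in place of the older torch_dtype argument when loading models. A minimal sketch of the resulting call, assuming a transformers release that supports the dtype keyword (older versions use torch_dtype) and using a placeholder checkpoint name:

from transformers import AutoModelForCausalLM

# Placeholder checkpoint path, for illustration only.
ckpt_path = "my-org/my-llm-checkpoint"

model_kwargs = {}
if "vila" not in ckpt_path.lower():
    # "auto" defers to the dtype recorded in the checkpoint's config.
    model_kwargs.setdefault("dtype", "auto")

model = AutoModelForCausalLM.from_pretrained(ckpt_path, **model_kwargs)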
@@ -420,15 +420,10 @@ def load_model(args: argparse.Namespace):
             attn_implementation=args.attn_implementation,
         )
     else:
-        if args.qformat in QUANT_CFG_CHOICES:
-            quant_cfg = QUANT_CFG_CHOICES[args.qformat]
-        elif hasattr(mtq, args.qformat):
-            quant_cfg = getattr(mtq, args.qformat)
-        else:
-            raise AssertionError(
-                f"Quantization format is not supported for low memory mode. "
-                f"Supported formats: {QUANT_CFG_CHOICES.keys()}"
-            )
+        assert args.qformat in QUANT_CFG_CHOICES, (
+            f"Quantization format is not supported for low memory mode. Supported formats: {QUANT_CFG_CHOICES.keys()}"
+        )
+        quant_cfg = QUANT_CFG_CHOICES[args.qformat]
         if args.kv_cache_qformat != "none":
             quant_cfg = mtq.utils.update_quant_cfg_with_kv_cache_quant(
                 quant_cfg,
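The rewritten branch drops the fallback that looked the format up as an attribute on mtq, so low memory mode now only accepts formats listed in QUANT_CFG_CHOICES. A rough sketch of the resulting check, using an illustrative stand-in for the real mapping:

# Stand-in mapping; the real QUANT_CFG_CHOICES is defined in the example script.
QUANT_CFG_CHOICES = {"fp8": "FP8_DEFAULT_CFG", "int4_awq": "INT4_AWQ_CFG"}

def pick_quant_cfg(qformat: str):
    # Anything not in QUANT_CFG_CHOICES now fails immediately, including names
    # that previously resolved through getattr(mtq, qformat).
    assert qformat in QUANT_CFG_CHOICES, (
        f"Quantization format is not supported for low memory mode. "
        f"Supported formats: {QUANT_CFG_CHOICES.keys()}"
    )
    return QUANT_CFG_CHOICES[qformat]

pick_quant_cfg("fp8")        # returns the fp8 config entry
# pick_quant_cfg("some_other_format")  # would raise AssertionError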
@@ -1665,7 +1665,10 @@ def gptq(
 
     def _make_gptq_handle(name, m):
         backend = getattr(m.weight_quantizer, "backend", None)
-        cls = _GPTQ_HELPER_REGISTRY.get(backend, GPTQHelper)
+        if backend is None:
+            cls = GPTQHelper
+        else:
+            cls = _GPTQ_HELPER_REGISTRY.get(backend, GPTQHelper)
         return cls(m, name, offload_to_cpu=True)
 
     gptq_handles = {name: _make_gptq_handle(name, m) for name, m in quantized_layers}
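A compact sketch of the dispatch this hunk changes, with placeholder helper classes and registry contents: the explicit branch makes the no-backend case resolve to GPTQHelper directly instead of passing a None key through the registry lookup.

class GPTQHelper:
    """Placeholder for the default GPTQ helper used when no backend is set."""

class _CustomBackendHelper(GPTQHelper):
    """Hypothetical backend-specific helper, for illustration only."""

# Illustrative contents; the real _GPTQ_HELPER_REGISTRY maps backend identifiers
# to helper classes.
_GPTQ_HELPER_REGISTRY = {"custom_backend": _CustomBackendHelper}

def select_helper_cls(backend):
    if backend is None:
        return GPTQHelper
    return _GPTQ_HELPER_REGISTRY.get(backend, GPTQHelper)

assert select_helper_cls(None) is GPTQHelper
assert select_helper_cls("custom_backend") is _CustomBackendHelper
assert select_helper_cls("unknown") is GPTQHelper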
@@ -1685,10 +1688,6 @@ def _make_gptq_handle(name, m):
     print_rank_0("Updating weights using GPTQ algorithm...")
     for handle in gptq_handles.values():
         handle.update_weights(block_size, perc_damp)
-
-        # Disable weight quantizer after running GPTQ update since weights are already QDQ'ed
-        if hasattr(handle.module, "weight_quantizer"):
-            handle.module.weight_quantizer.disable()
         handle.free()
     del gptq_handles
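After this hunk, the per-handle quantizer-disabling step is gone, so each handle only updates weights and frees its buffers in the loop. A stripped-down sketch with a stub handle class, mirroring just the calls that remain:

class _StubGPTQHandle:
    """Stub standing in for the real per-layer GPTQ handle; methods only print."""

    def __init__(self, name):
        self.name = name

    def update_weights(self, block_size, perc_damp):
        print(f"GPTQ update for {self.name}: block_size={block_size}, damp={perc_damp}")

    def free(self):
        print(f"freeing buffers for {self.name}")

block_size, perc_damp = 128, 0.01
gptq_handles = {n: _StubGPTQHandle(n) for n in ("layers.0.proj", "layers.1.proj")}

for handle in gptq_handles.values():
    handle.update_weights(block_size, perc_damp)
    handle.free()
del gptq_handles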