API refactor (#1579)

Qubitium · web-flow · commit 2c4320470ea9 · 2025-05-02T15:27:57.000+08:00
* clean up names

Signed-off-by: Qubitium <Qubitium@modelcloud.ai>

* format

Signed-off-by: Qubitium <Qubitium@modelcloud.ai>

---------

Signed-off-by: Qubitium <Qubitium@modelcloud.ai>
diff --git a/gptqmodel/looper/gptq_processor.py b/gptqmodel/looper/gptq_processor.py
@@ -29,7 +29,7 @@
 from ..quantization.config import QUANT_METHOD, QuantizeConfig
 from ..utils.logger import setup_logger
 from ..utils.model import move_to, pack_model
-from ..utils.torch import CPU, DEVICE_0, torch_streamCtx, torch_sync
+from ..utils.torch import CPU, DEVICE_0, DEVICE_1, torch_streamCtx, torch_sync
 
 log = setup_logger()
 
@@ -235,9 +235,7 @@ def process(self, module: NamedModule, auto_gc: bool = True):
             "wq": wq,  # fp16, quantized weight but not int4 (packed qweight)
         })
 
-        old = module.weight.data # TODO HACK since we cannot delete weight.data directly
         module.weight.data = wq
-        del old
 
         # if auto_gc:
         #     torch_empty_cache()
diff --git a/gptqmodel/looper/qqq_processor.py b/gptqmodel/looper/qqq_processor.py
@@ -30,7 +30,7 @@
 from ..quantization.qqq import QQQ
 from ..utils.logger import setup_logger
 from ..utils.model import move_to, pack_model
-from ..utils.torch import CPU, torch_sync, torch_streamCtx, DEVICE_0
+from ..utils.torch import CPU, DEVICE_0, torch_streamCtx, torch_sync
 
 log = setup_logger()
 
diff --git a/tests/models/model_test.py b/tests/models/model_test.py
@@ -211,7 +211,7 @@ def quantModel(self, model_id_or_path, trust_remote_code=False, torch_dtype="aut
         is_ovis_model = model.__class__.__name__ == "OvisGPTQ"
         need_create_processor = is_image_to_text_model and not is_ovis_model
         if not is_quantized:
-            model.quantize(calibration_dataset, backend=self.QUANT_BACKEND, batch_size=batch_size, buffered_fwd=True)
+            model.quantize(calibration_dataset, backend=self.QUANT_BACKEND, batch_size=batch_size, buffered_fwd=False)
 
             self.check_kernel(model, self.KERNEL_QUANT)