Skip to content

Commit 1693760

Browse files
[FIX] Avoid invoking `tensor.transpose(0, 1).contiguous()` when the shapes already match (#2913)
* Avoid invoking `tensor.transpose(0, 1).contiguous()` when the shapes already match.
  Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
* WeightOnlyLooper: add a progress bar.
  Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
* Cleanup.
  Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
---------
Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
1 parent 28b3870 commit 1693760

4 files changed

Lines changed: 397 additions & 118 deletions

File tree

gptqmodel/looper/weight_only_looper.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -171,6 +171,10 @@ def loop(self, **kwargs):
171171

172172
layer_count = len(layers)
173173
total_layers = layer_count + (1 if quant_config.lm_head else 0)
174+
pb = log.pb(range(total_layers)).manual().set(show_left_steps=1)
175+
pb.title(f"Weight-only quantization ({total_layers} layers)")
176+
self.processor.layer_count = layer_count
177+
self.processor.pb = pb
174178
preprocessor = None
175179
if getattr(quant_config, "preprocessors", None):
176180
preprocessor = ModulePreProcessor(
@@ -201,6 +205,15 @@ def loop(self, **kwargs):
201205
if is_lm_head_module:
202206
layer_name = None
203207

208+
if pb is not None:
209+
layer_title = (
210+
"Weight-only quantizing lm_head"
211+
if is_lm_head_module
212+
else f"Weight-only quantizing layer {layer_index} of {layer_count - 1}"
213+
)
214+
pb.current_iter_step = layer_index
215+
pb.title(layer_title).subtitle("").draw()
216+
204217
module = self.gptq_model.pre_quantize(module)
205218
if not is_lm_head_module:
206219
# Preserve existing module conversion behavior so the new
@@ -270,7 +283,12 @@ def loop(self, **kwargs):
270283
self.gptq_model.post_quantize(module)
271284
else:
272285
layers[layer_index] = self.gptq_model.post_quantize(module)
286+
if pb is not None:
287+
pb.current_iter_step = layer_index + 1
288+
pb.draw()
273289
finally:
290+
if pb is not None:
291+
pb.close()
274292
self.gptq_model.model.config.use_cache = forward_pass_use_cache
275293

276294
total_log = {self.processor.name(): self.processor.log}

0 commit comments

Comments
 (0)