Skip to content

Commit d1d4533

Browse files
committed
WeightOnlyLooper add progressbar
Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
1 parent 4501ed4 commit d1d4533

2 files changed

Lines changed: 181 additions & 0 deletions

File tree

gptqmodel/looper/weight_only_looper.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -171,6 +171,10 @@ def loop(self, **kwargs):
171171

172172
layer_count = len(layers)
173173
total_layers = layer_count + (1 if quant_config.lm_head else 0)
174+
pb = log.pb(range(total_layers)).manual().set(show_left_steps=1)
175+
pb.title(f"Weight-only quantization ({total_layers} layers)")
176+
self.processor.layer_count = layer_count
177+
self.processor.pb = pb
174178
preprocessor = None
175179
if getattr(quant_config, "preprocessors", None):
176180
preprocessor = ModulePreProcessor(
@@ -201,6 +205,15 @@ def loop(self, **kwargs):
201205
if is_lm_head_module:
202206
layer_name = None
203207

208+
if pb is not None:
209+
layer_title = (
210+
"Weight-only quantizing lm_head"
211+
if is_lm_head_module
212+
else f"Weight-only quantizing layer {layer_index} of {layer_count - 1}"
213+
)
214+
pb.current_iter_step = layer_index
215+
pb.title(layer_title).subtitle("").draw()
216+
204217
module = self.gptq_model.pre_quantize(module)
205218
if not is_lm_head_module:
206219
# Preserve existing module conversion behavior so the new
@@ -270,7 +283,12 @@ def loop(self, **kwargs):
270283
self.gptq_model.post_quantize(module)
271284
else:
272285
layers[layer_index] = self.gptq_model.post_quantize(module)
286+
if pb is not None:
287+
pb.current_iter_step = layer_index + 1
288+
pb.draw()
273289
finally:
290+
if pb is not None:
291+
pb.close()
274292
self.gptq_model.model.config.use_cache = forward_pass_use_cache
275293

276294
total_log = {self.processor.name(): self.processor.log}

tests/test_weight_only_looper.py

Lines changed: 163 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,163 @@
1+
# SPDX-FileCopyrightText: 2026 ModelCloud.ai
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from types import SimpleNamespace
5+
6+
import torch
7+
from torch import nn
8+
9+
import gptqmodel.looper.weight_only_looper as weight_only_looper_module
10+
from gptqmodel.looper.weight_only_looper import WeightOnlyLooper
11+
from gptqmodel.quantization.config import RTNConfig
12+
13+
14+
class _FakeProgress:
    """Recording stand-in for a progress bar.

    Every configuration method returns ``self`` so calls can be chained,
    while titles, subtitles and draw requests are captured in lists that a
    test can inspect afterwards.
    """

    def __init__(self):
        # Recorded interactions, in call order.
        self.titles = []
        self.subtitles = []
        self.draw_calls = []
        # Mutable state that callers set or that ``close`` flips.
        self.current_iter_step = 0
        self.closed = False

    def manual(self):
        """Accept the call and return ``self`` for chaining."""
        return self

    def set(self, **_kwargs):
        """Ignore any keyword options and return ``self`` for chaining."""
        return self

    def title(self, value):
        """Record *value* as a title and return ``self`` for chaining."""
        self.titles.append(value)
        return self

    def subtitle(self, value):
        """Record *value* as a subtitle and return ``self`` for chaining."""
        self.subtitles.append(value)
        return self

    def draw(self, force: bool = False):
        """Record the step active at draw time together with *force*."""
        snapshot = (self.current_iter_step, force)
        self.draw_calls.append(snapshot)
        return self

    def close(self):
        """Mark the progress bar as closed."""
        self.closed = True
42+
43+
44+
class _FakeLogger:
    """Logger stand-in that hands out one shared ``_FakeProgress``.

    The iterable passed to ``pb`` is materialized and stored so a test can
    check what range the progress bar was created for.
    """

    def __init__(self):
        self.iterable = None
        self.progress = _FakeProgress()

    def pb(self, iterable, *, output_interval=None):
        """Store the items of *iterable* and return the shared progress bar.

        ``output_interval`` is accepted but deliberately unused.
        """
        del output_interval
        captured = list(iterable)
        self.iterable = captured
        return self.progress

    def info(self, *_args, **_kwargs):
        """Swallow an info-level log call; always returns ``None``."""
        return None
56+
57+
58+
class _TinyLayer(nn.Module):
    """Minimal layer holding a single bias-free 4x4 linear projection."""

    def __init__(self):
        super().__init__()
        projection = nn.Linear(4, 4, bias=False)
        self.linear = projection
62+
63+
64+
class _TinyModel(nn.Module):
    """Two-layer toy model with a namespace-based ``config`` attribute."""

    def __init__(self):
        super().__init__()
        config_fields = {
            "use_cache": True,
            "model_type": "tiny_weight_only_progress",
            "tie_word_embeddings": False,
        }
        self.config = SimpleNamespace(**config_fields)
        # Exactly two layers, registered as "0" and "1" in the ModuleList.
        stack = [_TinyLayer(), _TinyLayer()]
        self.layers = nn.ModuleList(stack)
73+
74+
75+
class _FakeQModel:
    """Stand-in for the quantizable model wrapper consumed by the looper.

    It wraps a ``_TinyModel`` and answers the structural queries with fixed
    values; the pre/post quantize hooks are identity functions.
    """

    def __init__(self, qcfg):
        self.quantize_config = qcfg
        self.model = _TinyModel()
        self.lm_head = "lm_head"
        self.layer_modules_strict = True
        self.tokenizer = None
        self.quant_log = None

    def extract_layers_node(self):
        """Return the attribute path(s) under which the layers live."""
        layer_nodes = ["layers"]
        return layer_nodes

    def get_modules_with_direct_meta_tensors(self, _model):
        """Report no modules, whatever model is passed in."""
        return []

    def simple_layer_modules(self, **_kwargs):
        """Return one module group containing only ``linear``."""
        group = ["linear"]
        return [group]

    def pre_quantize(self, module):
        """Identity hook: hand *module* back unchanged."""
        return module

    def post_quantize(self, module):
        """Identity hook: hand *module* back unchanged."""
        return module
98+
99+
100+
class _FakeProcessor:
    """Processor stand-in that records every call made to it.

    ``layer_count`` and ``pb`` start as ``None`` so a test can verify that
    they were assigned from outside.
    """

    def __init__(self, qcfg):
        self.qcfg = qcfg
        self.log = []
        # Attributes expected to be filled in by the code under test.
        self.layer_count = None
        self.pb = None
        # Call journals.
        self.memory_calls = []
        self.quantized = []
        self.finalized = []
        self.finalize_called = False

    def name(self):
        """Return the fixed processor name used as the log key."""
        return "fake_weight_only"

    def collect_memory_info(self, layer_index):
        """Record that memory info was requested for *layer_index*."""
        self.memory_calls.append(layer_index)

    def quantize_module(self, module):
        """Record the module's ``full_name`` and return the stored config."""
        module_name = module.full_name
        self.quantized.append(module_name)
        return self.qcfg

    def submodule_finalize(self, module, _model, *, qcfg=None):
        """Record the module's ``full_name`` paired with the given *qcfg*."""
        entry = (module.full_name, qcfg)
        self.finalized.append(entry)

    def finalize(self, *, model):
        """Flag that finalization ran; *model* is intentionally unused."""
        del model
        self.finalize_called = True
127+
128+
129+
def test_weight_only_looper_reports_logbar_progress(monkeypatch):
    """Run the looper over two fake layers and check its progress reporting.

    The module-level ``log`` and ``get_layers_with_prefixes`` of the looper
    module are replaced with fakes, ``lm_head`` quantization is disabled, and
    the recorded titles, subtitles, draw calls and close state are asserted.
    """
    quant_cfg = RTNConfig(bits=4, group_size=4, offload_to_disk=False, device="cpu")
    quant_cfg.lm_head = False
    logger_stub = _FakeLogger()
    processor_stub = _FakeProcessor(quant_cfg)
    qmodel = _FakeQModel(quant_cfg)

    def _layers_with_prefixes(_model, _nodes):
        # Always serve the fake model's own layers with fixed prefixes.
        return (list(qmodel.model.layers), ["layers.0", "layers.1"])

    monkeypatch.setattr(weight_only_looper_module, "log", logger_stub)
    monkeypatch.setattr(
        weight_only_looper_module,
        "get_layers_with_prefixes",
        _layers_with_prefixes,
    )

    result_log = WeightOnlyLooper(model=qmodel, processor=processor_stub).loop()

    # Looper outcome and processor interactions.
    assert result_log == {"fake_weight_only": []}
    assert qmodel.model.config.use_cache is True
    assert processor_stub.layer_count == 2
    assert processor_stub.pb is logger_stub.progress
    assert processor_stub.memory_calls == [0, 1]
    assert processor_stub.quantized == ["layers.0.linear", "layers.1.linear"]
    assert processor_stub.finalize_called is True

    # Progress-bar interactions.
    progress = logger_stub.progress
    expected_titles = [
        "Weight-only quantization (2 layers)",
        "Weight-only quantizing layer 0 of 1",
        "Weight-only quantizing layer 1 of 1",
    ]
    assert logger_stub.iterable == [0, 1]
    assert progress.titles == expected_titles
    assert progress.subtitles == ["", ""]
    assert progress.draw_calls[0] == (0, False)
    assert progress.draw_calls[-1] == (2, False)
    assert progress.closed is True

0 commit comments

Comments
 (0)