Skip to content

Commit cebb682

Browse files
Fix gptqmodel backend check (#2420)
* fix backend

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* rm exllama_set_max_input_length

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* add backend check

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* add comments and format

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* install pcre and setuptools for gptqmodel

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* Apply suggestion from @IlyasMoutawwakil

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
1 parent e2d0e42 commit cebb682

2 files changed

Lines changed: 14 additions & 2 deletions

File tree

.github/workflows/test_gptq.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ jobs:
4444
- name: Install dependencies
4545
run: |
4646
pip install --upgrade pip uv
47+
uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
4748
uv pip install .[tests]
49+
uv pip install pypcre "setuptools>=78.1.1,<82"
4850
uv pip install "gptqmodel>=5.6.12" --no-build-isolation
4951
5052
- name: Run tests

optimum/gptq/quantizer.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
from accelerate.hooks import remove_hook_from_module
4848

4949
if is_gptqmodel_available():
50-
from gptqmodel import BACKEND, QuantizeConfig, exllama_set_max_input_length
50+
from gptqmodel import BACKEND, QuantizeConfig
5151
from gptqmodel.quantization import FORMAT, GPTQ, METHOD
5252
from gptqmodel.utils.importer import hf_select_quant_linear_v2
5353
from gptqmodel.utils.model import hf_convert_gptq_v1_to_v2_format, hf_convert_gptq_v2_to_v1_format
@@ -669,8 +669,18 @@ class StoreAttr(object):
669669
model.quantize_config = StoreAttr()
670670
model.quantize_config.desc_act = self.desc_act
671671
model = gptq_post_init(model, use_act_order=self.desc_act)
672-
if self.desc_act and self.backend == BACKEND.EXLLAMA_V1 and self.max_input_length is not None:
672+
# Keep this compatibility guard for older gptqmodel versions where EXLLAMA_V1 still exists.
673+
# This branch can be removed once we bump the minimum gptqmodel version and drop v1 support.
674+
if (
675+
hasattr(BACKEND, "EXLLAMA_V1")
676+
and self.backend == BACKEND.EXLLAMA_V1
677+
and self.desc_act
678+
and self.max_input_length is not None
679+
):
680+
from gptqmodel import exllama_set_max_input_length
681+
673682
model = exllama_set_max_input_length(model, self.max_input_length)
683+
674684
return model
675685

676686
def pack_model(

0 commit comments

Comments
 (0)