Fix gptqmodel backend check (#2420)

jiqing-feng · IlyasMoutawwakil · web-flow · commit cebb682db587 · 2026-04-15T15:28:17.000+02:00
* fix backend

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* rm exllama_set_max_input_length

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* add backend check

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* add comments and format

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* install pcre and setuptools for gptqmodel

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* Apply suggestion from @IlyasMoutawwakil

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
diff --git a/.github/workflows/test_gptq.yml b/.github/workflows/test_gptq.yml
@@ -44,7 +44,9 @@ jobs:
       - name: Install dependencies
         run: |
           pip install --upgrade pip uv
+          uv pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
           uv pip install .[tests]
+          uv pip install pypcre "setuptools>=78.1.1,<82"
           uv pip install "gptqmodel>=5.6.12" --no-build-isolation
 
       - name: Run tests
diff --git a/optimum/gptq/quantizer.py b/optimum/gptq/quantizer.py
@@ -47,7 +47,7 @@
     from accelerate.hooks import remove_hook_from_module
 
 if is_gptqmodel_available():
-    from gptqmodel import BACKEND, QuantizeConfig, exllama_set_max_input_length
+    from gptqmodel import BACKEND, QuantizeConfig
     from gptqmodel.quantization import FORMAT, GPTQ, METHOD
     from gptqmodel.utils.importer import hf_select_quant_linear_v2
     from gptqmodel.utils.model import hf_convert_gptq_v1_to_v2_format, hf_convert_gptq_v2_to_v1_format
@@ -669,8 +669,18 @@ class StoreAttr(object):
         model.quantize_config = StoreAttr()
         model.quantize_config.desc_act = self.desc_act
         model = gptq_post_init(model, use_act_order=self.desc_act)
-        if self.desc_act and self.backend == BACKEND.EXLLAMA_V1 and self.max_input_length is not None:
+        # Keep this compatibility guard for older gptqmodel versions where EXLLAMA_V1 still exists.
+        # This branch can be removed once we bump the minimum gptqmodel version and drop v1 support.
+        if (
+            hasattr(BACKEND, "EXLLAMA_V1")
+            and self.backend == BACKEND.EXLLAMA_V1
+            and self.desc_act
+            and self.max_input_length is not None
+        ):
+            from gptqmodel import exllama_set_max_input_length
+
             model = exllama_set_max_input_length(model, self.max_input_length)
+
         return model
 
     def pack_model(