From 28bbf6906f9e67b60e9f81a68ff8f54cbbd3c630 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 12 Mar 2026 15:47:35 +0800 Subject: [PATCH 1/4] fix awq tests Signed-off-by: ZX-ModelCloud --- tests/test_awq_rotary_device.py | 4 +++- tests/test_awq_weight_mean.py | 6 ++++-- tests/test_model_require_pkgs.py | 4 ---- 3 files changed, 7 insertions(+), 7 deletions(-) delete mode 100644 tests/test_model_require_pkgs.py diff --git a/tests/test_awq_rotary_device.py b/tests/test_awq_rotary_device.py index c6ac05f89..0274bac03 100644 --- a/tests/test_awq_rotary_device.py +++ b/tests/test_awq_rotary_device.py @@ -52,7 +52,9 @@ def _make_processor(rotary: nn.Module) -> AWQProcessor: calibration_concat_size=None, calibration_sort=None, batch_size=1, - gptq_model=None, + gptq_model=types.SimpleNamespace( + rotary_embedding=None, + ), model=model, require_fwd=True, calculate_w_wq_diff=False, diff --git a/tests/test_awq_weight_mean.py b/tests/test_awq_weight_mean.py index a6a9e5530..d64ff323a 100644 --- a/tests/test_awq_weight_mean.py +++ b/tests/test_awq_weight_mean.py @@ -1,5 +1,5 @@ import os - +import types os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["PYTORCH_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:256,garbage_collection_threshold:0.7" #"expandable_segments:True" @@ -95,7 +95,9 @@ def __init__(self, qcfg: QuantizeConfig): calibration_concat_size=None, calibration_sort=None, batch_size=1, - gptq_model=None, + gptq_model=types.SimpleNamespace( + rotary_embedding=None, + ), model=None, require_fwd=True, calculate_w_wq_diff=False, diff --git a/tests/test_model_require_pkgs.py b/tests/test_model_require_pkgs.py deleted file mode 100644 index 8be8534ce..000000000 --- a/tests/test_model_require_pkgs.py +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai -# SPDX-License-Identifier: Apache-2.0 -# Contact: qubitium@modelcloud.ai, x.com/qubitium - From c7c0d1306f58ee85ef1d99a4b4bdb5bc6b25b937 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 12 Mar 2026 15:48:43 +0800 Subject: [PATCH 2/4] fix test_modelscope.py Signed-off-by: ZX-ModelCloud --- tests/test_modelscope.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_modelscope.py b/tests/test_modelscope.py index f41cca858..03b6b2e3a 100644 --- a/tests/test_modelscope.py +++ b/tests/test_modelscope.py @@ -21,7 +21,7 @@ def setUpClass(self): def test_load_modelscope(self): model = GPTQModel.load(self.MODEL_ID) - result = model.generate("The capital of France is")[0] + result = model.generate("The capital city of France is named")[0] str_output = model.tokenizer.decode(result) assert "paris" in str_output.lower() or "city" in str_output.lower() From d00596548f838c41ee77ef9e1d2be7677d52aa42 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 12 Mar 2026 16:49:47 +0800 Subject: [PATCH 3/4] fix test_model.py Signed-off-by: ZX-ModelCloud --- tests/test_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_model.py b/tests/test_model.py index e2409f0fc..21d340e1b 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -433,6 +433,7 @@ def test_model_save_with_non_persistent_buffer(self, offload_to_disk): def test_moe(self): quantize_config = QuantizeConfig( failsafe=None, + offload_to_disk=False ) model = GPTQModel.load( From 3ca5d4dbfd1e573b52ff66ff0129b149344deac4 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 12 Mar 2026 16:55:01 +0800 Subject: [PATCH 4/4] update defuser version Signed-off-by: ZX-ModelCloud --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a5c0969d1..825054515 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,4 +21,4 @@ pyarrow>=21.0 dill>=0.3.8 torchao>=0.14.1 kernels>=0.12.2 -defuser>=0.0.6 +defuser>=0.0.7