From 8947e96143faa1dfe16b8a5918094416d78ebb86 Mon Sep 17 00:00:00 2001 From: Luka Dojcinovic <56648891+Luka-D@users.noreply.github.com> Date: Thu, 5 Jun 2025 16:48:24 -0400 Subject: [PATCH 1/4] fix: Update QuantLinear import to GPTQLoraLinear Renaming QuantLinear to GPTQLoraLinear to match the changes made in the peft library. Signed-off-by: Luka Dojcinovic <56648891+Luka-D@users.noreply.github.com> --- .../src/fms_acceleration_peft/autogptq_utils.py | 4 ++-- .../src/fms_acceleration_peft/gptqmodel/utils/peft.py | 4 ++-- plugins/fused-ops-and-kernels/tests/test_fused_ops.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py b/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py index d0b01428..c073b82e 100644 --- a/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py +++ b/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py @@ -26,7 +26,7 @@ ModelPatcherTrigger, ) from peft import LoraConfig -from peft.tuners.lora.gptq import QuantLinear as LoraLinearGPTQ +from peft.tuners.lora.gptq import GPTQLoraLinear import torch # these parameters are to be patched for triton v2 @@ -162,7 +162,7 @@ def create_new_module_peft( # to be installed new_module = None if isinstance(target, target_cls): - new_module = LoraLinearGPTQ( + new_module = GPTQLoraLinear( target, adapter_name, lora_config=lora_config, **kwargs ) diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/peft.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/peft.py index a6fd4b15..c73a1d8d 100644 --- a/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/peft.py +++ b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/peft.py @@ -30,7 +30,7 @@ from peft.mapping import PEFT_TYPE_TO_CONFIG_MAPPING from peft.peft_model import PEFT_TYPE_TO_MODEL_MAPPING from peft.tuners.lora import LoraConfig, LoraModel -from peft.tuners.lora.gptq import QuantLinear as LoraLinearGPTQ +from peft.tuners.lora.gptq import GPTQLoraLinear import torch # Local @@ -68,7 +68,7 @@ def _create_new_module( # to be installed new_module = None if isinstance(target, target_cls): - new_module = LoraLinearGPTQ( + new_module = GPTQLoraLinear( target, adapter_name, lora_config=lora_config, **kwargs ) diff --git a/plugins/fused-ops-and-kernels/tests/test_fused_ops.py b/plugins/fused-ops-and-kernels/tests/test_fused_ops.py index b7ee56d1..4b607bf1 100644 --- a/plugins/fused-ops-and-kernels/tests/test_fused_ops.py +++ b/plugins/fused-ops-and-kernels/tests/test_fused_ops.py @@ -28,7 +28,7 @@ if _is_package_available("auto_gptq"): # pylint: disable=ungrouped-imports # Third Party - from peft.tuners.lora.gptq import QuantLinear as LoraGPTQLinear4bit + from peft.tuners.lora.gptq import GPTQLoraLinear as LoraGPTQLinear4bit LORA_QUANTIZED_CLASSES[GPTQ] = LoraGPTQLinear4bit From accf4dfb83a9a239d875463d01aaf2091d424d84 Mon Sep 17 00:00:00 2001 From: Luka Dojcinovic <56648891+Luka-D@users.noreply.github.com> Date: Fri, 6 Jun 2025 13:26:01 -0400 Subject: [PATCH 2/4] fix: Specified peft version Updated dependencies to include peft>=0.15 Signed-off-by: Luka Dojcinovic <56648891+Luka-D@users.noreply.github.com> --- plugins/accelerated-peft/pyproject.toml | 3 +++ plugins/fused-ops-and-kernels/pyproject.toml | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/plugins/accelerated-peft/pyproject.toml b/plugins/accelerated-peft/pyproject.toml index 6513087c..a04ae1e2 100644 --- a/plugins/accelerated-peft/pyproject.toml +++ b/plugins/accelerated-peft/pyproject.toml @@ -23,6 +23,9 @@ classifiers=[ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] +dependencies = [ +"peft>=0.15", +] [project.optional-dependencies] flash-attn = ["flash-attn"] diff --git a/plugins/fused-ops-and-kernels/pyproject.toml b/plugins/fused-ops-and-kernels/pyproject.toml index 3848e2b8..4b9e7ff9 100644 --- a/plugins/fused-ops-and-kernels/pyproject.toml +++ b/plugins/fused-ops-and-kernels/pyproject.toml @@ -22,7 +22,9 @@ classifiers=[ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] -dependencies = [] +dependencies = [ +"peft>=0.15", +] [tool.hatch.build.targets.wheel] only-include = ["src/fms_acceleration_foak"] From 55a9e08f6ff10e211841255b9553d0907b067206 Mon Sep 17 00:00:00 2001 From: Will Johnson Date: Tue, 10 Jun 2025 09:46:59 -0400 Subject: [PATCH 3/4] peft lower limit Signed-off-by: Will Johnson --- plugins/accelerated-peft/pyproject.toml | 2 +- plugins/framework/pyproject.toml | 2 +- plugins/fused-ops-and-kernels/pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/accelerated-peft/pyproject.toml b/plugins/accelerated-peft/pyproject.toml index a04ae1e2..93e84012 100644 --- a/plugins/accelerated-peft/pyproject.toml +++ b/plugins/accelerated-peft/pyproject.toml @@ -24,7 +24,7 @@ classifiers=[ "Programming Language :: Python :: 3.11", ] dependencies = [ -"peft>=0.15", + "peft>=0.15", ] [project.optional-dependencies] diff --git a/plugins/framework/pyproject.toml b/plugins/framework/pyproject.toml index 157c4ada..e46513b9 100644 --- a/plugins/framework/pyproject.toml +++ b/plugins/framework/pyproject.toml @@ -24,7 +24,7 @@ classifiers=[ dependencies = [ "numpy<2.0", # numpy needs to be bounded due to incompatiblity with current torch<2.3 "torch>2.2", - "peft<=0.14.0", # QuantLinear is not available for peft version > 0.14.0 + "peft>=0.15.0", "accelerate", "pandas", ] diff --git a/plugins/fused-ops-and-kernels/pyproject.toml b/plugins/fused-ops-and-kernels/pyproject.toml index 4b9e7ff9..5835b991 100644 --- a/plugins/fused-ops-and-kernels/pyproject.toml +++ b/plugins/fused-ops-and-kernels/pyproject.toml @@ -23,7 +23,7 @@ classifiers=[ "Programming Language :: Python :: 3.11", ] dependencies = [ -"peft>=0.15", + "peft>=0.15", ] [tool.hatch.build.targets.wheel] From 19003e76ada516a13da21effdd802c8e7ef3962a Mon Sep 17 00:00:00 2001 From: Will Johnson Date: Tue, 10 Jun 2025 09:57:11 -0400 Subject: [PATCH 4/4] fix: remove deps from plugins Signed-off-by: Will Johnson --- plugins/accelerated-peft/pyproject.toml | 3 --- plugins/fused-ops-and-kernels/pyproject.toml | 4 +--- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/plugins/accelerated-peft/pyproject.toml b/plugins/accelerated-peft/pyproject.toml index 93e84012..6513087c 100644 --- a/plugins/accelerated-peft/pyproject.toml +++ b/plugins/accelerated-peft/pyproject.toml @@ -23,9 +23,6 @@ classifiers=[ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] -dependencies = [ - "peft>=0.15", -] [project.optional-dependencies] flash-attn = ["flash-attn"] diff --git a/plugins/fused-ops-and-kernels/pyproject.toml b/plugins/fused-ops-and-kernels/pyproject.toml index 5835b991..3848e2b8 100644 --- a/plugins/fused-ops-and-kernels/pyproject.toml +++ b/plugins/fused-ops-and-kernels/pyproject.toml @@ -22,9 +22,7 @@ classifiers=[ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] -dependencies = [ - "peft>=0.15", -] +dependencies = [] [tool.hatch.build.targets.wheel] only-include = ["src/fms_acceleration_foak"]