Tencent
diff --git a/‎angelslim/compressor/quant/ptq.py‎
Lines changed: 11 additions & 0 deletions b/‎angelslim/compressor/quant/ptq.py‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎angelslim/compressor/transform/__init__.py‎
Lines changed: 5 additions & 0 deletions b/‎angelslim/compressor/transform/__init__.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎angelslim/compressor/transform/base.py‎
Lines changed: 50 additions & 0 deletions b/‎angelslim/compressor/transform/base.py‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎angelslim/compressor/transform/factory.py‎
Lines changed: 117 additions & 0 deletions b/‎angelslim/compressor/transform/factory.py‎
Lines changed: 117 additions & 0 deletions
diff --git a/‎angelslim/compressor/transform/rotation/__init__.py‎
Lines changed: 3 additions & 0 deletions b/‎angelslim/compressor/transform/rotation/__init__.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎angelslim/compressor/transform/rotation/fuse_norm_utils.py‎
Lines changed: 77 additions & 0 deletions b/‎angelslim/compressor/transform/rotation/fuse_norm_utils.py‎
Lines changed: 77 additions & 0 deletions
@@ -22,6 +22,7 @@
 
 from ...utils import find_parent_layer_and_sub_name, print_info
 from ..compressor_factory import CompressorFactory
+from ..transform import TransformFactory
 from .core import PTQHook
 from .modules import AWQ, FP8, GPTQ, INT8, NVFP4, W4A8INT8, LeptoFP8, SmoothQuant
 
@@ -36,14 +37,24 @@ def __init__(self, model, slim_config=None):
             model(nn.Moudle, required): the model to be quant.
             slim_config(dict, required): the configuration for quantization.
                 - compress_config: the configuration for compression.
+                - transform_config: the configuration for transform.
                 - global_config: the global configuration for the model.
         """
         self.quant_model = model
         # init ptq config of model
         self.quant_model.init_ptq(slim_config)
         self.absolute_model_path = slim_config["global_config"].absolute_model_path
         self.quant_algo = self.quant_model.quant_config.quant_algo
+
+        # init transform
+        # TODO(gavinlee) will be deprecated, and move to transform, now only for smoothquant
         self.quant_helpers = self.quant_model.quant_config.quant_helpers
+
+        # create transform, for example, smoothquant
+        self.trasform_runner = TransformFactory.create(self.quant_model, slim_config)
+        # transform first, then run quantization
+        self.trasform_runner.run()
+
         if "fp8" in self.quant_algo or "int8" in self.quant_algo or "nvfp4" in self.quant_algo:
             # Add ptq observer hook
             self.ptq_hook = PTQHook(self.quant_model)
 
@@ -0,0 +1,5 @@
+from .base import TransformBase
+from .factory import TransformFactory
+from .rotation.spin import SpinQuant
+
+__all__ = ["TransformBase", "TransformFactory", "SpinQuant"]
@@ -0,0 +1,50 @@
+# Copyright 2025 Tencent Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+
+__all__ = ["TransformBase"]
+
+
+class TransformBase(ABC):
+    """Common interface for model weight transforms (e.g. SpinQuant).
+
+    Concrete subclasses must provide `run()`. `convert()` and `save()` are
+    optional hooks that raise NotImplementedError until a subclass overrides them.
+
+    Intended call sequence:
+        1. TransformFactory.create(quant_model, quant_config)  -> TransformBase
+        2. transform.run()      - apply transform (PTQ: fuse into weights)
+        3. transform.convert()  - fuse hooks into weights after QAT training (optional)
+        4. transform.save()     - save transformed model (optional)
+    """
+
+    def __init__(self, quant_model, quant_config):
+        # The configuration is exposed to subclasses as `self.config`.
+        self.quant_model, self.config = quant_model, quant_config
+
+    @abstractmethod
+    def run(self):
+        """Apply the transform to the model weights."""
+
+    def convert(self, **kwargs):
+        """Fuse online rotation hooks into the weights once QAT training is done.
+
+        Subclasses that support QAT mode should override this method.
+
+        Raises:
+            NotImplementedError: Always, unless overridden.
+        """
+        owner = type(self).__name__
+        raise NotImplementedError(f"{owner} does not implement convert()")
+
+    def save(self, save_path: str = None):
+        """Persist the transformed model.
+
+        Subclasses should override this method with the actual saving logic.
+
+        Raises:
+            NotImplementedError: Always, unless overridden.
+        """
+        owner = type(self).__name__
+        raise NotImplementedError(f"{owner} does not implement save()")
@@ -0,0 +1,117 @@
+# Copyright 2025 Tencent Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .base import TransformBase
+
+__all__ = ["TransformFactory"]
+
+
+class _NoOpTransform(TransformBase):
+    """Placeholder transform whose `run()` leaves the model untouched.
+
+    TransformFactory.create returns it when no transform is configured.
+    """
+
+    def __init__(self, quant_model, slim_config=None):
+        # slim_config may be a dict (PTQ path) or omitted entirely; it is stored as-is
+        # on `self.config` by the base initializer.
+        super().__init__(quant_model, slim_config)
+
+    def run(self):
+        """Do nothing."""
+        return None
+
+
+class TransformFactory:
+    """Factory for creating TransformBase instances from config.
+
+    Usage
+    -----
+        transform = TransformFactory.create(slim_model, slim_config)
+        transform.run()
+
+    The transform name is read from ``slim_config.transform_config["name"]``,
+    which corresponds to the ``transform.name`` field in the YAML config:
+
+        transform:
+          name: SpinQuant
+          spin_config: ...
+
+    When no transform (or no transform name) is configured, ``create`` returns
+    a no-op transform so callers can invoke ``run()`` unconditionally.
+
+    Registering a new transform
+    ---------------------------
+        @TransformFactory.register("MyTransform")
+        class MyTransform(TransformBase):
+            ...
+    """
+
+    # Maps the configured transform name to the class that implements it.
+    _registry: dict[str, type[TransformBase]] = {}
+
+    @staticmethod
+    def _read_field(container, key):
+        """Read ``key`` from a dict or as an attribute; return None when absent."""
+        if isinstance(container, dict):
+            return container.get(key)
+        return getattr(container, key, None)
+
+    @classmethod
+    def create(cls, quant_model, slim_config) -> TransformBase:
+        """Instantiate a transform from slim_config.
+
+        Args:
+            quant_model: The wrapped slim model.
+            slim_config: A dict (PTQ path) or an object with attributes (transform
+                path) that may carry a ``transform_config`` holding ``"name"``.
+                ``transform_config`` itself may be a dict or an attribute object.
+
+        Returns:
+            An unrun TransformBase instance. Call ``.run()`` to apply the transform.
+            A no-op transform is returned when ``transform_config`` or its name is
+            missing or empty.
+
+        Raises:
+            ValueError: If a transform name is given but not registered.
+        """
+        transform_config = cls._read_field(slim_config, "transform_config")
+        # An empty/missing transform_config means there is no name to look up.
+        name = cls._read_field(transform_config, "name") if transform_config else None
+        if not name:
+            return _NoOpTransform(quant_model, slim_config)
+
+        if name not in cls._registry:
+            available = cls.list_transforms()
+            raise ValueError(f"Unknown transform '{name}'. Available: {available}")
+
+        # Registered transforms receive the full slim_config, not just transform_config.
+        return cls._registry[name](quant_model, slim_config)
+
+    @classmethod
+    def register(cls, name: str):
+        """Decorator to register a TransformBase subclass under the given name.
+
+        Registering a name that already exists replaces the previous entry.
+
+        Args:
+            name: The string key used in YAML ``transform.name``.
+
+        Raises:
+            TypeError: (from the decorator) if the decorated class is not a
+                subclass of TransformBase.
+
+        Example:
+            @TransformFactory.register("MyTransform")
+            class MyTransform(TransformBase):
+                ...
+        """
+
+        def decorator(cls_):
+            if not issubclass(cls_, TransformBase):
+                raise TypeError(f"{cls_.__name__} must be a subclass of TransformBase")
+            cls._registry[name] = cls_
+            # Return the class unchanged so the decorator is transparent.
+            return cls_
+
+        return decorator
+
+    @classmethod
+    def list_transforms(cls) -> list[str]:
+        """Return names of all registered transforms."""
+        return list(cls._registry)
@@ -0,0 +1,3 @@
+from .spin import SpinQuant
+
+__all__ = ["SpinQuant"]
@@ -0,0 +1,77 @@
+# coding=utf-8
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This code is based on QuaRot(https://github.com/spcl/QuaRot/tree/main/quarot).
+# Licensed under Apache License 2.0.
+
+import typing
+
+import torch
+
+
+@torch.no_grad()
+def center_embeddings(embedding: torch.nn.Module):
+    """
+    Subtract, from every embedding vector, the mean over its last dimension.
+
+    The arithmetic is done in float64 and the result is cast back to the
+    original dtype before being written to ``embedding.weight.data``.
+
+    :param embedding: module whose ``weight`` holds the embeddings to center
+    :raises ValueError: if the module has no ``weight`` attribute
+    """
+    if not hasattr(embedding, "weight"):
+        raise ValueError(f"Cannot fuse norm of type {type(embedding)}")
+
+    original_dtype = embedding.weight.dtype
+    table = embedding.weight.to(torch.float64)
+    row_means = table.mean(dim=-1, keepdim=True)
+    embedding.weight.data = (table - row_means).to(original_dtype)
+
+
+# TODO: verify that this function is correct.
+@torch.no_grad()
+def bake_mean_into_linear(linear: torch.nn.Linear) -> None:
+    """
+    Fold a mean subtraction into a linear layer, in place.
+
+    The mean over dim -2 of the weight is subtracted from the weight and, when
+    a bias exists, the scalar mean of the bias is subtracted from the bias.
+    The layer then performs the mean subtraction that is usually done inside
+    layernorm. Computation runs in float64 and is cast back to the weight's
+    original dtype.
+    """
+    target_dtype = linear.weight.dtype
+
+    weight64 = linear.weight.data.double()
+    centered_weight = weight64 - weight64.mean(dim=-2, keepdim=True)
+    linear.weight.data = centered_weight.to(target_dtype)
+
+    if linear.bias is None:
+        return
+
+    bias64 = linear.bias.data.double()
+    linear.bias.data = (bias64 - bias64.mean()).to(target_dtype)
+
+
+@torch.no_grad()
+def fuse_ln_linear(
+    layernorm: torch.nn.Module, linear_layers: typing.Iterable[torch.nn.Linear]
+) -> None:
+    """
+    Fuse the linear operations in Layernorm into the adjacent linear blocks.
+
+    Each linear weight is multiplied (broadcast over its last dimension) by the
+    norm's weight. When the norm has a bias, ``W @ norm_bias`` is added to the
+    linear bias, creating that bias if the layer had none. Afterwards the
+    norm's weight is reset to ones and its bias, if any, to zeros. All
+    arithmetic is done in float64 and cast back to each linear's weight dtype.
+
+    A norm whose ``bias`` attribute is missing *or* ``None`` is treated as
+    having no bias; no bias is added to the linear layers in that case.
+
+    :param layernorm: norm module with a ``weight`` and an optional ``bias``
+    :param linear_layers: linear layers that consume the norm's output
+    """
+    # `hasattr` alone is not enough: the attribute can exist and be None.
+    ln_bias = getattr(layernorm, "bias", None)
+    ln_scale = layernorm.weight.double()
+    ln_shift = ln_bias.double() if ln_bias is not None else None
+
+    for linear in linear_layers:
+        linear_dtype = linear.weight.dtype
+
+        # Keep the unscaled weight: the bias term must use the original W.
+        W_ = linear.weight.data.double()
+        linear.weight.data = (W_ * ln_scale).to(linear_dtype)
+
+        if ln_shift is None:
+            continue
+
+        if linear.bias is None:
+            # Allocate next to the weight so the addition below cannot hit a
+            # device mismatch.
+            linear.bias = torch.nn.Parameter(
+                torch.zeros(linear.out_features, dtype=torch.float64, device=W_.device)
+            )
+        fused_bias = linear.bias.data.double() + torch.matmul(W_, ln_shift)
+        linear.bias.data = fused_bias.to(linear_dtype)
+
+    # The norm's affine part now lives in the linears; make the norm an identity affine.
+    if ln_bias is not None:
+        layernorm.bias.data = torch.zeros_like(ln_bias.data)
+    layernorm.weight.data = torch.ones_like(layernorm.weight.data)
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from .spin import SpinQuant`
	`2`	`+`
	`3`	`+__all__ = ["SpinQuant"]`