Extend LoRA adapter tests with transpose scenarios and skip unsupported cases

Shehrozkashif · Shehrozkashif · commit f6aa62b0816b · 2026-02-14T23:34:02.000+05:00
diff --git a/src/nncf/common/tensor_statistics/statistics.py b/src/nncf/common/tensor_statistics/statistics.py
@@ -270,21 +270,16 @@ def __eq__(self, other: Any) -> bool:
         shapes_equal = all(self.shape_values[i] == other.shape_values[i] for i in range(len(self.mean_values)))
         if not shapes_equal:
             return False
-        mean_values_equal = all(
-            fns.allclose(self.mean_values[i], other.mean_values[i]) for i in range(len(self.mean_values))
-        )
-        return mean_values_equal
+        return all(fns.allclose(self.mean_values[i], other.mean_values[i]) for i in range(len(self.mean_values)))
 
     def _get_serialized_data(self) -> dict[str, Tensor]:
-        backend = self.mean_values[0].backend
-        device = self.mean_values[0].device
         return {
             self.MEAN_STAT: fns.stack(self.mean_values),
             self.SHAPE_STAT: fns.tensor(
                 self.shape_values,
-                backend=backend,
+                backend=self.mean_values[0].backend,
                 dtype=TensorDataType.int32,
-                device=device,
+                device=self.mean_values[0].device,
             ),
         }
 
diff --git a/src/nncf/quantization/algorithms/weight_compression/algorithm.py b/src/nncf/quantization/algorithms/weight_compression/algorithm.py
@@ -1152,11 +1152,6 @@ def apply_with_parameters(
                 )
 
             if self._lora_correction:
-                for wc_params in all_weight_params:
-                    if self._backend_entity.matmul_has_transposed_activations(wc_params.node_with_weight, graph):
-                        msg = "Transposed activations are not supported yet for the LoRa correction algorithm"
-                        raise nncf.UnsupportedModelError(msg)
-
                 lora_correction_params = self._advanced_parameters.lora_correction_params
                 lora_correction_algo = LoraCorrectionAlgorithm(statistics, lora_correction_params)
                 description += " with correction of low-rank adapters"
@@ -1370,7 +1365,7 @@ def _get_statistics_for_weights_compression(
         # Where mean_value is a 1D tensor representing an activation reduced over batch and sequence length dimensions,
         # shape is an original shape of an activation before reduction, n is the size of the dataset (or subset_size).
         statistics = {}
-        for (act_node, output_port_id, _), matmul_nodes in matmul_input_to_output_nodes_map.items():
+        for (act_node, output_port_id, _act_channel_axis), matmul_nodes in matmul_input_to_output_nodes_map.items():
             tensor_collectors = list(
                 statistic_points.get_algo_statistics_for_node(
                     act_node.node_name,
diff --git a/src/nncf/quantization/algorithms/weight_compression/lora_correction.py b/src/nncf/quantization/algorithms/weight_compression/lora_correction.py
@@ -108,14 +108,19 @@ def is_applicable(self, wc_params: WeightCompressionParameters):
         return wc_params.compression_config.num_bits == 4
 
     def calculate_adapters(
-        self, weight: Tensor, compressed_weight: CompressedWeight, wc_params: WeightCompressionParameters
+        self,
+        weight: Tensor,
+        compressed_weight: CompressedWeight,
+        wc_params: WeightCompressionParameters,
+        act_ch_axis: int,
     ) -> tuple[Tensor, Tensor, list[float]]:
         """
         Calculates low rank matrices for a given original and compressed weights.
 
         :param weight: original floating-point weight matrix.
         :param compressed_weight: compressed weight matrix.
         :param wc_params: parameters of weight compression.
+        :param act_ch_axis: axis of the activation tensor that corresponds to its channels.
         :return: two low rank matrices in the order of execution of corresponding linear layers.
         """
         layer_name = wc_params.node_with_weight.node_name
@@ -128,6 +133,7 @@ def calculate_adapters(
             wc_params.reduction_axes,
             self._lora_correction_params,
             layer_statistics,
+            act_ch_axis,
             is_debug,
         )
         if is_debug:
@@ -142,6 +148,7 @@ def calculate_low_rank_matrices(
         reduction_axes: tuple[int, ...],
         lora_correction_params: AdvancedLoraCorrectionParameters,
         layer_statistics: WCTensorStatistic,
+        act_ch_axis: int,
         is_debug: Optional[bool] = False,
     ):
         """
@@ -157,6 +164,7 @@ def calculate_low_rank_matrices(
         :param reduction_axes: axes along which different statistics reduced.
         :param lora_correction_params: parameters to configure the algorithm.
         :param layer_statistics: an object containing statistics for the layer.
+        :param act_ch_axis: axis of the activation tensor that corresponds to its channels.
         :param is_debug: whether to collect debug information, defaults to False.
         :return: two low rank matrices in the order of execution of corresponding linear layers and list of mean noises.
             Noises are collected from each step of the algorithm if debug was enabled.
@@ -194,9 +202,6 @@ def calculate_low_rank_matrices(
             svd_residual = fns.transpose(svd_residual)
         residual = svd_residual.clone()  # [H, O]
 
-        # Get the activation channel axis
-        act_ch_axis = getattr(layer_statistics, "act_ch_axis", -1)  # default to last axis
-
         # Pass it to process_stats
         s, X = process_stats(layer_statistics, subset_size, act_ch_axis)
 
diff --git a/src/nncf/quantization/algorithms/weight_compression/openvino_backend.py b/src/nncf/quantization/algorithms/weight_compression/openvino_backend.py
@@ -206,7 +206,8 @@ def insert_adapters(
             A_W = opset.constant(lora_A.data)
             B_W = opset.constant(lora_B.data)
 
-        A_MM = opset.matmul(input_node, A_W, transpose_a=False, transpose_b=True)
+        transpose_a = wc_params.node_with_weight.layer_attributes.input_attributes["transpose"]
+        A_MM = opset.matmul(input_node, A_W, transpose_a=transpose_a, transpose_b=True)
         B_MM = opset.matmul(A_MM, B_W, transpose_a=False, transpose_b=True)
 
         node_output_port = mm_node.output(0)
@@ -349,7 +350,15 @@ def transform_model(
                 compressed_weight.tensor = compressed_weight.tensor.as_numpy_tensor()
                 if compressed_weight.zero_point is not None:
                     compressed_weight.zero_point = compressed_weight.zero_point.as_numpy_tensor()
-                adapters = lora_correction_algo.calculate_adapters(weight, compressed_weight, wc_params)
+
+                activation_port_id = self.get_activation_port_id(wc_params.node_with_weight, graph)
+                activation_edge = graph.get_input_edge_by_port_id(wc_params.node_with_weight, activation_port_id)
+                activation_shape = activation_edge.tensor_shape
+                act_ch_axis = self.get_activation_channel_axis(
+                    wc_params.node_with_weight, activation_port_id, activation_shape
+                )
+
+                adapters = lora_correction_algo.calculate_adapters(weight, compressed_weight, wc_params, act_ch_axis)
                 self.insert_adapters(wc_params, *adapters, int8_lora=lora_correction_algo.use_int8_adapters)
         self.name_to_node_mapping = None
 
diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py
@@ -1612,12 +1612,25 @@ def test_call_max_var_criterion_with_dataset_gptq_neg_group_size(mode):
 
 
 @pytest.mark.parametrize(
-    "params, transpose_b",
-    ((None, True), (LoraParams(adapter_rank=4, use_int8_adapters=False), False)),
+    "params, transpose_a, transpose_b",
+    (
+        (None, False, True),  # original
+        (LoraParams(adapter_rank=4, use_int8_adapters=False), False, False),  # original
+        pytest.param(
+            LoraParams(adapter_rank=4, use_int8_adapters=False),
+            True,
+            False,
+        ),
+        pytest.param(
+            LoraParams(adapter_rank=8, use_int8_adapters=True),
+            True,
+            True,
+        ),
+    ),
 )
-def test_lora_adapters_in_the_graph(params, transpose_b):
+def test_lora_adapters_in_the_graph(params, transpose_a, transpose_b):
     advanced_parameters = CompressionParams() if params is None else CompressionParams(lora_correction_params=params)
-    model = LMLinearModel(transpose_b=transpose_b)
+    model = LMLinearModel(transpose_a=transpose_a, transpose_b=transpose_b)
     ov_model = model.ov_model
     dataset = Dataset(np.ones(inp.shape) for inp in ov_model.inputs)
 
@@ -2410,7 +2423,7 @@ def test_scale_estimation(self, mocker, is_moe, check_sampling_activation_stats_
     def test_awq_with_ignored_scope(self, mocker, is_3d_weights):
         return super().test_awq_with_ignored_scope(mocker, is_3d_weights)
 
-    # Transpose inputs does not affect mergable pattern code, skippting (True, False)
+    # Transposed inputs do not affect the mergeable-pattern code
     @pytest.mark.parametrize("transpose_a,non_mergable_pattern", [(True, True), (False, True), (False, False)])
     @pytest.mark.parametrize(
         "is_3d_weights", [False, pytest.param(True, marks=pytest.mark.xfail(reason="Ticket - 176465"))]
@@ -2608,3 +2621,39 @@ def test_awq_scale_ref() -> list[dict[str, Tensor]]:
     @pytest.fixture
     def transpose_a_supported(self) -> bool:
         return True
+
+    @pytest.mark.parametrize(
+        "kwargs",
+        [
+            dict(scale_estimation=True),
+            dict(
+                gptq=True,
+                advanced_parameters=CompressionParams(gptq_params=GPTQParams(subset_size=2)),
+            ),
+        ],
+    )
+    def test_compression_skipped_with_transposed_activations(self, transpose_a_supported, kwargs):
+        if not transpose_a_supported:
+            pytest.skip("transpose_a is not supported for the current backend")
+        if kwargs.get("scale_estimation", False) and "scale_estimation" in self.get_not_supported_algorithms():
+            pytest.skip("Scale estimation is not supported")
+        if kwargs.get("gptq", False) and "gptq" in self.get_not_supported_algorithms():
+            pytest.skip("GPTQ is not supported")
+
+        INPUT_SHAPE = (2, 4)
+        model = self.get_transposable_awq_model(transpose_a=True, transpose_b=True, input_shape=INPUT_SHAPE)
+        input = 0.01 * np.arange(0, np.multiply.reduce(INPUT_SHAPE), dtype=np.float32).reshape(INPUT_SHAPE) + 0.02
+        input = self.to_tensor(input)
+        dataset = Dataset([input] * 2, self.get_transform_func())
+
+        with pytest.raises(nncf.UnsupportedModelError):
+            compress_weights(
+                model,
+                mode=CompressWeightsMode.INT4_SYM,
+                ratio=1.0,
+                group_size=1,
+                subset_size=2,
+                dataset=dataset,
+                all_layers=True,
+                **kwargs,
+            )