Fix transpose_a support in LoRA Correction: remove getattr bug and wrong reduction_axis override

Shehrozkashif · claude · Shehrozkashif · commit 5cbe6072eafa · 2026-04-22T02:11:44.000+05:00
- Replace `getattr(node, "transpose_a", False)` (always returned False since
  NNCFNode has no such attribute) with proper access via
  `layer_attributes.input_attributes["transpose"]`, then remove the now-unused
  `transpose_a_flag` and the `transpose_a` parameter from `calculate_low_rank_matrices`.
- Remove the `if transpose_a and reduction_axis != -1: reduction_axis = 1` block
  which would have incorrectly overridden the H-axis group-quantization index
  (e.g. setting it to 1 for a [H, O] weight where H is at axis 0).
- Revert the unrelated inlining of `backend`/`device` locals in
  `WCTensorStatistic._get_serialized_data` to keep the diff focused.
- Fix the `process_stats` docstring to accurately describe the new
  `transpose_a` parameter and the two possible return layouts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/src/nncf/common/tensor_statistics/statistics.py b/src/nncf/common/tensor_statistics/statistics.py
@@ -276,13 +276,15 @@ def __eq__(self, other: Any) -> bool:
         return mean_values_equal
 
     def _get_serialized_data(self) -> dict[str, Tensor]:
+        backend = self.mean_values[0].backend
+        device = self.mean_values[0].device
         return {
             self.MEAN_STAT: fns.stack(self.mean_values),
             self.SHAPE_STAT: fns.tensor(
                 self.shape_values,
-                backend=self.mean_values[0].backend,
+                backend=backend,
                 dtype=TensorDataType.int32,
-                device=self.mean_values[0].device,
+                device=device,
             ),
         }
 
diff --git a/src/nncf/quantization/algorithms/weight_compression/activation_stats.py b/src/nncf/quantization/algorithms/weight_compression/activation_stats.py
@@ -29,9 +29,11 @@ def process_stats(
     :param stats: An object containing statistics for the layer.
     :param subset_size: The number of samples for AWQ. If subset_size <= 0, all samples are used.
     :param act_ch_axis: The activation channel axis.
+    :param transpose_a: When True, returns X in [SampleSize, HiddenDim] layout instead of the default
+        [HiddenDim, SampleSize]. Used by LoRA Correction which requires samples as rows.
     :return: tuple of the following tensors:
-        s - maximum channel magnitude across samples [HiddenDim]
-        X - average channel magnitude across tokens in the sequence [HiddenDim, min(SampleSize, ~subset_size)]
+        s - maximum channel magnitude across samples, shape [HiddenDim]
+        X - activation matrix, shape [HiddenDim, SampleSize] normally or [SampleSize, HiddenDim] if transpose_a=True
     """
     X = fns.stack(
         stats.mean_values
diff --git a/src/nncf/quantization/algorithms/weight_compression/lora_correction.py b/src/nncf/quantization/algorithms/weight_compression/lora_correction.py
@@ -124,7 +124,6 @@ def calculate_adapters(
         layer_name = wc_params.node_with_weight.node_name
         layer_statistics = self._statistics[layer_name]
         is_debug = self._debug_interface is not None
-        transpose_a_flag = getattr(wc_params.node_with_weight, "transpose_a", False)
         lora_A, lora_B, mean_noises = self.calculate_low_rank_matrices(
             weight,
             compressed_weight,
@@ -134,7 +133,6 @@ def calculate_adapters(
             layer_statistics,
             act_ch_axis,
             is_debug,
-            transpose_a=transpose_a_flag,
         )
         if is_debug:
             self._debug_interface.add_noises(layer_name, mean_noises)
@@ -150,7 +148,6 @@ def calculate_low_rank_matrices(
         layer_statistics: WCTensorStatistic,
         act_ch_axis: int,
         is_debug: bool | None = False,
-        transpose_a: bool = False,
     ):
         """
         Calculates low rank matrices for a given original and compressed weights.
@@ -185,9 +182,6 @@ def calculate_low_rank_matrices(
         else:
             reduction_axis = -1
 
-        if transpose_a and reduction_axis != -1:
-            reduction_axis = 1
-
         if mode in (CompressWeightsMode.INT4_SYM, CompressWeightsMode.INT4_ASYM):
             fq_weights = do_integer_dequantization(
                 compressed_weight,