Skip to content

Commit f6aa62b

Browse files
committed
Extend LoRA adapter tests with transpose scenarios and skip unsupported cases
1 parent c6632b4 commit f6aa62b

5 files changed

Lines changed: 78 additions & 25 deletions

File tree

src/nncf/common/tensor_statistics/statistics.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -270,21 +270,16 @@ def __eq__(self, other: Any) -> bool:
270270
shapes_equal = all(self.shape_values[i] == other.shape_values[i] for i in range(len(self.mean_values)))
271271
if not shapes_equal:
272272
return False
273-
mean_values_equal = all(
274-
fns.allclose(self.mean_values[i], other.mean_values[i]) for i in range(len(self.mean_values))
275-
)
276-
return mean_values_equal
273+
return all(fns.allclose(self.mean_values[i], other.mean_values[i]) for i in range(len(self.mean_values)))
277274

278275
def _get_serialized_data(self) -> dict[str, Tensor]:
279-
backend = self.mean_values[0].backend
280-
device = self.mean_values[0].device
281276
return {
282277
self.MEAN_STAT: fns.stack(self.mean_values),
283278
self.SHAPE_STAT: fns.tensor(
284279
self.shape_values,
285-
backend=backend,
280+
backend=self.mean_values[0].backend,
286281
dtype=TensorDataType.int32,
287-
device=device,
282+
device=self.mean_values[0].device,
288283
),
289284
}
290285

src/nncf/quantization/algorithms/weight_compression/algorithm.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,11 +1152,6 @@ def apply_with_parameters(
11521152
)
11531153

11541154
if self._lora_correction:
1155-
for wc_params in all_weight_params:
1156-
if self._backend_entity.matmul_has_transposed_activations(wc_params.node_with_weight, graph):
1157-
msg = "Transposed activations are not supported yet for the LoRa correction algorithm"
1158-
raise nncf.UnsupportedModelError(msg)
1159-
11601155
lora_correction_params = self._advanced_parameters.lora_correction_params
11611156
lora_correction_algo = LoraCorrectionAlgorithm(statistics, lora_correction_params)
11621157
description += " with correction of low-rank adapters"
@@ -1370,7 +1365,7 @@ def _get_statistics_for_weights_compression(
13701365
# Where mean_value is a 1D tensor representing an activation reduced over batch and sequence length dimensions,
13711366
# shape is an original shape of an activation before reduction, n is the size of the dataset (or subset_size).
13721367
statistics = {}
1373-
for (act_node, output_port_id, _), matmul_nodes in matmul_input_to_output_nodes_map.items():
1368+
for (act_node, output_port_id, _act_channel_axis), matmul_nodes in matmul_input_to_output_nodes_map.items():
13741369
tensor_collectors = list(
13751370
statistic_points.get_algo_statistics_for_node(
13761371
act_node.node_name,

src/nncf/quantization/algorithms/weight_compression/lora_correction.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,14 +108,19 @@ def is_applicable(self, wc_params: WeightCompressionParameters):
108108
return wc_params.compression_config.num_bits == 4
109109

110110
def calculate_adapters(
111-
self, weight: Tensor, compressed_weight: CompressedWeight, wc_params: WeightCompressionParameters
111+
self,
112+
weight: Tensor,
113+
compressed_weight: CompressedWeight,
114+
wc_params: WeightCompressionParameters,
115+
act_ch_axis: int,
112116
) -> tuple[Tensor, Tensor, list[float]]:
113117
"""
114118
Calculates low rank matrices for a given original and compressed weights.
115119
116120
:param weight: original floating-point weight matrix.
117121
:param compressed_weight: compressed weight matrix.
118122
:param wc_params: parameters of weight compression.
123+
:param act_ch_axis: index of the activation tensor axis that corresponds to its channels.
119124
:return: two low rank matrices in the order of execution of corresponding linear layers.
120125
"""
121126
layer_name = wc_params.node_with_weight.node_name
@@ -128,6 +133,7 @@ def calculate_adapters(
128133
wc_params.reduction_axes,
129134
self._lora_correction_params,
130135
layer_statistics,
136+
act_ch_axis,
131137
is_debug,
132138
)
133139
if is_debug:
@@ -142,6 +148,7 @@ def calculate_low_rank_matrices(
142148
reduction_axes: tuple[int, ...],
143149
lora_correction_params: AdvancedLoraCorrectionParameters,
144150
layer_statistics: WCTensorStatistic,
151+
act_ch_axis: int,
145152
is_debug: Optional[bool] = False,
146153
):
147154
"""
@@ -157,6 +164,7 @@ def calculate_low_rank_matrices(
157164
:param reduction_axes: axes along which different statistics reduced.
158165
:param lora_correction_params: parameters to configure the algorithm.
159166
:param layer_statistics: an object containing statistics for the layer.
167+
:param act_ch_axis: index of the activation tensor axis that corresponds to its channels.
160168
:param is_debug: whether to collect debug information, defaults to False.
161169
:return: two low rank matrices in the order of execution of corresponding linear layers and list of mean noises.
162170
Noises are collected from each step of the algorithm if debug was enabled.
@@ -194,9 +202,6 @@ def calculate_low_rank_matrices(
194202
svd_residual = fns.transpose(svd_residual)
195203
residual = svd_residual.clone() # [H, O]
196204

197-
# Get the activation channel axis
198-
act_ch_axis = getattr(layer_statistics, "act_ch_axis", -1) # default to last axis
199-
200205
# Pass the activation channel axis to process_stats
201206
s, X = process_stats(layer_statistics, subset_size, act_ch_axis)
202207

src/nncf/quantization/algorithms/weight_compression/openvino_backend.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ def insert_adapters(
206206
A_W = opset.constant(lora_A.data)
207207
B_W = opset.constant(lora_B.data)
208208

209-
A_MM = opset.matmul(input_node, A_W, transpose_a=False, transpose_b=True)
209+
transpose_a = wc_params.node_with_weight.layer_attributes.input_attributes["transpose"]
210+
A_MM = opset.matmul(input_node, A_W, transpose_a=transpose_a, transpose_b=True)
210211
B_MM = opset.matmul(A_MM, B_W, transpose_a=False, transpose_b=True)
211212

212213
node_output_port = mm_node.output(0)
@@ -349,7 +350,15 @@ def transform_model(
349350
compressed_weight.tensor = compressed_weight.tensor.as_numpy_tensor()
350351
if compressed_weight.zero_point is not None:
351352
compressed_weight.zero_point = compressed_weight.zero_point.as_numpy_tensor()
352-
adapters = lora_correction_algo.calculate_adapters(weight, compressed_weight, wc_params)
353+
354+
activation_port_id = self.get_activation_port_id(wc_params.node_with_weight, graph)
355+
activation_edge = graph.get_input_edge_by_port_id(wc_params.node_with_weight, activation_port_id)
356+
activation_shape = activation_edge.tensor_shape
357+
act_ch_axis = self.get_activation_channel_axis(
358+
wc_params.node_with_weight, activation_port_id, activation_shape
359+
)
360+
361+
adapters = lora_correction_algo.calculate_adapters(weight, compressed_weight, wc_params, act_ch_axis)
353362
self.insert_adapters(wc_params, *adapters, int8_lora=lora_correction_algo.use_int8_adapters)
354363
self.name_to_node_mapping = None
355364

tests/openvino/native/quantization/test_weights_compression.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1612,12 +1612,25 @@ def test_call_max_var_criterion_with_dataset_gptq_neg_group_size(mode):
16121612

16131613

16141614
@pytest.mark.parametrize(
1615-
"params, transpose_b",
1616-
((None, True), (LoraParams(adapter_rank=4, use_int8_adapters=False), False)),
1615+
"params, transpose_a, transpose_b",
1616+
(
1617+
(None, False, True), # original
1618+
(LoraParams(adapter_rank=4, use_int8_adapters=False), False, False), # original
1619+
pytest.param(
1620+
LoraParams(adapter_rank=4, use_int8_adapters=False),
1621+
True,
1622+
False,
1623+
),
1624+
pytest.param(
1625+
LoraParams(adapter_rank=8, use_int8_adapters=True),
1626+
True,
1627+
True,
1628+
),
1629+
),
16171630
)
1618-
def test_lora_adapters_in_the_graph(params, transpose_b):
1631+
def test_lora_adapters_in_the_graph(params, transpose_a, transpose_b):
16191632
advanced_parameters = CompressionParams() if params is None else CompressionParams(lora_correction_params=params)
1620-
model = LMLinearModel(transpose_b=transpose_b)
1633+
model = LMLinearModel(transpose_a=transpose_a, transpose_b=transpose_b)
16211634
ov_model = model.ov_model
16221635
dataset = Dataset(np.ones(inp.shape) for inp in ov_model.inputs)
16231636

@@ -2410,7 +2423,7 @@ def test_scale_estimation(self, mocker, is_moe, check_sampling_activation_stats_
24102423
def test_awq_with_ignored_scope(self, mocker, is_3d_weights):
24112424
return super().test_awq_with_ignored_scope(mocker, is_3d_weights)
24122425

2413-
# Transpose inputs does not affect mergable pattern code, skippting (True, False)
2426+
# Transposing inputs does not affect the mergeable-pattern code
24142427
@pytest.mark.parametrize("transpose_a,non_mergable_pattern", [(True, True), (False, True), (False, False)])
24152428
@pytest.mark.parametrize(
24162429
"is_3d_weights", [False, pytest.param(True, marks=pytest.mark.xfail(reason="Ticket - 176465"))]
@@ -2608,3 +2621,39 @@ def test_awq_scale_ref() -> list[dict[str, Tensor]]:
26082621
@pytest.fixture
26092622
def transpose_a_supported(self) -> bool:
26102623
return True
2624+
2625+
@pytest.mark.parametrize(
2626+
"kwargs",
2627+
[
2628+
dict(scale_estimation=True),
2629+
dict(
2630+
gptq=True,
2631+
advanced_parameters=CompressionParams(gptq_params=GPTQParams(subset_size=2)),
2632+
),
2633+
],
2634+
)
2635+
def test_compression_skipped_with_transposed_activations(self, transpose_a_supported, kwargs):
2636+
if not transpose_a_supported:
2637+
pytest.skip("transpose_a is not supported for the current backend")
2638+
if kwargs.get("scale_estimation", False) and "scale_estimation" in self.get_not_supported_algorithms():
2639+
pytest.skip("Scale estimation is not supported")
2640+
if kwargs.get("gptq", False) and "gptq" in self.get_not_supported_algorithms():
2641+
pytest.skip("GPTQ is not supported")
2642+
2643+
INPUT_SHAPE = (2, 4)
2644+
model = self.get_transposable_awq_model(transpose_a=True, transpose_b=True, input_shape=INPUT_SHAPE)
2645+
input = 0.01 * np.arange(0, np.multiply.reduce(INPUT_SHAPE), dtype=np.float32).reshape(INPUT_SHAPE) + 0.02
2646+
input = self.to_tensor(input)
2647+
dataset = Dataset([input] * 2, self.get_transform_func())
2648+
2649+
with pytest.raises(nncf.UnsupportedModelError):
2650+
compress_weights(
2651+
model,
2652+
mode=CompressWeightsMode.INT4_SYM,
2653+
ratio=1.0,
2654+
group_size=1,
2655+
subset_size=2,
2656+
dataset=dataset,
2657+
all_layers=True,
2658+
**kwargs,
2659+
)

0 commit comments

Comments
 (0)