Skip to content

Commit c604cb2

Browse files
[OpenVINO] calibration_device parameter
1 parent 00576e0 commit c604cb2

7 files changed

Lines changed: 163 additions & 16 deletions

File tree

src/nncf/openvino/engine.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
# limitations under the License.
1111

1212

13+
import contextvars
14+
from collections.abc import Generator
15+
from contextlib import contextmanager
16+
1317
import numpy as np
1418
import openvino as ov
1519
from openvino import Type
@@ -19,6 +23,17 @@
1923
from nncf.definitions import NNCF_DATASET_RESET_STATE_KEY
2024
from nncf.openvino.graph.model_utils import model_has_state
2125

26+
_calibration_device: contextvars.ContextVar[str | None] = contextvars.ContextVar("_calibration_device", default=None)
27+
28+
29+
@contextmanager
30+
def calibration_device_context(device: str | None) -> Generator[None, None, None]:
31+
token = _calibration_device.set(device)
32+
try:
33+
yield
34+
finally:
35+
_calibration_device.reset(token)
36+
2237

2338
class OVCompiledModelEngine(Engine):
2439
"""
@@ -79,12 +94,13 @@ def __init__(self, model: ov.Model, use_fp32_precision: bool = True):
7994
:param use_fp32_precision: A flag that determines whether to force the engine to use FP32
8095
precision during inference.
8196
"""
97+
device_name = _calibration_device.get() or "CPU"
8298
config = None
8399
if use_fp32_precision:
84100
config = {inference_precision: Type.f32}
85101
ie = ov.Core()
86102
stateful = model_has_state(model)
87-
compiled_model = ie.compile_model(model, device_name="CPU", config=config)
103+
compiled_model = ie.compile_model(model, device_name=device_name, config=config)
88104
self.engine = OVCompiledModelEngine(compiled_model, stateful)
89105

90106
def infer(

src/nncf/openvino/quantization/quantize_model.py

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from nncf.common.logging import nncf_logger
2222
from nncf.common.quantization.structs import QuantizationPreset
2323
from nncf.data import Dataset
24+
from nncf.openvino.engine import calibration_device_context
2425
from nncf.openvino.graph.metatypes.groups import OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS
2526
from nncf.openvino.graph.metatypes.openvino_metatypes import OVIfMetatype
2627
from nncf.openvino.graph.metatypes.openvino_metatypes import get_node_metatype
@@ -119,9 +120,11 @@ def _extract_all_subgraphs(model: ov.Model, current_id: str) -> None:
119120
f"The model consists of {if_ops_number} If node(-s) with then and else bodies. \
120121
Main model and all If bodies will be quantized recursively."
121122
)
122-
quantized_model, _ = apply_algorithm_if_bodies(
123-
quantization_algorithm, model, graphs, main_model_graph_id, calibration_dataset, subset_size, 1
124-
)
123+
calibration_device = advanced_parameters.calibration_device if advanced_parameters else None
124+
with calibration_device_context(calibration_device):
125+
quantized_model, _ = apply_algorithm_if_bodies(
126+
quantization_algorithm, model, graphs, main_model_graph_id, calibration_dataset, subset_size, 1
127+
)
125128

126129
if is_weight_compression_needed(advanced_parameters):
127130
compress_quantize_weights_transformation(quantized_model)
@@ -168,7 +171,9 @@ def native_quantize_impl(
168171
)
169172
graph = GraphConverter.create_nncf_graph(model)
170173
warning_model_no_batchwise_support(graph, advanced_parameters, model_type, OPERATIONS_OUTPUT_HAS_NO_BATCH_AXIS)
171-
quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset)
174+
calibration_device = advanced_parameters.calibration_device if advanced_parameters else None
175+
with calibration_device_context(calibration_device):
176+
quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset)
172177

173178
if is_weight_compression_needed(advanced_parameters):
174179
compress_quantize_weights_transformation(quantized_model)
@@ -296,15 +301,19 @@ def quantize_with_accuracy_control_impl(
296301
advanced_accuracy_restorer_parameters.num_ranking_workers,
297302
advanced_accuracy_restorer_parameters.restore_mode,
298303
)
299-
quantized_model = accuracy_restorer.apply(
300-
model,
301-
initial_metric_results,
302-
quantized_model,
303-
quantized_metric_results,
304-
validation_dataset,
305-
validation_dataset_size,
306-
evaluator,
304+
calibration_device = (
305+
advanced_quantization_parameters.calibration_device if advanced_quantization_parameters else None
307306
)
307+
with calibration_device_context(calibration_device):
308+
quantized_model = accuracy_restorer.apply(
309+
model,
310+
initial_metric_results,
311+
quantized_model,
312+
quantized_metric_results,
313+
validation_dataset,
314+
validation_dataset_size,
315+
evaluator,
316+
)
308317

309318
if compress_weights:
310319
compress_quantize_weights_transformation(quantized_model)
@@ -402,12 +411,15 @@ def compress_weights_impl(
402411
advanced_parameters,
403412
)
404413

414+
calibration_device = advanced_parameters.calibration_device if advanced_parameters else None
415+
405416
statistics_points = None
406417
if advanced_parameters and advanced_parameters.statistics_path:
407418
# If the statistics directory does not exist yet, collect the statistics and cache them there
408419
statistics_path = Path(advanced_parameters.statistics_path)
409420
if not statistics_path.exists():
410-
cache_weight_compression_statistics(model, graph, dataset, subset_size, statistics_path)
421+
with calibration_device_context(calibration_device):
422+
cache_weight_compression_statistics(model, graph, dataset, subset_size, statistics_path)
411423
statistics_aggregator = StatisticsAggregatorFactory.create(model, dataset)
412424
compression_algorithm.set_backend_entity(model)
413425
_, matmul_input_to_output_nodes_map = compression_algorithm.get_compression_nodes_info(graph)
@@ -421,4 +433,5 @@ def compress_weights_impl(
421433
statistics_aggregator.load_statistics_from_dir(statistics_path)
422434
statistics_points = statistics_aggregator.statistic_points
423435

424-
return compression_algorithm.apply(model, graph, statistics_points, dataset)
436+
with calibration_device_context(calibration_device):
437+
return compression_algorithm.apply(model, graph, statistics_points, dataset)

src/nncf/quantization/advanced_parameters.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,10 @@ class AdvancedQuantizationParameters:
252252
:type smooth_quant_alpha: float
253253
:param backend_params: Backend-specific parameters.
254254
:type backend_params: dict[str, Any]
255+
:param calibration_device: OpenVINO device name to use for calibration inference
256+
(e.g. "CPU", "GPU", "GPU.0", "AUTO:GPU,CPU"). If None, defaults to "CPU".
257+
Only applicable to the OpenVINO backend.
258+
:type calibration_device: Optional[str]
255259
"""
256260

257261
# General parameters
@@ -282,6 +286,9 @@ class AdvancedQuantizationParameters:
282286
# Backend specific parameters
283287
backend_params: dict[str, Any] = field(default_factory=dict)
284288

289+
# Calibration device
290+
calibration_device: str | None = None
291+
285292

286293
@api()
287294
@dataclass
@@ -427,6 +434,10 @@ class AdvancedCompressionParameters:
427434
:type lora_correction_params: AdvancedLoraCorrectionParameters
428435
:param backend_params: Backend-specific parameters.
429436
:type backend_params: dict[str, Any]
437+
:param calibration_device: OpenVINO device name to use for calibration inference
438+
(e.g. "CPU", "GPU", "GPU.0", "AUTO:GPU,CPU"). If None, defaults to "CPU".
439+
Only applicable to the OpenVINO backend.
440+
:type calibration_device: Optional[str]
430441
:param codebook: The codebook (LUT) for the weight compression.
431442
Applicable for vector quantization. Must be a numpy array or ov Tensor.
432443
:type codebook: TTensor
@@ -445,6 +456,7 @@ class AdvancedCompressionParameters:
445456
gptq_params: AdvancedGPTQParameters = field(default_factory=AdvancedGPTQParameters)
446457
lora_correction_params: AdvancedLoraCorrectionParameters = field(default_factory=AdvancedLoraCorrectionParameters)
447458
backend_params: dict[str, Any] = field(default_factory=dict)
459+
calibration_device: str | None = None
448460
codebook: TTensor | None = None
449461
adaptive_codebook_params: AdvancedAdaptiveCodebookParameters = field(
450462
default_factory=AdvancedAdaptiveCodebookParameters

src/nncf/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@
99
# See the License for the specific language governing permissions and
1010
# limitations under the License.
1111

12-
__version__ = "3.2.0"
12+
__version__ = "3.2.0.dev0+00576e031"

tests/openvino/native/quantization/test_quantize_api.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,30 @@ def test_non_positive_subset_size():
3535
with pytest.raises(nncf.ValidationError) as e:
3636
nncf.quantize(model_to_test, Dataset(MockDataset(INPUT_SHAPE)), subset_size=0)
3737
assert "Subset size must be positive." in e.info
38+
39+
40+
def test_quantize_calibration_device(monkeypatch):
    """Checks that ``calibration_device`` from the advanced parameters reaches ``ov.Core.compile_model``."""
    import numpy as np
    import openvino as ov

    from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters
    from tests.openvino.native.models import LinearModel

    model_to_test = LinearModel().ov_model
    input_shape = model_to_test.inputs[0].shape
    # A seeded generator keeps the calibration data identical between runs.
    rng = np.random.default_rng(0)
    dataset = Dataset([rng.random(tuple(input_shape)).astype(np.float32) for _ in range(2)])
    captured_devices = []

    original_compile = ov.Core.compile_model

    def mock_compile(self, model, device_name="CPU", config=None):
        # Record the requested device, but always compile for CPU so the test
        # does not require the requested hardware to be present.
        captured_devices.append(device_name)
        return original_compile(self, model, device_name="CPU", config=config)

    monkeypatch.setattr(ov.Core, "compile_model", mock_compile)
    nncf.quantize(
        model_to_test,
        dataset,
        advanced_parameters=AdvancedQuantizationParameters(calibration_device="GPU"),
    )
    assert "GPU" in captured_devices

tests/openvino/native/quantization/test_weights_compression.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2767,3 +2767,27 @@ def test_phi_rope_model(self):
27672767
group_size=-1,
27682768
)
27692769
assert self.get_num_int8_nodes(compressed_model) == 0
2770+
2771+
2772+
def test_compress_weights_calibration_device(monkeypatch):
    """Checks that ``compress_weights`` passes ``calibration_device`` on to ``ov.Core.compile_model``."""
    model = AWQMatmulModel().ov_model
    dataset = Dataset([np.ones([2, 8, 8])])
    requested_devices = []
    real_compile = ov.Core.compile_model

    def mock_compile(self, model, device_name="CPU", config=None):
        # Remember what was asked for; the actual compilation is pinned to CPU.
        requested_devices.append(device_name)
        return real_compile(self, model, device_name="CPU", config=config)

    monkeypatch.setattr(ov.Core, "compile_model", mock_compile)

    params = AdvancedCompressionParameters(calibration_device="GPU")
    compress_weights(
        model,
        mode=CompressWeightsMode.INT4_SYM,
        ratio=1.0,
        group_size=2,
        dataset=dataset,
        awq=True,
        advanced_parameters=params,
    )
    assert "GPU" in requested_devices

tests/openvino/native/test_engine.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from nncf.definitions import NNCF_DATASET_RESET_STATE_KEY
1717
from nncf.openvino.engine import OVNativeEngine
18+
from nncf.openvino.engine import calibration_device_context
1819
from tests.openvino.native.models import ConvModel
1920
from tests.openvino.native.models import LinearModel
2021
from tests.openvino.native.models import QuantizedModel
@@ -123,3 +124,57 @@ def _reset_state():
123124
"infer",
124125
"infer",
125126
]
127+
128+
129+
def test_calibration_device_default(monkeypatch):
    """Checks that, outside any calibration device context, the engine requests compilation for "CPU"."""
    import openvino as ov

    model = LinearModel().ov_model
    seen = {}
    real_compile = ov.Core.compile_model

    def mock_compile(self, model, device_name="CPU", config=None):
        # Keep the last requested device; the real compilation is pinned to CPU.
        seen["device_name"] = device_name
        return real_compile(self, model, device_name="CPU", config=config)

    monkeypatch.setattr(ov.Core, "compile_model", mock_compile)
    OVNativeEngine(model)
    assert seen["device_name"] == "CPU"
144+
145+
146+
def test_calibration_device_context(monkeypatch):
    """Checks that an engine created inside ``calibration_device_context("GPU")`` requests "GPU"."""
    import openvino as ov

    model = LinearModel().ov_model
    seen = {}
    real_compile = ov.Core.compile_model

    def mock_compile(self, model, device_name="CPU", config=None):
        # Keep the last requested device; the real compilation is pinned to CPU.
        seen["device_name"] = device_name
        return real_compile(self, model, device_name="CPU", config=config)

    monkeypatch.setattr(ov.Core, "compile_model", mock_compile)
    with calibration_device_context("GPU"):
        OVNativeEngine(model)
    assert seen["device_name"] == "GPU"
162+
163+
164+
def test_calibration_device_context_resets(monkeypatch):
    """Checks that the device override stops applying once the context manager has exited."""
    import openvino as ov

    model = LinearModel().ov_model
    requested_devices = []
    real_compile = ov.Core.compile_model

    def mock_compile(self, model, device_name="CPU", config=None):
        # Log every requested device in call order; compile on CPU regardless.
        requested_devices.append(device_name)
        return real_compile(self, model, device_name="CPU", config=config)

    monkeypatch.setattr(ov.Core, "compile_model", mock_compile)

    # First engine is built under the override, second one after it was reset.
    with calibration_device_context("GPU"):
        OVNativeEngine(model)
    OVNativeEngine(model)

    assert requested_devices == ["GPU", "CPU"]

0 commit comments

Comments
 (0)