# ---------------------------------------------------------------------------
# src/model_api/adapters/utils.py
# ---------------------------------------------------------------------------


def setup_python_preprocessing_pipeline(
    layout: str,
    resize_mode: str,
    interpolation_mode: str,
    target_shape: tuple[int, ...],
    pad_value: int,
    dtype: type = int,
    brg2rgb: bool = False,
    mean: list[Any] | None = None,
    scale: list[Any] | None = None,
    input_idx: int = 0,
) -> Callable[[np.ndarray], np.ndarray]:
    """Build a Python-side preprocessing callable shared by the model adapters.

    The returned callable applies, in this order:

    1. ``np.squeeze`` (drops every size-1 axis of the incoming array),
    2. the resize function registered in ``RESIZE_TYPES`` for ``resize_mode``,
    3. ``InputTransform(brg2rgb, mean, scale)``,
    4. ``change_layout`` for the requested ``layout``.

    Args:
        layout: Target layout for the input (e.g., "NCHW", "NHWC").
        resize_mode: Key into ``RESIZE_TYPES``. "crop" receives only ``size``;
            every other mode also receives ``interpolation``, and
            "fit_to_window_letterbox" additionally receives ``pad_value``.
        interpolation_mode: Key into ``INTERPOLATION_TYPES``. It is not looked
            up when ``resize_mode`` is "crop".
        target_shape: Passed to the resize function as ``size``.
        pad_value: Padding value, used only by "fit_to_window_letterbox".
        dtype: Accepted for signature compatibility with the adapters'
            ``embed_preprocessing``; this pipeline does not read it.
        brg2rgb: Forwarded to ``InputTransform`` (channel-order flag).
        mean: Forwarded to ``InputTransform``.
        scale: Forwarded to ``InputTransform``.
        input_idx: Accepted for signature compatibility; not read here.

    Returns:
        Callable: A function taking one input array and returning the
        preprocessed array produced by the four stages above.

    Raises:
        KeyError: If ``resize_mode`` (or, for non-crop modes,
            ``interpolation_mode``) is not a registered key.
    """
    from functools import partial

    # Look the resize implementation up first so that an unknown resize_mode is
    # reported before an unknown interpolation_mode.
    resize_impl = RESIZE_TYPES[resize_mode]

    resize_kwargs: dict[str, Any] = {"size": target_shape}
    if resize_mode != "crop":
        resize_kwargs["interpolation"] = INTERPOLATION_TYPES[interpolation_mode]
        if resize_mode == "fit_to_window_letterbox":
            resize_kwargs["pad_value"] = pad_value

    stages = (
        np.squeeze,
        partial(resize_impl, **resize_kwargs),
        InputTransform(brg2rgb, mean, scale),
        partial(change_layout, layout=layout),
    )

    def preprocess(image):
        # Apply the stages left to right; each stage consumes the previous output.
        for stage in stages:
            image = stage(image)
        return image

    return preprocess


def change_layout(image, layout):
    """Changes the input image layout to fit the layout of the model input layer.

    Args:
        image (ndarray): a single image as 3D array in HWC layout
        layout (str): target layout; any value containing "CHW" (e.g. "NCHW")
            triggers an HWC->CHW transpose, any other value keeps the axis order

    Returns:
        ndarray: the image with a leading batch axis of size 1 added
    """
    if "CHW" in layout:
        image = image.transpose((2, 0, 1))  # HWC->CHW
    return image.reshape((1, *image.shape))


# ---------------------------------------------------------------------------
# src/model_api/adapters/openvino_adapter.py -- adapter methods
# (shown at column 0; they are methods of the adapter class in that file)
# ---------------------------------------------------------------------------


def reshape_dynamic_inputs(self) -> None:
    """For NPU devices, set static shape if the model has dynamic shapes.

    Each dynamic input gets a static shape derived from ``get_input_shape``:

    * a ``(low, high)`` tuple dimension becomes the integer midpoint;
    * a ``-1`` dimension becomes 1 on axis 0 and on any non-4D input;
    * on a 4D input, a ``-1`` dimension becomes 3 on the presumed channel axis
      and 224 elsewhere. The layout is treated as NCHW only when axis 1 is a
      known value <= 4; otherwise axis 3 is taken as the channel axis;
    * known dimensions are kept unchanged.

    All new shapes are applied with a single ``reshape_model`` call.
    """
    static_shapes = {}
    for model_input in self.model.inputs:
        if not model_input.partial_shape.is_dynamic:
            continue

        input_name = model_input.get_any_name()
        shape = get_input_shape(model_input)
        rank = len(shape)

        # Detect likely layout for 4D shapes. When this is true axis 1 is
        # already static, so only axes 2 and 3 can still need a default.
        is_nchw = rank == 4 and not isinstance(shape[1], tuple) and shape[1] != -1 and shape[1] <= 4
        channel_axis = 1 if is_nchw else 3

        static_shape = []
        for axis, dim in enumerate(shape):
            if isinstance(dim, tuple):
                static_shape.append((dim[0] + dim[1]) // 2)
            elif dim != -1:
                static_shape.append(dim)
            elif axis == 0 or rank != 4:
                static_shape.append(1)
            elif axis == channel_axis:
                static_shape.append(3)
            else:
                static_shape.append(224)

        log.info(
            f"NPU: Reshaping input '{input_name}' from dynamic {shape} to static {static_shape}",
        )
        static_shapes[input_name] = static_shape

    if static_shapes:
        self.reshape_model(static_shapes)


def copy_raw_result(self, request):
    """Return a copy of each output tensor of ``request``, keyed by the entries of ``get_output_layers()``."""
    return {key: request.get_tensor(key).data.copy() for key in self.get_output_layers()}


def infer_sync(self, dict_data: dict[str, ndarray]) -> dict[str, ndarray]:
    """Run inference on an idle request of ``async_queue`` and return its raw result.

    When Python preprocessing is enabled, every value of ``dict_data`` is passed
    through ``self.preprocessor`` first. The preprocessed values go into a new
    dict, so the caller's ``dict_data`` is left untouched and can be reused.
    """
    if self.use_python_preprocessing:
        dict_data = {name: self.preprocessor(tensor) for name, tensor in dict_data.items()}
    self.infer_request = self.async_queue[self.async_queue.get_idle_request_id()]
    self.infer_request.infer(dict_data)
    return self.get_raw_result(self.infer_request)


def infer_async(self, dict_data, callback_data) -> None:
    """Submit ``dict_data`` to ``async_queue.start_async`` together with ``callback_data``.

    When Python preprocessing is enabled, every value of ``dict_data`` is passed
    through ``self.preprocessor`` first. The preprocessed values go into a new
    dict, so the caller's ``dict_data`` is left untouched and can be reused.
    """
    if self.use_python_preprocessing:
        dict_data = {name: self.preprocessor(tensor) for name, tensor in dict_data.items()}
    self.async_queue.start_async(dict_data, callback_data)