open-edge-platform · tybulewicz · Dec 4, 2025 · Dec 1, 2025
@@ -29,6 +29,7 @@
 from .ssd import SSD
 from .utils import (
     OutputTransform,
+    ResizeMetadata,
     add_rotated_rects,
     get_contours,
 )
@@ -78,6 +79,7 @@
     "OutputTransform",
     "PredictedMask",
     "Prompt",
+    "ResizeMetadata",
     "RotatedSegmentationResult",
     "SAMDecoder",
     "SAMImageEncoder",

@@ -119,7 +119,7 @@ def _get_inputs(self) -> list[str]:
             )
         return image_blob_names
 
-    def preprocess(
+    def base_preprocess(
         self,
         inputs: np.ndarray,
     ) -> tuple[dict[str, np.ndarray], dict[str, tuple[int, ...]]]:

@@ -68,13 +68,7 @@ def __init__(
         super().__init__(inference_adapter, configuration, preload)
         self._check_io_number(1, (1, 4))
 
-    def preprocess(self, inputs: np.ndarray) -> list[dict]:
-        """Data preprocess method for Anomalib models.
-
-        Anomalib models typically expect inputs in [0,1] range as float32.
-        """
-        original_shape = inputs.shape
-
+    def _resize_image(self, image: np.ndarray) -> tuple[np.ndarray, dict]:
         if (
             self._is_dynamic
             and getattr(self.inference_adapter, "device", "") == "NPU"
@@ -83,40 +77,12 @@ def preprocess(self, inputs: np.ndarray) -> list[dict]:
             _, self.c, self.h, self.w = self.inference_adapter.compiled_model.inputs[0].get_shape()
             self._is_dynamic = False
 
-        if self._is_dynamic:
-            h, w, c = inputs.shape
-            resized_shape = (w, h, c)
+        return super()._resize_image(image)
 
-            # For anomalib models, convert to float32 and normalize to [0,1] if needed
-            if inputs.dtype == np.uint8:
-                processed_image = inputs.astype(np.float32) / 255.0
-            else:
-                processed_image = inputs.astype(np.float32)
-
-            # Apply layout change but skip InputTransform (which might apply wrong normalization)
-            processed_image = self._change_layout(processed_image)
-        else:
-            resized_shape = (self.w, self.h, self.c)
-            # For fixed models, use standard preprocessing
-            if self.params.embedded_processing:
-                processed_image = inputs[None]
-            else:
-                # Resize image to expected model input dimensions
-                resized_image = self.resize(inputs, (self.w, self.h))
-                # Convert to float32 and normalize for anomalib
-                if resized_image.dtype == np.uint8:
-                    processed_image = resized_image.astype(np.float32) / 255.0
-                else:
-                    processed_image = resized_image.astype(np.float32)
-                processed_image = self._change_layout(processed_image)
-
-        return [
-            {self.image_blob_name: processed_image},
-            {
-                "original_shape": original_shape,
-                "resized_shape": resized_shape,
-            },
-        ]
+    def _input_transform(self, image: np.ndarray) -> np.ndarray:
+        if image.dtype == np.uint8:
+            return image.astype(np.float32) / 255.0
+        return image.astype(np.float32)
 
     def postprocess(self, outputs: dict[str, np.ndarray], meta: dict[str, Any]) -> AnomalyResult:
         """Post-processes the outputs and returns the results.

@@ -60,36 +60,41 @@ def __init__(self, inference_adapter: InferenceAdapter, configuration: dict = {}
             self._verify_single_output()
 
         self.raw_scores_name = _raw_scores_name
+
         if self.params.hierarchical:
-            self._embedded_processing = True
-            self.out_layer_names = _get_non_xai_names(self.outputs.keys())
-            _append_xai_names(self.outputs.keys(), self.out_layer_names)
-            hierarchical_config = self.params.hierarchical_config
-            if not hierarchical_config:
-                self.raise_error("Hierarchical classification config is empty.")
-            self.raw_scores_name = self.out_layer_names[0]
-            self.hierarchical_info = json.loads(hierarchical_config)
-
-            if self.params.hierarchical_postproc == "probabilistic":
-                self.labels_resolver = ProbabilisticLabelsResolver(
-                    self.hierarchical_info,
-                )
-            else:
-                self.labels_resolver = GreedyLabelsResolver(self.hierarchical_info)
+            self._setup_hierarchical()
+        elif self.params.multilabel:
+            self._setup_multilabel()
+        else:
+            self._setup_single_label()
 
-            if preload:
-                self.load()
-            return
+        _append_xai_names(self.outputs.keys(), self.out_layer_names)
+        if preload:
+            self.load()
 
-        if self.params.multilabel:
-            self._embedded_processing = True
-            self.out_layer_names = _get_non_xai_names(self.outputs.keys())
-            _append_xai_names(self.outputs.keys(), self.out_layer_names)
-            self.raw_scores_name = self.out_layer_names[0]
-            if preload:
-                self.load()
-            return
+    def _setup_hierarchical(self) -> None:
+        """Configure model for hierarchical classification."""
+        self._embedded_processing = True
+        self.out_layer_names = _get_non_xai_names(self.outputs.keys())
+        hierarchical_config = self.params.hierarchical_config
+        if not hierarchical_config:
+            self.raise_error("Hierarchical classification config is empty.")
+        self.raw_scores_name = self.out_layer_names[0]
+        self.hierarchical_info = json.loads(hierarchical_config)
+
+        if self.params.hierarchical_postproc == "probabilistic":
+            self.labels_resolver = ProbabilisticLabelsResolver(self.hierarchical_info)
+        else:
+            self.labels_resolver = GreedyLabelsResolver(self.hierarchical_info)
+
+    def _setup_multilabel(self) -> None:
+        """Configure model for multi-label classification."""
+        self._embedded_processing = True
+        self.out_layer_names = _get_non_xai_names(self.outputs.keys())
+        self.raw_scores_name = self.out_layer_names[0]
 
+    def _setup_single_label(self) -> None:
+        """Configure model for single-label classification with TopK."""
         try:
             addOrFindSoftmaxAndTopkOutputs(
                 self.inference_adapter,
@@ -114,10 +119,6 @@ def __init__(self, inference_adapter: InferenceAdapter, configuration: dict = {}
 
         self.embedded_processing = True
 
-        _append_xai_names(self.outputs.keys(), self.out_layer_names)
-        if preload:
-            self.load()
-
     def _load_labels(self, labels_file: str) -> list:
         with Path(labels_file).open() as f:
             labels = []

@@ -8,7 +8,7 @@
 from .image_model import ImageModel
 from .parameters import ParameterRegistry
 from .result import DetectionResult
-from .utils import load_labels
+from .utils import ResizeMetadata, load_labels
 
 
 class DetectionModel(ImageModel):
@@ -58,6 +58,18 @@ def parameters(cls):
         )
         return parameters
 
+    def preprocess(self, dict_inputs: dict, meta: dict) -> tuple[dict, dict]:
+        input_img_height, input_img_width = meta["original_shape"][:2]
+        resize_meta = ResizeMetadata.compute(
+            original_width=input_img_width,
+            original_height=input_img_height,
+            model_width=self.w,
+            model_height=self.h,
+            resize_type=self.params.resize_type,
+        )
+        meta["resize_info"] = resize_meta.to_dict()
+        return dict_inputs, meta
+
     def _resize_detections(self, detection_result: DetectionResult, meta: dict):
         """Resizes detection bounding boxes according to initial image shape.
 
@@ -68,26 +80,24 @@ def _resize_detections(self, detection_result: DetectionResult, meta: dict):
             detection_result (DetectionList): detection result with coordinates in normalized form
             meta (dict): the input metadata obtained from `preprocess` method
         """
-        input_img_height, input_img_widht = meta["original_shape"][:2]
-        inverted_scale_x = input_img_widht / self.w
-        inverted_scale_y = input_img_height / self.h
-        pad_left = 0
-        pad_top = 0
-        resize_type = self.params.resize_type
-        if resize_type == "fit_to_window" or resize_type == "fit_to_window_letterbox":
-            inverted_scale_x = inverted_scale_y = max(
-                inverted_scale_x,
-                inverted_scale_y,
+        input_img_height, input_img_width = meta["original_shape"][:2]
+
+        if "resize_info" in meta:
+            resize_meta = ResizeMetadata.from_dict(meta["resize_info"])
+        else:
+            resize_meta = ResizeMetadata.compute(
+                original_width=input_img_width,
+                original_height=input_img_height,
+                model_width=self.w,
+                model_height=self.h,
+                resize_type=self.params.resize_type,
             )
-            if resize_type == "fit_to_window_letterbox":
-                pad_left = (self.w - round(input_img_widht / inverted_scale_x)) // 2
-                pad_top = (self.h - round(input_img_height / inverted_scale_y)) // 2
 
         boxes = detection_result.bboxes
-        boxes[:, 0::2] = (boxes[:, 0::2] * self.w - pad_left) * inverted_scale_x
-        boxes[:, 1::2] = (boxes[:, 1::2] * self.h - pad_top) * inverted_scale_y
+        boxes[:, 0::2] = (boxes[:, 0::2] * self.w - resize_meta.pad_left) * resize_meta.inverted_scale_x
+        boxes[:, 1::2] = (boxes[:, 1::2] * self.h - resize_meta.pad_top) * resize_meta.inverted_scale_y
         np.round(boxes, out=boxes)
-        boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, input_img_widht)
+        boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, input_img_width)
         boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, input_img_height)
         detection_result.bboxes = boxes.astype(np.int32)
 

@@ -148,7 +148,7 @@ def _get_inputs(self) -> tuple[list[str], ...]:
             )
         return image_blob_names, image_info_blob_names
 
-    def preprocess(self, inputs: np.ndarray) -> list[dict]:
+    def base_preprocess(self, inputs: np.ndarray) -> list[dict]:
         """Data preprocess method
 
         It performs basic preprocessing of a single image:
@@ -173,35 +173,61 @@ def preprocess(self, inputs: np.ndarray) -> list[dict]:
                 }
             - the input metadata, which might be used in `postprocess` method
         """
-        original_shape = inputs.shape
-
         if self.params.embedded_processing:
-            processed_image = inputs[None]
-            if self._is_dynamic:
-                h, w, c = inputs.shape
-                resized_shape = (w, h, c)
-            else:
-                resized_shape = (self.w, self.h, self.c)
-        elif self._is_dynamic:
+            dict_inputs, meta = self._preprocess_embedded(inputs)
+            dict_inputs, meta = self.preprocess(dict_inputs, meta)
+            return [dict_inputs, meta]
+
+        # 1. Resize
+        resized_image, meta = self._resize_image(inputs)
+
+        # 2. Transform
+        processed_image = self._input_transform(resized_image)
+
+        # 3. Layout
+        processed_image = self._change_layout(processed_image)
+
+        # 4. Pack
+        dict_inputs = {self.image_blob_name: processed_image}
+
+        # 5. Model-specific preprocess
+        dict_inputs, meta = self.preprocess(dict_inputs, meta)
+
+        return [dict_inputs, meta]
+
+    def _preprocess_embedded(self, inputs: np.ndarray) -> tuple[dict, dict]:
+        original_shape = inputs.shape
+        processed_image = inputs[None]
+        if self._is_dynamic:
             h, w, c = inputs.shape
             resized_shape = (w, h, c)
-            processed_image = self.input_transform(inputs)
-            processed_image = self._change_layout(processed_image)
         else:
-            # Fixed model without embedded preprocessing
             resized_shape = (self.w, self.h, self.c)
 
-            resized_image = self.resize(inputs, (self.w, self.h), pad_value=self.params.pad_value)
-            processed_image = self.input_transform(resized_image)
-            processed_image = self._change_layout(processed_image)
-
-        return [
+        return (
             {self.image_blob_name: processed_image},
             {
                 "original_shape": original_shape,
                 "resized_shape": resized_shape,
             },
-        ]
+        )
+
+    def _resize_image(self, image: np.ndarray) -> tuple[np.ndarray, dict]:
+        original_shape = image.shape
+        if self._is_dynamic:
+            h, w, c = image.shape
+            resized_shape = (w, h, c)
+            return image, {"original_shape": original_shape, "resized_shape": resized_shape}
+
+        resized_shape = (self.w, self.h, self.c)
+        resized_image = self.resize(image, (self.w, self.h), pad_value=self.params.pad_value)
+        return resized_image, {"original_shape": original_shape, "resized_shape": resized_shape}
+
+    def _input_transform(self, image: np.ndarray) -> np.ndarray:
+        return self.input_transform(image)
+
+    def preprocess(self, dict_inputs: dict, meta: dict) -> tuple[dict, dict]:
+        return dict_inputs, meta
 
     def _change_layout(self, image: np.ndarray) -> np.ndarray:
         """Changes the input image layout to fit the layout of the model input layer.

@@ -11,7 +11,7 @@
 from .image_model import ImageModel
 from .parameters import ParameterRegistry
 from .result import InstanceSegmentationResult
-from .utils import load_labels
+from .utils import ResizeMetadata, load_labels
 
 
 class MaskRCNNModel(ImageModel):
@@ -95,8 +95,7 @@ def _get_segmentoly_outputs(self) -> dict:
                 )
         return outputs
 
-    def preprocess(self, inputs: np.ndarray) -> list[dict]:
-        dict_inputs, meta = super().preprocess(inputs)
+    def preprocess(self, dict_inputs: dict, meta: dict) -> tuple[dict, dict]:
         input_image_size = meta["resized_shape"][:2]
         if self.is_segmentoly:
             assert len(self.image_info_blob_names) == 1
@@ -105,7 +104,7 @@ def preprocess(self, inputs: np.ndarray) -> list[dict]:
                 dtype=np.float32,
             )
             dict_inputs[self.image_info_blob_names[0]] = input_image_info
-        return [dict_inputs, meta]
+        return dict_inputs, meta
 
     def postprocess(self, outputs: dict, meta: dict) -> InstanceSegmentationResult:
         if (
@@ -141,20 +140,21 @@ def postprocess(self, outputs: dict, meta: dict) -> InstanceSegmentationResult:
             meta["original_shape"][1],
             meta["original_shape"][0],
         )
-        invertedScaleX, invertedScaleY = (
-            inputImgWidth / self.orig_width,
-            inputImgHeight / self.orig_height,
+        resize_meta = ResizeMetadata.compute(
+            original_width=inputImgWidth,
+            original_height=inputImgHeight,
+            model_width=self.orig_width,
+            model_height=self.orig_height,
+            resize_type=self.params.resize_type,
+        )
+
+        boxes -= (resize_meta.pad_left, resize_meta.pad_top, resize_meta.pad_left, resize_meta.pad_top)
+        boxes *= (
+            resize_meta.inverted_scale_x,
+            resize_meta.inverted_scale_y,
+            resize_meta.inverted_scale_x,
+            resize_meta.inverted_scale_y,
         )
-        padLeft, padTop = 0, 0
-        resize_type = self.params.resize_type
-        if resize_type == "fit_to_window" or resize_type == "fit_to_window_letterbox":
-            invertedScaleX = invertedScaleY = max(invertedScaleX, invertedScaleY)
-            if resize_type == "fit_to_window_letterbox":
-                padLeft = (self.orig_width - round(inputImgWidth / invertedScaleX)) // 2
-                padTop = (self.orig_height - round(inputImgHeight / invertedScaleY)) // 2
-
-        boxes -= (padLeft, padTop, padLeft, padTop)
-        boxes *= (invertedScaleX, invertedScaleY, invertedScaleX, invertedScaleY)
         np.around(boxes, out=boxes)
         np.clip(
             boxes,