Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/model_api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from .ssd import SSD
from .utils import (
OutputTransform,
ResizeMetadata,
add_rotated_rects,
get_contours,
)
Expand Down Expand Up @@ -78,6 +79,7 @@
"OutputTransform",
"PredictedMask",
"Prompt",
"ResizeMetadata",
"RotatedSegmentationResult",
"SAMDecoder",
"SAMImageEncoder",
Expand Down
2 changes: 1 addition & 1 deletion src/model_api/models/action_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def _get_inputs(self) -> list[str]:
)
return image_blob_names

def preprocess(
def base_preprocess(
self,
inputs: np.ndarray,
) -> tuple[dict[str, np.ndarray], dict[str, tuple[int, ...]]]:
Expand Down
46 changes: 6 additions & 40 deletions src/model_api/models/anomaly.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,7 @@ def __init__(
super().__init__(inference_adapter, configuration, preload)
self._check_io_number(1, (1, 4))

def preprocess(self, inputs: np.ndarray) -> list[dict]:
"""Data preprocess method for Anomalib models.

Anomalib models typically expect inputs in [0,1] range as float32.
"""
original_shape = inputs.shape

def _resize_image(self, image: np.ndarray) -> tuple[np.ndarray, dict]:
if (
self._is_dynamic
and getattr(self.inference_adapter, "device", "") == "NPU"
Expand All @@ -83,40 +77,12 @@ def preprocess(self, inputs: np.ndarray) -> list[dict]:
_, self.c, self.h, self.w = self.inference_adapter.compiled_model.inputs[0].get_shape()
self._is_dynamic = False

if self._is_dynamic:
h, w, c = inputs.shape
resized_shape = (w, h, c)
return super()._resize_image(image)

# For anomalib models, convert to float32 and normalize to [0,1] if needed
if inputs.dtype == np.uint8:
processed_image = inputs.astype(np.float32) / 255.0
else:
processed_image = inputs.astype(np.float32)

# Apply layout change but skip InputTransform (which might apply wrong normalization)
processed_image = self._change_layout(processed_image)
else:
resized_shape = (self.w, self.h, self.c)
# For fixed models, use standard preprocessing
if self.params.embedded_processing:
processed_image = inputs[None]
else:
# Resize image to expected model input dimensions
resized_image = self.resize(inputs, (self.w, self.h))
# Convert to float32 and normalize for anomalib
if resized_image.dtype == np.uint8:
processed_image = resized_image.astype(np.float32) / 255.0
else:
processed_image = resized_image.astype(np.float32)
processed_image = self._change_layout(processed_image)

return [
{self.image_blob_name: processed_image},
{
"original_shape": original_shape,
"resized_shape": resized_shape,
},
]
def _input_transform(self, image: np.ndarray) -> np.ndarray:
    """Cast the image to float32, rescaling 8-bit input by 1/255.

    Args:
        image: input image array.

    Returns:
        A float32 array. Values are divided by 255 only when the input
        dtype is uint8; any other dtype is cast without rescaling.
    """
    as_float = image.astype(np.float32)
    if image.dtype == np.uint8:
        # uint8 pixels span 0..255, so bring them down to 0..1.
        as_float = as_float / 255.0
    return as_float

def postprocess(self, outputs: dict[str, np.ndarray], meta: dict[str, Any]) -> AnomalyResult:
"""Post-processes the outputs and returns the results.
Expand Down
61 changes: 31 additions & 30 deletions src/model_api/models/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,36 +60,41 @@ def __init__(self, inference_adapter: InferenceAdapter, configuration: dict = {}
self._verify_single_output()

self.raw_scores_name = _raw_scores_name

if self.params.hierarchical:
self._embedded_processing = True
self.out_layer_names = _get_non_xai_names(self.outputs.keys())
_append_xai_names(self.outputs.keys(), self.out_layer_names)
hierarchical_config = self.params.hierarchical_config
if not hierarchical_config:
self.raise_error("Hierarchical classification config is empty.")
self.raw_scores_name = self.out_layer_names[0]
self.hierarchical_info = json.loads(hierarchical_config)

if self.params.hierarchical_postproc == "probabilistic":
self.labels_resolver = ProbabilisticLabelsResolver(
self.hierarchical_info,
)
else:
self.labels_resolver = GreedyLabelsResolver(self.hierarchical_info)
self._setup_hierarchical()
elif self.params.multilabel:
self._setup_multilabel()
else:
self._setup_single_label()

if preload:
self.load()
return
_append_xai_names(self.outputs.keys(), self.out_layer_names)
if preload:
self.load()

if self.params.multilabel:
self._embedded_processing = True
self.out_layer_names = _get_non_xai_names(self.outputs.keys())
_append_xai_names(self.outputs.keys(), self.out_layer_names)
self.raw_scores_name = self.out_layer_names[0]
if preload:
self.load()
return
def _setup_hierarchical(self) -> None:
    """Prepare the model for hierarchical classification.

    Sets the embedded-processing flag, collects the non-XAI output layer
    names and uses the first one as the raw-scores output, parses the
    hierarchical config from JSON, and picks the labels resolver based on
    the ``hierarchical_postproc`` parameter.
    """
    self._embedded_processing = True
    self.out_layer_names = _get_non_xai_names(self.outputs.keys())

    config_json = self.params.hierarchical_config
    if not config_json:
        # Reported before the config is parsed or any output name is read.
        self.raise_error("Hierarchical classification config is empty.")

    self.raw_scores_name = self.out_layer_names[0]
    self.hierarchical_info = json.loads(config_json)

    # "probabilistic" selects the probabilistic resolver; anything else is greedy.
    use_probabilistic = self.params.hierarchical_postproc == "probabilistic"
    resolver_cls = ProbabilisticLabelsResolver if use_probabilistic else GreedyLabelsResolver
    self.labels_resolver = resolver_cls(self.hierarchical_info)

def _setup_multilabel(self) -> None:
    """Prepare the model for multi-label classification.

    Sets the embedded-processing flag, stores the non-XAI output layer
    names, and uses the first of them as the raw-scores output.
    """
    self._embedded_processing = True
    layer_names = _get_non_xai_names(self.outputs.keys())
    self.out_layer_names = layer_names
    self.raw_scores_name = layer_names[0]

def _setup_single_label(self) -> None:
"""Configure model for single-label classification with TopK."""
try:
addOrFindSoftmaxAndTopkOutputs(
self.inference_adapter,
Expand All @@ -114,10 +119,6 @@ def __init__(self, inference_adapter: InferenceAdapter, configuration: dict = {}

self.embedded_processing = True

_append_xai_names(self.outputs.keys(), self.out_layer_names)
if preload:
self.load()

def _load_labels(self, labels_file: str) -> list:
with Path(labels_file).open() as f:
labels = []
Expand Down
44 changes: 27 additions & 17 deletions src/model_api/models/detection_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .image_model import ImageModel
from .parameters import ParameterRegistry
from .result import DetectionResult
from .utils import load_labels
from .utils import ResizeMetadata, load_labels


class DetectionModel(ImageModel):
Expand Down Expand Up @@ -58,6 +58,18 @@ def parameters(cls):
)
return parameters

def preprocess(self, dict_inputs: dict, meta: dict) -> tuple[dict, dict]:
    """Attach resize information to the preprocessing metadata.

    Args:
        dict_inputs: model inputs; returned untouched.
        meta: metadata containing ``"original_shape"``, whose first two
            entries are read as the image height and width. It is updated
            in place with a ``"resize_info"`` entry.

    Returns:
        The ``(dict_inputs, meta)`` pair.
    """
    original_height, original_width = meta["original_shape"][:2]
    # Stored in dict form under "resize_info".
    meta["resize_info"] = ResizeMetadata.compute(
        original_width=original_width,
        original_height=original_height,
        model_width=self.w,
        model_height=self.h,
        resize_type=self.params.resize_type,
    ).to_dict()
    return dict_inputs, meta

def _resize_detections(self, detection_result: DetectionResult, meta: dict):
"""Resizes detection bounding boxes according to initial image shape.

Expand All @@ -68,26 +80,24 @@ def _resize_detections(self, detection_result: DetectionResult, meta: dict):
detection_result (DetectionList): detection result with coordinates in normalized form
meta (dict): the input metadata obtained from `preprocess` method
"""
input_img_height, input_img_widht = meta["original_shape"][:2]
inverted_scale_x = input_img_widht / self.w
inverted_scale_y = input_img_height / self.h
pad_left = 0
pad_top = 0
resize_type = self.params.resize_type
if resize_type == "fit_to_window" or resize_type == "fit_to_window_letterbox":
inverted_scale_x = inverted_scale_y = max(
inverted_scale_x,
inverted_scale_y,
input_img_height, input_img_width = meta["original_shape"][:2]

if "resize_info" in meta:
resize_meta = ResizeMetadata.from_dict(meta["resize_info"])
else:
resize_meta = ResizeMetadata.compute(
original_width=input_img_width,
original_height=input_img_height,
model_width=self.w,
model_height=self.h,
resize_type=self.params.resize_type,
)
if resize_type == "fit_to_window_letterbox":
pad_left = (self.w - round(input_img_widht / inverted_scale_x)) // 2
pad_top = (self.h - round(input_img_height / inverted_scale_y)) // 2

boxes = detection_result.bboxes
boxes[:, 0::2] = (boxes[:, 0::2] * self.w - pad_left) * inverted_scale_x
boxes[:, 1::2] = (boxes[:, 1::2] * self.h - pad_top) * inverted_scale_y
boxes[:, 0::2] = (boxes[:, 0::2] * self.w - resize_meta.pad_left) * resize_meta.inverted_scale_x
boxes[:, 1::2] = (boxes[:, 1::2] * self.h - resize_meta.pad_top) * resize_meta.inverted_scale_y
np.round(boxes, out=boxes)
boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, input_img_widht)
boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, input_img_width)
boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, input_img_height)
detection_result.bboxes = boxes.astype(np.int32)

Expand Down
64 changes: 45 additions & 19 deletions src/model_api/models/image_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def _get_inputs(self) -> tuple[list[str], ...]:
)
return image_blob_names, image_info_blob_names

def preprocess(self, inputs: np.ndarray) -> list[dict]:
def base_preprocess(self, inputs: np.ndarray) -> list[dict]:
"""Data preprocess method

It performs basic preprocessing of a single image:
Expand All @@ -173,35 +173,61 @@ def preprocess(self, inputs: np.ndarray) -> list[dict]:
}
- the input metadata, which might be used in `postprocess` method
"""
original_shape = inputs.shape

if self.params.embedded_processing:
processed_image = inputs[None]
if self._is_dynamic:
h, w, c = inputs.shape
resized_shape = (w, h, c)
else:
resized_shape = (self.w, self.h, self.c)
elif self._is_dynamic:
dict_inputs, meta = self._preprocess_embedded(inputs)
dict_inputs, meta = self.preprocess(dict_inputs, meta)
return [dict_inputs, meta]

# 1. Resize
resized_image, meta = self._resize_image(inputs)

# 2. Transform
processed_image = self._input_transform(resized_image)

# 3. Layout
processed_image = self._change_layout(processed_image)

# 4. Pack
dict_inputs = {self.image_blob_name: processed_image}

# 5. Model-specific preprocess
dict_inputs, meta = self.preprocess(dict_inputs, meta)

return [dict_inputs, meta]

def _preprocess_embedded(self, inputs: np.ndarray) -> tuple[dict, dict]:
original_shape = inputs.shape
processed_image = inputs[None]
if self._is_dynamic:
h, w, c = inputs.shape
resized_shape = (w, h, c)
processed_image = self.input_transform(inputs)
processed_image = self._change_layout(processed_image)
else:
# Fixed model without embedded preprocessing
resized_shape = (self.w, self.h, self.c)

resized_image = self.resize(inputs, (self.w, self.h), pad_value=self.params.pad_value)
processed_image = self.input_transform(resized_image)
processed_image = self._change_layout(processed_image)

return [
return (
{self.image_blob_name: processed_image},
{
"original_shape": original_shape,
"resized_shape": resized_shape,
},
]
)

def _resize_image(self, image: np.ndarray) -> tuple[np.ndarray, dict]:
    """Resize the image to the model input size and record both shapes.

    Args:
        image: input image; unpacked as (height, width, channels) when the
            model is dynamic.

    Returns:
        A pair of the (possibly resized) image and a metadata dict with
        ``"original_shape"`` and ``"resized_shape"`` (width, height, channels).
        For a dynamic model the image is returned as-is.
    """
    meta = {"original_shape": image.shape}

    if self._is_dynamic:
        # Nothing to resize: the recorded size is the image's own size.
        height, width, channels = image.shape
        meta["resized_shape"] = (width, height, channels)
        return image, meta

    meta["resized_shape"] = (self.w, self.h, self.c)
    target_size = (self.w, self.h)
    return self.resize(image, target_size, pad_value=self.params.pad_value), meta

def _input_transform(self, image: np.ndarray) -> np.ndarray:
    """Apply the model's ``input_transform`` to the image and return the result."""
    transformed = self.input_transform(image)
    return transformed

def preprocess(self, dict_inputs: dict, meta: dict) -> tuple[dict, dict]:
    """Return the inputs and metadata unchanged.

    Args:
        dict_inputs: model inputs.
        meta: preprocessing metadata.

    Returns:
        The same ``(dict_inputs, meta)`` objects, unmodified.
    """
    return (dict_inputs, meta)

def _change_layout(self, image: np.ndarray) -> np.ndarray:
"""Changes the input image layout to fit the layout of the model input layer.
Expand Down
34 changes: 17 additions & 17 deletions src/model_api/models/instance_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .image_model import ImageModel
from .parameters import ParameterRegistry
from .result import InstanceSegmentationResult
from .utils import load_labels
from .utils import ResizeMetadata, load_labels


class MaskRCNNModel(ImageModel):
Expand Down Expand Up @@ -95,8 +95,7 @@ def _get_segmentoly_outputs(self) -> dict:
)
return outputs

def preprocess(self, inputs: np.ndarray) -> list[dict]:
dict_inputs, meta = super().preprocess(inputs)
def preprocess(self, dict_inputs: dict, meta: dict) -> tuple[dict, dict]:
input_image_size = meta["resized_shape"][:2]
if self.is_segmentoly:
assert len(self.image_info_blob_names) == 1
Expand All @@ -105,7 +104,7 @@ def preprocess(self, inputs: np.ndarray) -> list[dict]:
dtype=np.float32,
)
dict_inputs[self.image_info_blob_names[0]] = input_image_info
return [dict_inputs, meta]
return dict_inputs, meta

def postprocess(self, outputs: dict, meta: dict) -> InstanceSegmentationResult:
if (
Expand Down Expand Up @@ -141,20 +140,21 @@ def postprocess(self, outputs: dict, meta: dict) -> InstanceSegmentationResult:
meta["original_shape"][1],
meta["original_shape"][0],
)
invertedScaleX, invertedScaleY = (
inputImgWidth / self.orig_width,
inputImgHeight / self.orig_height,
resize_meta = ResizeMetadata.compute(
original_width=inputImgWidth,
original_height=inputImgHeight,
model_width=self.orig_width,
model_height=self.orig_height,
resize_type=self.params.resize_type,
)

boxes -= (resize_meta.pad_left, resize_meta.pad_top, resize_meta.pad_left, resize_meta.pad_top)
boxes *= (
resize_meta.inverted_scale_x,
resize_meta.inverted_scale_y,
resize_meta.inverted_scale_x,
resize_meta.inverted_scale_y,
)
padLeft, padTop = 0, 0
resize_type = self.params.resize_type
if resize_type == "fit_to_window" or resize_type == "fit_to_window_letterbox":
invertedScaleX = invertedScaleY = max(invertedScaleX, invertedScaleY)
if resize_type == "fit_to_window_letterbox":
padLeft = (self.orig_width - round(inputImgWidth / invertedScaleX)) // 2
padTop = (self.orig_height - round(inputImgHeight / invertedScaleY)) // 2

boxes -= (padLeft, padTop, padLeft, padTop)
boxes *= (invertedScaleX, invertedScaleY, invertedScaleX, invertedScaleY)
np.around(boxes, out=boxes)
np.clip(
boxes,
Expand Down
Loading
Loading