Skip to content

Commit c8d2811

Browse files
Add sigmoid smoothing to instance segmentation post-processing of YOLO models family in inference-models (#2217)
* Add sigmoid smoothing to instance segmentation post-processing of YOLO models family in inference-models * Adjust tests cases * Make linters happy * Adjust tests cases * Adjust tests cases * Update docs * Bump version of inference requirements
1 parent 410890a commit c8d2811

23 files changed

Lines changed: 221 additions & 151 deletions

inference_models/docs/changelog.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,25 @@
11
# Changelog
22

3+
## `0.24.3`
4+
5+
### Changed
6+
7+
- Added `sigmoid` smoothing for instance-segmentation masks in the YOLOv8, YOLOv11 and YOLOv12 family of models.
8+
Smoothing can be enabled / disabled via the `masks_smoothing_enabled` parameter of the `post_process(...)` method
9+
(which can be passed as `**kwargs` to `forward(...)`), with the default set by
10+
`INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_SMOOTHING_ENABLED` (set to `True`). Additionally, the binarization
11+
threshold for masks can be controlled via the `masks_binarization_threshold` parameter - its default is
12+
controlled with `INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_BINARIZATION_THRESHOLD` (set to `0.5` or `0.0`
13+
depending on `INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_SMOOTHING_ENABLED`: `0.5` when smoothing is enabled, `0.0` otherwise).
14+
15+
!!! warning "Instance-segmentation masks will change"
16+
17+
Due to smoothing, a slight change to segmentation masks is expected - mainly regarding edges
18+
of predictions, which should be smoother now. The change is dictated by alignment with old `inference` versions
19+
behaviour, effectively drifting from `ultralytics` post-processing.
20+
21+
---
22+
323
## `0.24.2`
424

525
### Fixed

inference_models/inference_models/configuration.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,18 @@
350350
variable_name="INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CLASS_AGNOSTIC_NMS",
351351
default=INFERENCE_MODELS_DEFAULT_CLASS_AGNOSTIC_NMS,
352352
)
353+
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_SMOOTHING_ENABLED = get_boolean_from_env(
354+
variable_name="INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_SMOOTHING_ENABLED",
355+
default=True,
356+
)
357+
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_BINARIZATION_THRESHOLD = get_float_from_env(
358+
variable_name="INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_BINARIZATION_THRESHOLD",
359+
default=(
360+
0.5
361+
if INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_SMOOTHING_ENABLED
362+
else 0.0
363+
),
364+
)
353365
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_KEY_POINTS_THRESHOLD = get_float_from_env(
354366
variable_name="INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_KEY_POINTS_THRESHOLD",
355367
default=0.0,

inference_models/inference_models/models/auto_loaders/auto_resolution_cache.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ def invalidate(self, auto_negotiation_hash: str) -> None:
125125

126126

127127
def generate_auto_resolution_cache_path(auto_negotiation_hash: str) -> str:
128-
return os.path.abspath(os.path.join(
129-
INFERENCE_HOME, "auto-resolution-cache", f"{auto_negotiation_hash}.json"
130-
))
128+
return os.path.abspath(
129+
os.path.join(
130+
INFERENCE_HOME, "auto-resolution-cache", f"{auto_negotiation_hash}.json"
131+
)
132+
)

inference_models/inference_models/models/auto_loaders/core.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1510,7 +1510,9 @@ def generate_shared_blobs_path() -> str:
15101510
def generate_model_package_cache_path(model_id: str, package_id: str) -> str:
15111511
ensure_package_id_is_os_safe(model_id=model_id, package_id=package_id)
15121512
model_id_slug = slugify_model_id_to_os_safe_format(model_id=model_id)
1513-
return os.path.abspath(os.path.join(INFERENCE_HOME, "models-cache", model_id_slug, package_id))
1513+
return os.path.abspath(
1514+
os.path.join(INFERENCE_HOME, "models-cache", model_id_slug, package_id)
1515+
)
15141516

15151517

15161518
def ensure_package_id_is_os_safe(model_id: str, package_id: str) -> None:

inference_models/inference_models/models/common/roboflow/post_processing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ def crop_masks_to_boxes(
314314
scaling: float = 0.25,
315315
) -> torch.Tensor:
316316
n, h, w = masks.shape
317-
scaled_boxes = boxes * scaling
317+
scaled_boxes = torch.round(boxes * scaling)
318318
x1, y1, x2, y2 = (
319319
scaled_boxes[:, 0][:, None, None],
320320
scaled_boxes[:, 1][:, None, None],

inference_models/inference_models/models/yololite/yololite_object_detection_onnx.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -200,11 +200,18 @@ def post_process(
200200
**kwargs,
201201
) -> List[Detections]:
202202
# Backward compatibility: earlier model packages have no post_processing config — always unfused 3-tensor output
203-
if self._inference_config.post_processing and self._inference_config.post_processing.fused:
203+
if (
204+
self._inference_config.post_processing
205+
and self._inference_config.post_processing.fused
206+
):
204207
nms_results = self._post_process_fused(model_results, confidence)
205208
else:
206209
nms_results = self._post_process_unfused(
207-
model_results, confidence, iou_threshold, max_detections, class_agnostic_nms,
210+
model_results,
211+
confidence,
212+
iou_threshold,
213+
max_detections,
214+
class_agnostic_nms,
208215
)
209216
rescaled_results = rescale_detections(
210217
detections=nms_results,
@@ -228,7 +235,9 @@ def _post_process_fused(
228235
) -> List[torch.Tensor]:
229236
# Single output tensor [B, max_det, 6]: x1, y1, x2, y2, conf, class_id
230237
output = model_results[0]
231-
return post_process_nms_fused_model_output(output=output, conf_thresh=confidence)
238+
return post_process_nms_fused_model_output(
239+
output=output, conf_thresh=confidence
240+
)
232241

233242
def _post_process_unfused(
234243
self,
@@ -240,7 +249,9 @@ def _post_process_unfused(
240249
) -> List[torch.Tensor]:
241250
# Decoded outputs without fused NMS: boxes_xyxy [B,N,4], obj_logits [B,N,1], cls_logits [B,N,C]
242251
boxes_xyxy, obj_logits, cls_logits = (
243-
model_results[0], model_results[1], model_results[2],
252+
model_results[0],
253+
model_results[1],
254+
model_results[2],
244255
)
245256
obj_conf = torch.sigmoid(obj_logits)
246257
cls_conf = torch.sigmoid(cls_logits)

inference_models/inference_models/models/yolov8/yolov8_instance_segmentation_onnx.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CLASS_AGNOSTIC_NMS,
1515
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CONFIDENCE,
1616
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD,
17+
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_BINARIZATION_THRESHOLD,
18+
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_SMOOTHING_ENABLED,
1719
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS,
1820
)
1921
from inference_models.entities import ColorFormat
@@ -207,6 +209,8 @@ def post_process(
207209
iou_threshold: float = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD,
208210
max_detections: int = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS,
209211
class_agnostic_nms: bool = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CLASS_AGNOSTIC_NMS,
212+
masks_smoothing_enabled: bool = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_SMOOTHING_ENABLED,
213+
masks_binarization_threshold: float = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_BINARIZATION_THRESHOLD,
210214
**kwargs,
211215
) -> List[InstanceDetections]:
212216
instances, protos = model_results
@@ -230,6 +234,8 @@ def post_process(
230234
protos=image_protos,
231235
masks_in=image_bboxes[:, 6:],
232236
)
237+
if masks_smoothing_enabled:
238+
pre_processed_masks = torch.nn.functional.sigmoid(pre_processed_masks)
233239
cropped_masks = crop_masks_to_boxes(
234240
image_bboxes[:, :4], pre_processed_masks
235241
)
@@ -249,6 +255,7 @@ def post_process(
249255
size_after_pre_processing=image_meta.size_after_pre_processing,
250256
inference_size=image_meta.inference_size,
251257
static_crop_offset=image_meta.static_crop_offset,
258+
binarization_threshold=masks_binarization_threshold,
252259
)
253260
final_results.append(
254261
InstanceDetections(

inference_models/inference_models/models/yolov8/yolov8_instance_segmentation_torch_script.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CLASS_AGNOSTIC_NMS,
1616
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CONFIDENCE,
1717
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD,
18+
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_BINARIZATION_THRESHOLD,
19+
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_SMOOTHING_ENABLED,
1820
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS,
1921
)
2022
from inference_models.entities import ColorFormat
@@ -170,6 +172,8 @@ def post_process(
170172
iou_threshold: float = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD,
171173
max_detections: int = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS,
172174
class_agnostic_nms: bool = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CLASS_AGNOSTIC_NMS,
175+
masks_smoothing_enabled: bool = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_SMOOTHING_ENABLED,
176+
masks_binarization_threshold: float = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_BINARIZATION_THRESHOLD,
173177
**kwargs,
174178
) -> List[InstanceDetections]:
175179
instances, protos = model_results
@@ -193,6 +197,8 @@ def post_process(
193197
protos=image_protos,
194198
masks_in=image_bboxes[:, 6:],
195199
)
200+
if masks_smoothing_enabled:
201+
pre_processed_masks = torch.nn.functional.sigmoid(pre_processed_masks)
196202
cropped_masks = crop_masks_to_boxes(
197203
image_bboxes[:, :4], pre_processed_masks
198204
)
@@ -212,6 +218,7 @@ def post_process(
212218
size_after_pre_processing=image_meta.size_after_pre_processing,
213219
inference_size=image_meta.inference_size,
214220
static_crop_offset=image_meta.static_crop_offset,
221+
binarization_threshold=masks_binarization_threshold,
215222
)
216223
final_results.append(
217224
InstanceDetections(

inference_models/inference_models/models/yolov8/yolov8_instance_segmentation_trt.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CLASS_AGNOSTIC_NMS,
1616
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CONFIDENCE,
1717
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD,
18+
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_BINARIZATION_THRESHOLD,
19+
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_SMOOTHING_ENABLED,
1820
INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS,
1921
)
2022
from inference_models.entities import ColorFormat
@@ -265,6 +267,8 @@ def post_process(
265267
iou_threshold: float = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_IOU_THRESHOLD,
266268
max_detections: int = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MAX_DETECTIONS,
267269
class_agnostic_nms: bool = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_CLASS_AGNOSTIC_NMS,
270+
masks_smoothing_enabled: bool = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_SMOOTHING_ENABLED,
271+
masks_binarization_threshold: float = INFERENCE_MODELS_YOLO_ULTRALYTICS_DEFAULT_MASKS_BINARIZATION_THRESHOLD,
268272
**kwargs,
269273
) -> List[InstanceDetections]:
270274
with torch.cuda.stream(self._post_process_stream):
@@ -291,6 +295,10 @@ def post_process(
291295
protos=image_protos,
292296
masks_in=image_bboxes[:, 6:],
293297
)
298+
if masks_smoothing_enabled:
299+
pre_processed_masks = torch.nn.functional.sigmoid(
300+
pre_processed_masks
301+
)
294302
cropped_masks = crop_masks_to_boxes(
295303
image_bboxes[:, :4], pre_processed_masks
296304
)
@@ -310,6 +318,7 @@ def post_process(
310318
size_after_pre_processing=image_meta.size_after_pre_processing,
311319
inference_size=image_meta.inference_size,
312320
static_crop_offset=image_meta.static_crop_offset,
321+
binarization_threshold=masks_binarization_threshold,
313322
)
314323
final_results.append(
315324
InstanceDetections(

inference_models/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "inference-models"
3-
version = "0.24.2"
3+
version = "0.24.3"
44
description = "The new inference engine for Computer Vision models"
55
readme = "README.md"
66
requires-python = ">=3.10,<3.13"

0 commit comments

Comments
 (0)