forked from insight-platform/Savant
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconverter.py
More file actions
103 lines (82 loc) · 3.28 KB
/
converter.py
File metadata and controls
103 lines (82 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""YOLOv8Pose postprocessing (converter)."""
from typing import Any, Tuple
import numpy as np
from numba.typed import List
from savant.base.converter import BaseComplexModelOutputConverter
from savant.base.model import ComplexModel
from savant.utils.nms import nms_cpu
class YoloV8PoseConverter(BaseComplexModelOutputConverter):
"""`YOLOv8Pose converter."""
def __init__(
self,
confidence_threshold: float = 0.6,
nms_iou_threshold: float = 0.45,
top_k: int = 100,
**kwargs,
):
super().__init__(**kwargs)
self.confidence_threshold = confidence_threshold
self.nms_iou_threshold = nms_iou_threshold
self.top_k = top_k
def __call__(
self,
*output_layers: np.ndarray,
model: ComplexModel,
roi: Tuple[float, float, float, float],
) -> Tuple[np.ndarray, List[List[Tuple[str, Any, float]]]]:
"""Converts output layer tensors to bboxes and key points.
:param output_layers: Output layers tensor
:param model: Model definition, required parameters: input tensor shape
:param roi: [top, left, width, height] of the rectangle
on which the model infers
:return: a combination of :py:class:`.BaseObjectModelOutputConverter` and
:py:class:`.BaseAttributeModelOutputConverter` outputs:
* BBox tensor ``(class_id, confidence, xc, yc, width, height, [angle])``
offset by roi upper left and scaled by roi width and height,
* list of attributes values with confidences
``(attr_name, value, confidence)``
"""
output = np.transpose(output_layers[0])
ret_empty = np.float32([]), []
confidences = output[:, 4]
keep = confidences > self.confidence_threshold
if not keep.any():
return ret_empty
output = output[keep]
bboxes = output[:, :4]
confidences = output[:, 4]
keep = nms_cpu(
bboxes,
confidences,
self.nms_iou_threshold,
self.top_k,
)
if not keep.any():
return ret_empty
output = output[keep]
# person class id = 0
class_ids = np.zeros((output.shape[0], 1), dtype=np.float32)
confidences = output[:, 4:5]
bboxes = output[:, :4]
key_points = output[:, 5:]
key_points = key_points.reshape(key_points.shape[0], -1, 3)
# scale
roi_left, roi_top, roi_width, roi_height = roi
if model.input.maintain_aspect_ratio:
ratio_x = ratio_y = min(
model.input.width / roi_width,
model.input.height / roi_height,
)
else:
ratio_x = model.input.width / roi_width
ratio_y = model.input.height / roi_height
bboxes /= np.float32([ratio_x, ratio_y, ratio_x, ratio_y])
bboxes[:, 0] += roi_left
bboxes[:, 1] += roi_top
key_points /= np.float32([ratio_x, ratio_y, 1.0])
key_points[:, 0] += roi_left
key_points[:, 1] += roi_top
bboxes = np.concatenate((class_ids, confidences, bboxes), axis=1)
attr_name = model.output.attributes[0].name
key_points = [[(attr_name, pts, 1.0)] for pts in key_points]
return bboxes, key_points