Skip to content

Commit e53c332

Browse files
[TRT] Support YOLO 11 (#468)
1 parent a54d337 commit e53c332

6 files changed

Lines changed: 311 additions & 0 deletions

File tree

examples/lite/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,4 +110,5 @@ add_lite_executable(lite_face_swap cv)
110110
add_lite_executable(lite_face_restoration cv)
111111
add_lite_executable(lite_facefusion_pipeline cv)
112112
add_lite_executable(lite_yolov8 cv)
113+
add_lite_executable(lite_yolov11 cv)
113114
add_lite_executable(lite_sd_pipeline sd)
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
//
2+
// Created by wangzijian on 24-7-8.
3+
//
4+
5+
#include "lite/lite.h"
6+
7+
8+
9+
// Runs one YOLO11 TensorRT detection on a sample image and writes the
// annotated image to disk. The whole body is compiled out unless
// ENABLE_TENSORRT is defined.
static void test_tensorrt()
{
#ifdef ENABLE_TENSORRT
  // NOTE(review): these are absolute, machine-specific paths; adjust them to
  // the local checkout before running the example.
  const std::string engine_path = "/root/autodl-tmp/lite.ai.toolkit/examples/hub/onnx/cv/yolo11_fp32.engine";
  const std::string test_img_path = "/root/autodl-tmp/lite.ai.toolkit/examples/lite/resources/test_lite_yolov5_2.jpg";
  // Named after this model so the result does not share a filename with the
  // YOLOv8 example output the original path referred to.
  const std::string save_img_path = "/root/autodl-tmp/lite.ai.toolkit/examples/test_lite_yolov11_trt_1.jpg";

  // Read the image first: there is no point loading the engine if the input
  // is missing or unreadable.
  cv::Mat test_image = cv::imread(test_img_path);
  if (test_image.empty())
  {
    std::cerr << "failed to read test image: " << test_img_path << std::endl;
    return;
  }

  // Automatic storage duration: the detector is destroyed on every exit path
  // without a manual delete.
  lite::trt::cv::detection::YOLOV11 yolov11(engine_path);

  std::vector<lite::types::Boxf> detected_boxes;
  // score threshold 0.5, IoU threshold 0.4
  yolov11.detect(test_image, detected_boxes, 0.5f, 0.4f);

  std::cout << "trt yolov11 detect done!" << std::endl;
  lite::utils::draw_boxes_inplace(test_image, detected_boxes);
  cv::imwrite(save_img_path, test_image);
#endif
}
31+
32+
static void test_lite()
33+
{
34+
test_tensorrt();
35+
}
36+
37+
38+
39+
// Program entry point. Command-line arguments are not used; the example
// simply runs its tests and reports success.
int main(__unused int argc, __unused char *argv[])
{
  test_lite();
  return 0;
}

lite/models.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@
135135
#include "lite/trt/core/trt_core.h"
136136
#include "lite/trt/cv/trt_yolofacev8.h"
137137
#include "lite/trt/cv/trt_yolov5.h"
138+
#include "lite/trt/cv/trt_yolov11.h"
138139
#include "lite/trt/cv/trt_yolox.h"
139140
#include "lite/trt/cv/trt_yolov8.h"
140141
#include "lite/trt/cv/trt_yolov6.h"
@@ -764,6 +765,7 @@ namespace lite{
764765
typedef trtcv::TRTYoloFaceV8 _TRT_YOLOFaceNet;
765766
typedef trtcv::TRTYoloV5 _TRT_YOLOv5;
766767
typedef trtcv::TRTYoloV8 _TRT_YOLOv8;
768+
typedef trtcv::TRTYOLOV11 _TRT_YOLOV11;
767769
typedef trtcv::TRTYoloX _TRT_YoloX;
768770
typedef trtcv::TRTYoloV6 _TRT_YOLOv6;
769771
typedef trtcv::TRTYOLO5Face _TRT_YOLO5Face;
@@ -789,6 +791,7 @@ namespace lite{
789791
typedef _TRT_YOLOv8 YOLOV8;
790792
typedef _TRT_YoloX YoloX;
791793
typedef _TRT_YOLOv6 YOLOV6;
794+
typedef _TRT_YOLOV11 YOLOV11;
792795
}
793796
namespace face
794797
{

lite/trt/core/trt_core.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ namespace trtcv{
1616
class LITE_EXPORTS TRTYoloV8; // [4] * reference: https://github.com/ultralytics/ultralytics/tree/main
1717
class LITE_EXPORTS TRTYoloV6; // [5] * reference: https://github.com/meituan/YOLOv6
1818
class LITE_EXPORTS TRTYOLO5Face; // [6] * reference: https://github.com/deepcam-cn/yolov5-face
19+
class LITE_EXPORTS TRTYOLOV11;
1920
}
2021

2122
namespace trtcv{

lite/trt/cv/trt_yolov11.cpp

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
//
2+
// Created by wangzijian.
3+
//
4+
5+
#include "trt_yolov11.h"
6+
using trtcv::TRTYOLOV11;
7+
8+
// Runs the non-maximum-suppression variant selected by nms_type over `input`
// and writes the surviving boxes to `output`.
//   nms_type == NMS::BLEND  -> lite::utils::blending_nms
//   nms_type == NMS::OFFSET -> lite::utils::offset_nms
//   anything else           -> lite::utils::hard_nms
// iou_threshold and topk are forwarded unchanged to the chosen routine.
void TRTYOLOV11::nms(std::vector<types::Boxf> &input, std::vector<types::Boxf> &output,
                     float iou_threshold, unsigned int topk, unsigned int nms_type)
{
  switch (nms_type)
  {
    case NMS::BLEND:
      lite::utils::blending_nms(input, output, iou_threshold, topk);
      break;
    case NMS::OFFSET:
      lite::utils::offset_nms(input, output, iou_threshold, topk);
      break;
    default: // NMS::HARD and every unrecognised value
      lite::utils::hard_nms(input, output, iou_threshold, topk);
      break;
  }
}
15+
16+
// Decodes the raw network output into boxes expressed in original-image
// coordinates.
//
// The output is indexed channel-major as output[channel * num_anchors + anchor]
// with the dims taken from output_node_dims[0] (e.g. [1, 84, 8400]): channels
// 0..3 are cx, cy, w, h in network-input pixels, channels 4.. are per-class
// scores.
//
// bbox_collection  cleared, then filled with every anchor whose best class
//                  score is >= score_threshold (at most max_nms boxes).
// output           host pointer to the output tensor.
// score_threshold  minimum best-class score for an anchor to be kept.
// scale            letterbox resize ratio applied to the original image.
// pad_w, pad_h     left / top letterbox padding in network-input pixels.
void TRTYOLOV11::generate_bboxes(std::vector<types::Boxf> &bbox_collection, float* output, float score_threshold,
                                 float scale, float pad_w, float pad_h) {
  bbox_collection.clear();

  if (output == nullptr || output_node_dims.empty()) return;

  const auto &pred_dims = output_node_dims[0]; // [1, 84, 8400]
  // Need 4 box channels plus at least one class channel; otherwise the
  // unsigned "channels - 4" below would wrap around.
  if (pred_dims.size() < 3 || pred_dims[1] <= 4 || pred_dims[2] <= 0) return;
  // A non-positive scale cannot be inverted into image coordinates.
  if (!(scale > 0.f)) return;

  const unsigned int num_anchors = static_cast<unsigned int>(pred_dims[2]);
  const unsigned int num_classes = static_cast<unsigned int>(pred_dims[1]) - 4;
  // Number of entries in the class-name table; engines exported with more
  // classes must not index past it.
  const unsigned int num_names = static_cast<unsigned int>(sizeof(class_names) / sizeof(class_names[0]));

  for (unsigned int i = 0; i < num_anchors; ++i) {
    float max_cls_conf = -1.f;
    unsigned int label = 0;

    // Find the class with the highest score for this anchor.
    for (unsigned int j = 0; j < num_classes; ++j) {
      const float cls_score = output[static_cast<size_t>(4 + j) * num_anchors + i];
      if (cls_score > max_cls_conf) {
        max_cls_conf = cls_score;
        label = j;
      }
    }

    if (max_cls_conf < score_threshold) continue;

    const float cx = output[static_cast<size_t>(0) * num_anchors + i];
    const float cy = output[static_cast<size_t>(1) * num_anchors + i];
    const float w = output[static_cast<size_t>(2) * num_anchors + i];
    const float h = output[static_cast<size_t>(3) * num_anchors + i];

    // Top-left corner in network-input coordinates.
    const float x1_net = cx - w / 2.f;
    const float y1_net = cy - h / 2.f;

    // Undo the letterbox: remove the padding, then undo the resize.
    const float x1 = (x1_net - pad_w) / scale;
    const float y1 = (y1_net - pad_h) / scale;
    const float w_original = w / scale;
    const float h_original = h / scale;

    const float x2 = x1 + w_original;
    const float y2 = y1 + h_original;

    types::Boxf box;
    box.x1 = std::max(0.f, x1);
    box.y1 = std::max(0.f, y1);
    // The image size is not known here, so the far corner is not clamped.
    box.x2 = x2;
    box.y2 = y2;
    box.score = max_cls_conf;
    box.label = label;
    box.label_text = (label < num_names) ? class_names[label] : "unknown";
    box.flag = true;
    bbox_collection.push_back(box);

    // Cap the number of candidates handed to NMS at exactly max_nms.
    if (bbox_collection.size() >= max_nms)
      break;
  }

#if LITETRT_DEBUG
  std::cout << "detected num_anchors: " << num_anchors << "\n";
  std::cout << "generate_bboxes num: " << bbox_collection.size() << "\n";
#endif
}
77+
78+
// Resizes `image` with its aspect ratio preserved and pads it with `color`
// towards `new_shape`, writing the result to `out_image`.
//
// image        source image (not modified).
// out_image    receives the letterboxed image; always new_shape on return.
// new_shape    target size.
// stride       when fixed_shape is true the padding is reduced modulo this
//              value; ignored if not positive.
// color        border colour.
// fixed_shape  if true, pad only by (padding % stride). The result is then
//              generally smaller than new_shape and is stretched to new_shape
//              by the final resize.
//              NOTE(review): confirm this is the intended meaning of the flag.
// scale_up     if false, the image is never enlarged (ratio capped at 1).
void TRTYOLOV11::letterbox(const cv::Mat &image, cv::Mat &out_image,
                           const cv::Size &new_shape,
                           int stride, const cv::Scalar &color,
                           bool fixed_shape, bool scale_up) {
  const cv::Size shape = image.size();
  float r = std::min(static_cast<float>(new_shape.height) / static_cast<float>(shape.height),
                     static_cast<float>(new_shape.width) / static_cast<float>(shape.width));
  if (!scale_up) {
    r = std::min(r, 1.0f);
  }

  const int new_unpad_w = int(round(shape.width * r));
  const int new_unpad_h = int(round(shape.height * r));
  int pad_w = new_shape.width - new_unpad_w;
  int pad_h = new_shape.height - new_unpad_h;

  // stride <= 0 would make the modulo undefined, so it is skipped then.
  if (fixed_shape && stride > 0) {
    pad_w %= stride;
    pad_h %= stride;
  }

  // Halve in floating point. With integer halving an odd padding lost one
  // pixel and the image was stretched by the final resize instead of padded.
  const float half_w = static_cast<float>(pad_w) / 2.f;
  const float half_h = static_cast<float>(pad_h) / 2.f;

  if (shape.width != new_unpad_w || shape.height != new_unpad_h) {
    cv::resize(image, out_image, cv::Size(new_unpad_w, new_unpad_h));
  } else {
    out_image = image;
  }

  // The +/-0.1 nudges send the extra pixel of an odd padding to the
  // bottom/right side, so top + bottom == pad_h and left + right == pad_w.
  const int top = int(round(half_h - 0.1f));
  const int bottom = int(round(half_h + 0.1f));
  const int left = int(round(half_w - 0.1f));
  const int right = int(round(half_w + 0.1f));

  cv::copyMakeBorder(out_image, out_image, top, bottom, left, right, cv::BORDER_CONSTANT, color);

  // Only reached when the padding was reduced (fixed_shape) and the padded
  // image therefore does not already have the requested size.
  if (out_image.size() != new_shape) {
    cv::resize(out_image, out_image, new_shape);
  }
}
119+
120+
// Converts the letterboxed image in place into the network's input format:
// channel order BGR -> RGB, then a 32-bit float image computed as
// pixel * scale_val + mean_val.
void TRTYOLOV11::preprocess(cv::Mat &input_image) {
  // Step 1: swap the channel order.
  cv::cvtColor(input_image, input_image, cv::COLOR_BGR2RGB);

  // Step 2: rescale to float using the class-level constants.
  const double alpha = scale_val;
  const double beta = mean_val;
  input_image.convertTo(input_image, CV_32F, alpha, beta);
}
126+
127+
// main func
128+
// Runs the full detection pipeline on one BGR image:
// letterbox -> preprocess -> host-to-device copy -> TensorRT inference ->
// device-to-host copy -> box decoding -> NMS.
//
// mat              input image; an empty image makes the call a no-op.
// detected_boxes   passed to nms() as its output. It is left untouched when
//                  the function returns early (empty input, unexpected dims,
//                  CUDA or TensorRT failure).
// score_threshold  minimum class score for a candidate box.
// iou_threshold    IoU threshold forwarded to NMS.
// topk             forwarded to NMS.
// nms_type         one of the NMS enum values (HARD / BLEND / OFFSET).
void TRTYOLOV11::detect(const cv::Mat &mat, std::vector<types::Boxf> &detected_boxes, float score_threshold,
                        float iou_threshold, unsigned int topk, unsigned int nms_type) {

  if (mat.empty()) return;

  // The code below indexes input dims [0..3] and output dims [0][0..2].
  if (input_node_dims.size() < 4 || output_node_dims.empty() || output_node_dims[0].size() < 3) {
    std::cerr << "Unexpected TensorRT input/output dims." << std::endl;
    return;
  }

  // Reports a failed CUDA call; returns true on success.
  const auto cuda_ok = [](cudaError_t err, const char *what) -> bool {
    if (err == cudaSuccess) return true;
    std::cerr << "CUDA error during " << what << ": " << cudaGetErrorString(err) << std::endl;
    return false;
  };

  const int target_h = static_cast<int>(input_node_dims[2]);
  const int target_w = static_cast<int>(input_node_dims[3]);
  const int img_h = mat.rows;
  const int img_w = mat.cols;

  // Aspect-preserving resize ratio and the resulting un-padded size.
  const float r = std::min(static_cast<float>(target_h) / img_h, static_cast<float>(target_w) / img_w);
  const int new_unpad_w = int(round(img_w * r));
  const int new_unpad_h = int(round(img_h * r));

  cv::Mat mat_rs;
  if (img_h != new_unpad_h || img_w != new_unpad_w) {
    cv::resize(mat, mat_rs, cv::Size(new_unpad_w, new_unpad_h));
  } else {
    mat_rs = mat.clone();
  }

  // Split the padding so that top + bottom and left + right exactly fill the
  // target size; an odd remainder goes to the bottom/right side.
  const int top = (target_h - new_unpad_h) / 2;
  const int bottom = target_h - new_unpad_h - top;
  const int left = (target_w - new_unpad_w) / 2;
  const int right = target_w - new_unpad_w - left;

  cv::copyMakeBorder(mat_rs, mat_rs, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));

  preprocess(mat_rs);

  // 1. Make the input (HWC -> CHW)
  std::vector<float> input;
  trtcv::utils::transform::create_tensor(mat_rs, input, input_node_dims, trtcv::utils::transform::CHW);

  const size_t input_bytes = static_cast<size_t>(input_node_dims[0]) * static_cast<size_t>(input_node_dims[1]) *
                             static_cast<size_t>(input_node_dims[2]) * static_cast<size_t>(input_node_dims[3]) *
                             sizeof(float);
  // Never read past the end of the host tensor.
  if (input.size() * sizeof(float) < input_bytes) {
    std::cerr << "Input tensor is smaller than the engine input binding." << std::endl;
    return;
  }

  // 2. Inference
  if (!cuda_ok(cudaMemcpyAsync(buffers[0], input.data(), input_bytes, cudaMemcpyHostToDevice, stream),
               "host-to-device copy")) return;
  if (!cuda_ok(cudaStreamSynchronize(stream), "stream sync after upload")) return;

  const bool status = trt_context->enqueueV3(stream); // TensorRT 8.5+ usage
  if (!status){
    std::cerr << "Failed to infer by TensorRT." << std::endl;
    return;
  }

  if (!cuda_ok(cudaStreamSynchronize(stream), "stream sync after inference")) return;

  // D -> H
  const auto &pred_dims = output_node_dims[0];
  const size_t output_size = static_cast<size_t>(pred_dims[0]) * static_cast<size_t>(pred_dims[1]) *
                             static_cast<size_t>(pred_dims[2]);
  std::vector<float> output(output_size);

  if (!cuda_ok(cudaMemcpyAsync(output.data(), buffers[1], output_size * sizeof(float),
                               cudaMemcpyDeviceToHost, stream),
               "device-to-host copy")) return;
  if (!cuda_ok(cudaStreamSynchronize(stream), "stream sync after download")) return;

  // 3. postprocess
  std::vector<types::Boxf> bbox_collection;

  // Map boxes back from letterboxed to original-image coordinates.
  generate_bboxes(bbox_collection, output.data(), score_threshold, r, (float)left, (float)top);

  nms(bbox_collection, detected_boxes, iou_threshold, topk, nms_type);
}

lite/trt/cv/trt_yolov11.h

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
//
2+
// Created by wangzijian.
3+
//
4+
5+
#ifndef LITE_AI_TOOLKIT_TRT_YOLOV11_H
6+
#define LITE_AI_TOOLKIT_TRT_YOLOV11_H
7+
8+
#include "lite/trt/core/trt_core.h"
9+
#include "lite/utils.h"
10+
#include "lite/trt/core/trt_utils.h"
11+
#include <algorithm>
12+
13+
namespace trtcv {
14+
// TensorRT-backed YOLO11 object detector. Engine handling comes from
// BasicTRTHandler; this class adds pre-processing, output decoding and NMS.
class LITE_EXPORTS TRTYOLOV11 : public BasicTRTHandler {
public:
    // Both arguments are forwarded unchanged to BasicTRTHandler.
    explicit TRTYOLOV11(const std::string &_trt_model_path, unsigned int _num_threads = 1)
        : BasicTRTHandler(_trt_model_path, _num_threads) {}

    ~TRTYOLOV11() override = default;

private:
    // Pixel conversion used by preprocess(): value * scale_val + mean_val.
    static constexpr float mean_val = 0.f;
    static constexpr float scale_val = 1.0 / 255.f;

    // Label text table indexed by class id (80 entries).
    const char *class_names[80] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
        "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
        "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
        "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
        "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
        "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
        "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
        "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
        "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };

    // Selector for the suppression routine used by nms().
    enum NMS {
        HARD = 0,
        BLEND = 1,
        OFFSET = 2
    };

    // Limit on the number of candidate boxes collected before NMS.
    static constexpr unsigned int max_nms = 30000;

    // Aspect-preserving resize plus constant-colour padding.
    void letterbox(const cv::Mat &image, cv::Mat &out_image,
                   const cv::Size &new_shape,
                   int stride, const cv::Scalar &color,
                   bool fixed_shape, bool scale_up);

    // In-place BGR -> RGB conversion and float scaling.
    void preprocess(cv::Mat &input_image);

    // Decodes the raw output tensor into boxes in original-image coordinates.
    void generate_bboxes(std::vector<types::Boxf> &bbox_collection,
                         float *output,
                         float score_threshold,
                         float scale, float pad_w, float pad_h);

    // Dispatches to the suppression routine selected by nms_type.
    void nms(std::vector<types::Boxf> &input, std::vector<types::Boxf> &output,
             float iou_threshold, unsigned int topk, unsigned int nms_type);

public:
    // Detects objects in `mat` and hands the final boxes to `detected_boxes`.
    void detect(const cv::Mat &mat, std::vector<types::Boxf> &detected_boxes,
                float score_threshold = 0.25f, float iou_threshold = 0.45f,
                unsigned int topk = 100, unsigned int nms_type = NMS::OFFSET);
};
61+
62+
} // namespace trtcv
63+
64+
#endif //LITE_AI_TOOLKIT_TRT_YOLOV11_H

0 commit comments

Comments
 (0)