1+ //
2+ // Created by wangzijian.
3+ //
4+
5+ #include " trt_yolov11.h"
6+ using trtcv::TRTYOLOV11 ;
7+
8+ void TRTYOLOV11::nms (std::vector<types::Boxf> &input, std::vector<types::Boxf> &output,
9+ float iou_threshold, unsigned int topk, unsigned int nms_type)
10+ {
11+ if (nms_type == NMS ::BLEND ) lite::utils::blending_nms (input, output, iou_threshold, topk);
12+ else if (nms_type == NMS ::OFFSET ) lite::utils::offset_nms (input, output, iou_threshold, topk);
13+ else lite::utils::hard_nms (input, output, iou_threshold, topk);
14+ }
15+
16+ void TRTYOLOV11::generate_bboxes (std::vector<types::Boxf> &bbox_collection, float * output, float score_threshold,
17+ float scale, float pad_w, float pad_h) {
18+ auto pred_dims = output_node_dims[0 ]; // [1, 84, 8400]
19+ const unsigned int num_anchors = pred_dims[2 ];
20+ const unsigned int num_classes = pred_dims[1 ] - 4 ;
21+
22+ bbox_collection.clear ();
23+ unsigned int count = 0 ;
24+
25+ for (unsigned int i = 0 ; i < num_anchors; ++i) {
26+ float max_cls_conf = -1 .f ;
27+ unsigned int label = 0 ;
28+
29+ // 寻找最大类别分数
30+ for (unsigned int j = 0 ; j < num_classes; ++j) {
31+ float cls_score = output[(4 + j) * num_anchors + i];
32+ if (cls_score > max_cls_conf) {
33+ max_cls_conf = cls_score;
34+ label = j;
35+ }
36+ }
37+
38+ if (max_cls_conf < score_threshold) continue ;
39+
40+ float cx = output[0 * num_anchors + i];
41+ float cy = output[1 * num_anchors + i];
42+ float w = output[2 * num_anchors + i];
43+ float h = output[3 * num_anchors + i];
44+
45+ float x1_net = cx - w / 2 .f ;
46+ float y1_net = cy - h / 2 .f ;
47+
48+ float x1 = (x1_net - pad_w) / scale;
49+ float y1 = (y1_net - pad_h) / scale;
50+ float w_original = w / scale;
51+ float h_original = h / scale;
52+
53+ float x2 = x1 + w_original;
54+ float y2 = y1 + h_original;
55+
56+ types::Boxf box;
57+ box.x1 = std::max (0 .f , x1);
58+ box.y1 = std::max (0 .f , y1);
59+ box.x2 = x2;
60+ box.y2 = y2;
61+ box.score = max_cls_conf;
62+ box.label = label;
63+ box.label_text = class_names[label];
64+ box.flag = true ;
65+ bbox_collection.push_back (box);
66+
67+ count += 1 ;
68+ if (count > max_nms)
69+ break ;
70+ }
71+
72+ #if LITETRT_DEBUG
73+ std::cout << " detected num_anchors: " << num_anchors << " \n " ;
74+ std::cout << " generate_bboxes num: " << bbox_collection.size () << " \n " ;
75+ #endif
76+ }
77+
78+ void TRTYOLOV11::letterbox (const cv::Mat &image, cv::Mat &out_image,
79+ const cv::Size &new_shape,
80+ int stride, const cv::Scalar &color,
81+ bool fixed_shape, bool scale_up) {
82+ cv::Size shape = image.size ();
83+ float r = std::min ((float )new_shape.height / (float )shape.height ,
84+ (float )new_shape.width / (float )shape.width );
85+ if (!scale_up) {
86+ r = std::min (r, 1 .0f );
87+ }
88+
89+ int new_unpad_w = int (round (shape.width * r));
90+ int new_unpad_h = int (round (shape.height * r));
91+ int dw = new_shape.width - new_unpad_w;
92+ int dh = new_shape.height - new_unpad_h;
93+
94+ if (fixed_shape) {
95+ dw = dw % stride;
96+ dh = dh % stride;
97+ }
98+
99+ dw /= 2 ;
100+ dh /= 2 ;
101+
102+ if (shape.width != new_unpad_w || shape.height != new_unpad_h) {
103+ cv::resize (image, out_image, cv::Size (new_unpad_w, new_unpad_h));
104+ } else {
105+ out_image = image;
106+ }
107+
108+ int top = int (round (dh - 0.1 ));
109+ int bottom = int (round (dh + 0.1 ));
110+ int left = int (round (dw - 0.1 ));
111+ int right = int (round (dw + 0.1 ));
112+
113+ cv::copyMakeBorder (out_image, out_image, top, bottom, left, right, cv::BORDER_CONSTANT , color);
114+
115+ if (out_image.size () != new_shape) {
116+ cv::resize (out_image, out_image, new_shape);
117+ }
118+ }
119+
120+ void TRTYOLOV11::preprocess (cv::Mat &input_image) {
121+ // 1. Convert BGR -> RGB
122+ cv::cvtColor (input_image, input_image, cv::COLOR_BGR2RGB );
123+ // 2. Normalize (0-255 -> 0.0-1.0)
124+ input_image.convertTo (input_image, CV_32F , scale_val, mean_val);
125+ }
126+
127+ // main func
128+ void TRTYOLOV11::detect (const cv::Mat &mat, std::vector<types::Boxf> &detected_boxes, float score_threshold,
129+ float iou_threshold, unsigned int topk, unsigned int nms_type) {
130+
131+ if (mat.empty ()) return ;
132+
133+
134+ int target_h = input_node_dims[2 ];
135+ int target_w = input_node_dims[3 ];
136+ int img_h = mat.rows ;
137+ int img_w = mat.cols ;
138+
139+
140+ float r = std::min ((float )target_h / img_h, (float )target_w / img_w);
141+ int new_unpad_w = int (round (img_w * r));
142+ int new_unpad_h = int (round (img_h * r));
143+
144+ int dw = (target_w - new_unpad_w) / 2 ;
145+ int dh = (target_h - new_unpad_h) / 2 ;
146+
147+ cv::Mat mat_rs;
148+ if (img_h != new_unpad_h || img_w != new_unpad_w) {
149+ cv::resize (mat, mat_rs, cv::Size (new_unpad_w, new_unpad_h));
150+ } else {
151+ mat_rs = mat.clone ();
152+ }
153+
154+ int top = dh;
155+ int bottom = target_h - new_unpad_h - top;
156+ int left = dw;
157+ int right = target_w - new_unpad_w - left;
158+
159+ cv::copyMakeBorder (mat_rs, mat_rs, top, bottom, left, right, cv::BORDER_CONSTANT , cv::Scalar (114 , 114 , 114 ));
160+ // -------------------------------
161+
162+ preprocess (mat_rs);
163+
164+ // 1. Make the input (HWC -> CHW)
165+ std::vector<float > input;
166+ trtcv::utils::transform::create_tensor (mat_rs, input, input_node_dims, trtcv::utils::transform::CHW );
167+
168+ // 2. Inference
169+ cudaMemcpyAsync (buffers[0 ], input.data (),
170+ input_node_dims[0 ] * input_node_dims[1 ] * input_node_dims[2 ] * input_node_dims[3 ] * sizeof (float ),
171+ cudaMemcpyHostToDevice, stream);
172+
173+ cudaStreamSynchronize (stream);
174+
175+ bool status = trt_context->enqueueV3 (stream); // TensorRT 8.5+ usage
176+ if (!status){
177+ std::cerr << " Failed to infer by TensorRT." << std::endl;
178+ return ;
179+ }
180+
181+ cudaStreamSynchronize (stream);
182+
183+ // D -> H
184+ auto pred_dims = output_node_dims[0 ];
185+ size_t output_size = pred_dims[0 ] * pred_dims[1 ] * pred_dims[2 ];
186+ std::vector<float > output (output_size);
187+
188+ cudaMemcpyAsync (output.data (), buffers[1 ], output_size * sizeof (float ),
189+ cudaMemcpyDeviceToHost, stream);
190+ cudaStreamSynchronize (stream);
191+
192+ // 3. postprocess
193+ std::vector<types::Boxf> bbox_collection;
194+
195+ // restore letterbox
196+ generate_bboxes (bbox_collection, output.data (), score_threshold, r, (float )left, (float )top);
197+
198+ nms (bbox_collection, detected_boxes, iou_threshold, topk, nms_type);
199+ }
0 commit comments