foundationpose_cpp/simple_tests/include/tests/help_func.hpp at 8b39573f7f28e60fada8a83ecf757b5b58e8adbd · zz990099/foundationpose_cpp · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#pragma once

#include <tuple>
#include <opencv2/opencv.hpp>
#include <glog/logging.h>
#include <glog/log_severity.h>
#include <Eigen/Dense>
#include <fstream>

inline std::tuple<cv::Mat, cv::Mat, cv::Mat> ReadRgbDepthMask(const std::string &rgb_path,
                                                              const std::string &depth_path,
                                                              const std::string &mask_path)
{
  cv::Mat rgb   = cv::imread(rgb_path);
  cv::Mat depth = cv::imread(depth_path, cv::IMREAD_UNCHANGED);
  cv::Mat mask  = cv::imread(mask_path, cv::IMREAD_UNCHANGED);

  CHECK(!rgb.empty()) << "Failed reading rgb from path : " << rgb_path;
  CHECK(!depth.empty()) << "Failed reading depth from path : " << depth_path;
  CHECK(!mask.empty()) << "Failed reading mask from path : " << mask_path;

  depth.convertTo(depth, CV_32FC1);
  depth = depth / 1000.f;

  cv::cvtColor(rgb, rgb, cv::COLOR_BGR2RGB);
  if (mask.channels() == 3)
  {
    cv::cvtColor(mask, mask, cv::COLOR_BGR2RGB);
    std::vector<cv::Mat> channels;
    cv::split(mask, channels);
    mask = channels[0];
  }

  return {rgb, depth, mask};
}

inline std::tuple<cv::Mat, cv::Mat> ReadRgbDepth(const std::string &rgb_path,
                                                 const std::string &depth_path)
{
  cv::Mat rgb   = cv::imread(rgb_path);
  cv::Mat depth = cv::imread(depth_path, cv::IMREAD_UNCHANGED);

  CHECK(!rgb.empty()) << "Failed reading rgb from path : " << rgb_path;
  CHECK(!depth.empty()) << "Failed reading depth from path : " << depth_path;

  depth.convertTo(depth, CV_32FC1);
  depth = depth / 1000.f;

  cv::cvtColor(rgb, rgb, cv::COLOR_BGR2RGB);

  return {rgb, depth};
}

inline void draw3DBoundingBox(const Eigen::Matrix3f &intrinsic,
                              const Eigen::Matrix4f &pose,
                              int                    input_image_H,
                              int                    input_image_W,
                              const Eigen::Vector3f &dimension,
                              cv::Mat               &image)
{
  // 目标的长宽高
  float l = dimension(0) / 2;
  float w = dimension(1) / 2;
  float h = dimension(2) / 2;

  // 目标的八个顶点在物体坐标系中的位置
  Eigen::Vector3f points[8] = {
      {-l, -w, h},  {l, -w, h},   {l, w, h},   {-l, w, h},
      {-l, -w, -h}, {l, -w, -h},  {l, w, -h},  {-l, w, -h}
  };

  // 变换到世界坐标系
  Eigen::Vector4f transformed_points[8];
  for (int i = 0; i < 8; ++i)
  {
    transformed_points[i] = pose * Eigen::Vector4f(points[i](0), points[i](1), points[i](2), 1);
  }

  // 投影到图像平面
  std::vector<cv::Point2f> image_points;
  for (int i = 0; i < 8; ++i)
  {
    float x = transformed_points[i](0) / transformed_points[i](2);
    float y = transformed_points[i](1) / transformed_points[i](2);

    // 使用内参矩阵进行投影
    float u = intrinsic(0, 0) * x + intrinsic(0, 2);
    float v = intrinsic(1, 1) * y + intrinsic(1, 2);

    image_points.emplace_back(static_cast<float>(u), static_cast<float>(v));
  }

  // 绘制边框（连接顶点）
  std::vector<std::pair<int, int>> edges = {
      {0, 1}, {1, 2}, {2, 3}, {3, 0}, // 底面
      {4, 5}, {5, 6}, {6, 7}, {7, 4}, // 顶面
      {0, 4}, {1, 5}, {2, 6}, {3, 7}  // 侧面
  };

  for (const auto &edge : edges)
  {
    if (edge.first < image_points.size() && edge.second < image_points.size())
    {
      cv::line(image, image_points[edge.first], image_points[edge.second], cv::Scalar(0, 255, 0), 2); // 绿色边框
    }
  }

  // 计算物体中心在世界坐标系中的位置
  Eigen::Vector4f center_obj(0, 0, 0, 1);  // 物体坐标系中心原点
  Eigen::Vector4f center_world = pose * center_obj;

  // 定义三个轴的终点
  float axis_length = (dimension(0) + dimension(1) + dimension(2)) / 6;
  Eigen::Vector4f x_end_obj(axis_length, 0, 0, 1);
  Eigen::Vector4f y_end_obj(0, axis_length, 0, 1);
  Eigen::Vector4f z_end_obj(0, 0, axis_length, 1);

  Eigen::Vector4f x_end_world = pose * x_end_obj;
  Eigen::Vector4f y_end_world = pose * y_end_obj;
  Eigen::Vector4f z_end_world = pose * z_end_obj;

  std::vector<Eigen::Vector4f> axis_end_points = {x_end_world, y_end_world, z_end_world};
  std::vector<cv::Scalar> axis_colors = {
      cv::Scalar(0, 0, 255),     // X轴: 红色
      cv::Scalar(0, 255, 0),     // Y轴: 绿色
      cv::Scalar(255, 0, 0)      // Z轴: 蓝色
  };

  float cx = center_world(0) / center_world(2);
  float cy = center_world(1) / center_world(2);
  float cu = intrinsic(0, 0) * cx + intrinsic(0, 2);
  float cv = intrinsic(1, 1) * cy + intrinsic(1, 2);
  cv::Point center_pt(cu, cv);

  for (int k = 0; k < 3; ++k)
  {
    float ex = axis_end_points[k](0) / axis_end_points[k](2);
    float ey = axis_end_points[k](1) / axis_end_points[k](2);
    float eu = intrinsic(0, 0) * ex + intrinsic(0, 2);
    float ev = intrinsic(1, 1) * ey + intrinsic(1, 2);

    cv::line(image, center_pt, cv::Point(eu, ev), axis_colors[k], 3);
  }
}

inline Eigen::Matrix3f ReadCamK(const std::string &cam_K_path)
{
  Eigen::Matrix3f K;

  // 打开文件
  std::ifstream file(cam_K_path.c_str());
  CHECK(file) << "Failed open file : " << cam_K_path;

  // 读取数据并存入矩阵
  for (int i = 0; i < 3; ++i)
  {
    for (int j = 0; j < 3; ++j)
    {
      file >> K(i, j);
    }
  }

  // 关闭文件
  file.close();

  return K;
}

inline void saveVideo(const std::vector<cv::Mat> &frames,
                      const std::string          &outputPath,
                      double                      fps = 30.0)
{
  if (frames.empty())
  {
    std::cerr << "Error: No frames to write!" << std::endl;
    return;
  }

  // 获取帧的宽度和高度
  int frameWidth  = frames[0].cols;
  int frameHeight = frames[0].rows;

  // 定义视频编码格式（MP4 使用 `cv::VideoWriter::fourcc('m', 'p', '4', 'v')` 或
  // `cv::VideoWriter::fourcc('H', '2', '6', '4')`）
  int fourcc = cv::VideoWriter::fourcc('m', 'p', '4', 'v'); // MPEG-4 编码
  // int fourcc = cv::VideoWriter::fourcc('H', '2', '6', '4'); // H.264
  // 编码（可能需要额外的编解码器支持）

  // 创建 VideoWriter 对象
  cv::VideoWriter writer(outputPath, fourcc, fps, cv::Size(frameWidth, frameHeight));

  // 检查是否成功打开
  if (!writer.isOpened())
  {
    std::cerr << "Error: Could not open the video file for writing!" << std::endl;
    return;
  }

  // 写入所有帧
  for (const auto &frame : frames)
  {
    // 确保所有帧大小一致
    if (frame.cols != frameWidth || frame.rows != frameHeight)
    {
      std::cerr << "Error: Frame size mismatch!" << std::endl;
      break;
    }
    writer.write(frame);
  }

  // 释放资源
  writer.release();
  std::cout << "Video saved successfully: " << outputPath << std::endl;
}