Merge pull request #24 from zz990099/develop_support_multi_iteration_refinement

zz990099 · web-flow · commit d27931ae8c2b · 2025-04-29T18:31:06.000+08:00
Support multi-iteration refinement
diff --git a/detection_6d_foundationpose/include/detection_6d_foundationpose/foundationpose.hpp b/detection_6d_foundationpose/include/detection_6d_foundationpose/foundationpose.hpp
@@ -29,14 +29,16 @@ class Base6DofDetectionModel {
    * @param mask Object mask (CV_8UC1 format, positive pixels > 0)
    * @param target_name Object category name (must match construction mapping)
    * @param out_pose_in_mesh Output pose in mesh coordinate frame
+   * @param refine_itr Number of refinement iterations to run (defaults to 1)
    * @return true Registration successful
    * @return false Registration failed
    */
   virtual bool Register(const cv::Mat     &rgb,
                         const cv::Mat     &depth,
                         const cv::Mat     &mask,
                         const std::string &target_name,
-                        Eigen::Matrix4f   &out_pose_in_mesh) = 0;
+                        Eigen::Matrix4f   &out_pose_in_mesh,
+                        size_t             refine_itr = 1) = 0;
 
   /**
    * @brief Track object pose from subsequent frames (lightweight version of Register)
@@ -50,14 +52,16 @@ class Base6DofDetectionModel {
    * @param hyp_pose_in_mesh Hypothesis pose in mesh frame (from Register or other sources)
    * @param target_name Object category name (must match construction mapping)
    * @param out_pose_in_mesh Output pose in mesh coordinate frame
+   * @param refine_itr Number of refinement iterations to run (defaults to 1)
    * @return true Tracking successful
    * @return false Tracking failed
    */
   virtual bool Track(const cv::Mat         &rgb,
                      const cv::Mat         &depth,
                      const Eigen::Matrix4f &hyp_pose_in_mesh,
                      const std::string     &target_name,
-                     Eigen::Matrix4f       &out_pose_in_mesh) = 0;
+                     Eigen::Matrix4f       &out_pose_in_mesh,
+                     size_t                 refine_itr = 1) = 0;
 
   /**
    * @brief Virtual destructor for proper resource cleanup
diff --git a/detection_6d_foundationpose/src/foundationpose.cpp b/detection_6d_foundationpose/src/foundationpose.cpp
diff --git a/detection_6d_foundationpose/src/foundationpose_render.cpp b/detection_6d_foundationpose/src/foundationpose_render.cpp
@@ -220,15 +220,13 @@ void WrapFloatPtrToNHWCTensor(
 FoundationPoseRenderer::FoundationPoseRenderer(std::shared_ptr<BaseMeshLoader> mesh_loader,
                                                const Eigen::Matrix3f          &intrinsic,
                                                const int                       input_poses_num,
-                                               const float                     crop_ratio,
                                                const int                       crop_window_H,
                                                const int                       crop_window_W,
                                                const float                     min_depth,
                                                const float                     max_depth)
     : mesh_loader_(mesh_loader),
       intrinsic_(intrinsic),
       input_poses_num_(input_poses_num),
-      crop_ratio_(crop_ratio),
       crop_window_H_(crop_window_H),
       crop_window_W_(crop_window_W),
       min_depth_(min_depth),
@@ -820,14 +818,15 @@ bool FoundationPoseRenderer::RenderAndTransform(const std::vector<Eigen::Matrix4
                                                 int   input_image_height,
                                                 int   input_image_width,
                                                 void *render_buffer,
-                                                void *transf_buffer)
+                                                void *transf_buffer,
+                                                float crop_ratio)
 {
   const int input_poses_num = _poses.size();
 
   // 1. 根据目标位姿计算变换矩阵
   std::vector<Eigen::MatrixXf> poses(_poses.begin(), _poses.end());
   Eigen::Vector2i              out_size = {crop_window_H_, crop_window_W_};
-  auto tfs = ComputeCropWindowTF(poses, intrinsic_, out_size, crop_ratio_, mesh_diameter_);
+  auto tfs = ComputeCropWindowTF(poses, intrinsic_, out_size, crop_ratio, mesh_diameter_);
   CHECK_STATE(tfs.size() != 0, "[FoundationposeRender] The transform matrix vector is empty");
 
   // 2. 将输入的poses拷贝到device端
diff --git a/detection_6d_foundationpose/src/foundationpose_render.hpp b/detection_6d_foundationpose/src/foundationpose_render.hpp
@@ -21,10 +21,9 @@ class FoundationPoseRenderer {
   FoundationPoseRenderer(std::shared_ptr<BaseMeshLoader> mesh_loader,
                          const Eigen::Matrix3f          &intrinsic,
                          const int                       input_poses_num,
-                         const float                     crop_ratio    = 1.2,
                          const int                       crop_window_H = 160,
                          const int                       crop_window_W = 160,
-                         const float                     min_depth     = 0.1,
+                         const float                     min_depth     = 0.001,
                          const float                     max_depth     = 4.0);
 
   bool RenderAndTransform(const std::vector<Eigen::Matrix4f> &_poses,
@@ -34,7 +33,8 @@ class FoundationPoseRenderer {
                           int                                 input_image_height,
                           int                                 input_image_width,
                           void                               *render_buffer,
-                          void                               *transf_buffer);
+                          void                               *transf_buffer,
+                          float                               crop_ratio);
 
   ~FoundationPoseRenderer();
 
@@ -94,7 +94,6 @@ class FoundationPoseRenderer {
   // crop window size (model input size)
   const int             crop_window_H_;
   const int             crop_window_W_;
-  const float           crop_ratio_; // refine,    score->1.1
   const Eigen::Matrix3f intrinsic_;
 
   // depth threshold
diff --git a/detection_6d_foundationpose/src/foundationpose_sampling.cpp b/detection_6d_foundationpose/src/foundationpose_sampling.cpp
@@ -300,7 +300,6 @@ bool GuessTranslation(const Eigen::MatrixXf  &depth,
 FoundationPoseSampler::FoundationPoseSampler(const int              max_input_image_H,
                                              const int              max_input_image_W,
                                              const float            min_depth,
-                                             const float            max_depth,
                                              const Eigen::Matrix3f &intrinsic)
     : max_input_image_H_(max_input_image_H),
       max_input_image_W_(max_input_image_W),
diff --git a/detection_6d_foundationpose/src/foundationpose_sampling.hpp b/detection_6d_foundationpose/src/foundationpose_sampling.hpp
@@ -13,7 +13,6 @@ class FoundationPoseSampler {
   FoundationPoseSampler(const int              max_input_image_H,
                         const int              max_input_image_W,
                         const float            min_depth,
-                        const float            max_depth,
                         const Eigen::Matrix3f &intrinsic);
 
   bool GetHypPoses(void                         *_depth_on_device,
diff --git a/detection_6d_foundationpose/src/foundationpose_utils.hpp b/detection_6d_foundationpose/src/foundationpose_utils.hpp
@@ -54,12 +54,8 @@ struct FoundationPosePipelinePackage : public async_pipeline::IPipelinePackage {
   std::shared_ptr<void> depth_on_device;
   // device端由depth转换得到的xyz_map
   std::shared_ptr<void> xyz_map_on_device;
-  // device端的输入mask缓存
-  // std::shared_ptr<void> mask_on_device;
   // 生成的假设位姿
   std::vector<Eigen::Matrix4f> hyp_poses;
-  // refine后的位姿
-  std::vector<Eigen::Matrix4f> refine_poses;
 
   // 保存refine阶段用的推理缓存
   std::shared_ptr<inference_core::IBlobsBuffer> refiner_blobs_buffer;
diff --git a/simple_tests/src/test_foundationpose.cpp b/simple_tests/src/test_foundationpose.cpp
@@ -17,6 +17,7 @@ static const std::string demo_textured_obj_path = demo_data_path_ + "/mesh/textu
 static const std::string demo_textured_map_path = demo_data_path_ + "/mesh/texture_map.png";
 static const std::string demo_name_             = "mustard";
 static const std::string frame_id               = "1581120424100262102";
+static const size_t      refine_itr             = 1;
 
 std::tuple<std::shared_ptr<Base6DofDetectionModel>, std::shared_ptr<BaseMeshLoader>> CreateModel()
 {
@@ -58,7 +59,7 @@ TEST(foundationpose_test, test)
   const Eigen::Vector3f object_dimension = mesh_loader->GetObjectDimension();
 
   Eigen::Matrix4f out_pose;
-  CHECK(foundation_pose->Register(rgb.clone(), depth, mask, demo_name_, out_pose));
+  CHECK(foundation_pose->Register(rgb.clone(), depth, mask, demo_name_, out_pose, refine_itr));
   LOG(WARNING) << "first Pose : " << out_pose;
 
   // [temp] for test