Skip to content

Commit 2d66100

Browse files
awoll-bdai authored and exploy-bot committed
Support multi layer images (#79)
### What change is being made
Please provide a detailed description of WHAT change is being made such that the reader is able to understand the change holistically.

### Why this change is being made
Please provide the rationale behind the change and additional context like links to documents, related work or issues.

### Tested
Please provide a description how this change was tested, e.g. unit tests, hardware tests or commands you run.

GitOrigin-RevId: 07952e29e2a1b88ea2c3659fb1479bc6d87ef457
1 parent 55da8a0 commit 2d66100

13 files changed

Lines changed: 293 additions & 182 deletions

control/components.cpp

Lines changed: 46 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -352,57 +352,78 @@ bool HeightScanInput::read(OnnxRuntime& runtime, RobotStateInterface& state, Com
352352
LOG_STREAM(ERROR, fmt::format("Failed to get input buffer {}.{}", key_, layer_name));
353353
return false;
354354
}
355-
copyToBuffer(maybe_scan.value()->layers.at(layer_name), maybe_buffer.value());
355+
copyToBuffer(maybe_scan.value()->float_layers.at(layer_name), maybe_buffer.value());
356356
}
357357
return true;
358358
}
359359

360-
// Implementation of RangeImageInput methods
361-
RangeImageInput::RangeImageInput(const std::string& key,
362-
const metadata::RangeImageMetadata& metadata)
363-
: key_(key), metadata_(metadata) {}
360+
// Implementation of SphericalImageInput methods
361+
SphericalImageInput::SphericalImageInput(const std::string& key, const std::string& sensor_name,
362+
const std::unordered_set<std::string>& channel_names,
363+
const metadata::SphericalImageMetadata& metadata)
364+
: key_(key), sensor_name_(sensor_name), channel_names_(channel_names), metadata_(metadata) {}
364365

365-
bool RangeImageInput::init(RobotStateInterface& state, CommandInterface&) {
366+
bool SphericalImageInput::init(RobotStateInterface& state, CommandInterface&) {
366367
SphericalImageConfig config;
367368
config.v_res = static_cast<int>(metadata_.v_res);
368369
config.h_res = static_cast<int>(metadata_.h_res);
369370
config.v_fov_min_deg = metadata_.v_fov_min_deg;
370371
config.v_fov_max_deg = metadata_.v_fov_max_deg;
371372
config.unobserved_value = metadata_.unobserved_value;
372-
return state.initRangeImage(config);
373+
config.channel_names = channel_names_;
374+
return state.initSphericalImage(sensor_name_, config);
373375
}
374376

375-
bool RangeImageInput::read(OnnxRuntime& runtime, RobotStateInterface& state, CommandInterface&) {
376-
auto maybe_buffer = runtime.inputBuffer<float>(key_);
377-
if (!maybe_buffer.has_value()) return false;
378-
auto maybe_image = state.rangeImage();
379-
if (!maybe_image.has_value()) return false;
380-
copyToBuffer(maybe_image.value(), maybe_buffer.value());
377+
bool SphericalImageInput::read(OnnxRuntime& runtime, RobotStateInterface& state,
378+
CommandInterface&) {
379+
auto maybe_image = state.sphericalImage(sensor_name_, channel_names_);
380+
if (!maybe_image.has_value()) {
381+
LOG_STREAM(ERROR, "Failed to get spherical image data for SphericalImageInput");
382+
return false;
383+
}
384+
for (const auto& channel_name : channel_names_) {
385+
auto maybe_buffer = runtime.inputBuffer<float>(fmt::format("{}.{}", key_, channel_name));
386+
if (!maybe_buffer.has_value()) {
387+
LOG_STREAM(ERROR, fmt::format("Failed to get input buffer {}.{}", key_, channel_name));
388+
return false;
389+
}
390+
copyToBuffer(maybe_image.value()->float_channels.at(channel_name), maybe_buffer.value());
391+
}
381392
return true;
382393
}
383394

384-
// Implementation of DepthImageInput methods
385-
DepthImageInput::DepthImageInput(const std::string& key,
386-
const metadata::DepthImageMetadata& metadata)
387-
: key_(key), metadata_(metadata) {}
395+
// Implementation of PinholeImageInput methods
396+
PinholeImageInput::PinholeImageInput(const std::string& key, const std::string& sensor_name,
397+
const std::unordered_set<std::string>& channel_names,
398+
const metadata::PinholeImageMetadata& metadata)
399+
: key_(key), sensor_name_(sensor_name), channel_names_(channel_names), metadata_(metadata) {}
388400

389-
bool DepthImageInput::init(RobotStateInterface& state, CommandInterface&) {
401+
bool PinholeImageInput::init(RobotStateInterface& state, CommandInterface&) {
390402
PinholeImageConfig config;
391403
config.width = metadata_.width;
392404
config.height = metadata_.height;
393405
config.fx = metadata_.fx;
394406
config.fy = metadata_.fy;
395407
config.cx = metadata_.cx;
396408
config.cy = metadata_.cy;
397-
return state.initDepthImage(config);
409+
config.channel_names = channel_names_;
410+
return state.initPinholeImage(sensor_name_, config);
398411
}
399412

400-
bool DepthImageInput::read(OnnxRuntime& runtime, RobotStateInterface& state, CommandInterface&) {
401-
auto maybe_buffer = runtime.inputBuffer<float>(key_);
402-
if (!maybe_buffer.has_value()) return false;
403-
auto maybe_image = state.depthImage();
404-
if (!maybe_image.has_value()) return false;
405-
copyToBuffer(maybe_image.value(), maybe_buffer.value());
413+
bool PinholeImageInput::read(OnnxRuntime& runtime, RobotStateInterface& state, CommandInterface&) {
414+
auto maybe_image = state.pinholeImage(sensor_name_, channel_names_);
415+
if (!maybe_image.has_value()) {
416+
LOG_STREAM(ERROR, "Failed to get pinhole image data for PinholeImageInput");
417+
return false;
418+
}
419+
for (const auto& channel_name : channel_names_) {
420+
auto maybe_buffer = runtime.inputBuffer<float>(fmt::format("{}.{}", key_, channel_name));
421+
if (!maybe_buffer.has_value()) {
422+
LOG_STREAM(ERROR, fmt::format("Failed to get input buffer {}.{}", key_, channel_name));
423+
return false;
424+
}
425+
copyToBuffer(maybe_image.value()->float_channels.at(channel_name), maybe_buffer.value());
426+
}
406427
return true;
407428
}
408429

control/components.hpp

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -299,52 +299,67 @@ class HeightScanInput : public Input {
299299
};
300300

301301
/**
302-
* @brief Input component that reads range/depth image from LiDAR sensor.
302+
* @brief Input component that reads spherical image data with multiple channels.
303303
*
304-
* Reads range image data (distances) from a LiDAR sensor and copies it to the
305-
* ONNX input buffer. Configuration includes resolution, field of view, and
306-
* sentinel value for unobserved points.
304+
* Reads spherical image data from a named sensor with configurable channels (e.g., range,
305+
* risk). Each channel is written to a separate ONNX input buffer with the
306+
* naming pattern "key.channel_name".
307307
*/
308-
class RangeImageInput : public Input {
308+
class SphericalImageInput : public Input {
309309
public:
310310
/**
311-
* @brief Construct a range image input component.
311+
* @brief Construct a spherical image input component.
312312
*
313-
* @param key ONNX input tensor name (e.g., "sensors.lidar.range").
314-
* @param metadata Range image metadata containing resolution, FOV, and sentinel value.
313+
* @param key ONNX input tensor base name (e.g., "sensor.spherical_image.lidar1").
314+
* @param sensor_name Name of the spherical image sensor to read from.
315+
* @param channel_names Set of channel names to include (each creates a buffer
316+
* "key.channel_name").
317+
* @param metadata Spherical image metadata containing resolution, FOV, and sentinel value.
315318
*/
316-
RangeImageInput(const std::string& key, const metadata::RangeImageMetadata& metadata);
319+
SphericalImageInput(const std::string& key, const std::string& sensor_name,
320+
const std::unordered_set<std::string>& channel_names,
321+
const metadata::SphericalImageMetadata& metadata);
317322

318323
bool init(RobotStateInterface& state, CommandInterface& command) override;
319324
bool read(OnnxRuntime& runtime, RobotStateInterface& state, CommandInterface& command) override;
320325

321326
private:
322-
std::string key_; ///< ONNX input tensor name.
323-
metadata::RangeImageMetadata metadata_; ///< Range image configuration.
327+
std::string key_; ///< ONNX input tensor base name.
328+
std::string sensor_name_; ///< Spherical image sensor name.
329+
std::unordered_set<std::string> channel_names_; ///< Channel names to read.
330+
metadata::SphericalImageMetadata metadata_; ///< Spherical image configuration.
324331
};
325332

326333
/**
327-
* @brief Input component that reads depth image from camera sensor.
334+
* @brief Input component that reads pinhole camera image data with multiple channels.
328335
*
329-
* Reads depth image data from a camera sensor and copies it to the ONNX input
330-
* buffer. Configuration includes image dimensions and camera intrinsic parameters.
336+
* Reads pinhole image data from a named sensor with configurable channels (e.g., depth,
337+
* risk). Each channel is written to a separate ONNX input buffer with the
338+
* naming pattern "key.channel_name".
331339
*/
332-
class DepthImageInput : public Input {
340+
class PinholeImageInput : public Input {
333341
public:
334342
/**
335-
* @brief Construct a depth image input component.
343+
* @brief Construct a pinhole image input component.
336344
*
337-
* @param key ONNX input tensor name (e.g., "sensors.camera.depth").
338-
* @param metadata Depth image metadata containing width, height, and camera intrinsics.
345+
* @param key ONNX input tensor base name (e.g., "sensor.pinhole_image.cam1").
346+
* @param sensor_name Name of the pinhole image sensor to read from.
347+
* @param channel_names Set of channel names to include (each creates a buffer
348+
* "key.channel_name").
349+
* @param metadata Pinhole image metadata containing width, height, and camera intrinsics.
339350
*/
340-
DepthImageInput(const std::string& key, const metadata::DepthImageMetadata& metadata);
351+
PinholeImageInput(const std::string& key, const std::string& sensor_name,
352+
const std::unordered_set<std::string>& channel_names,
353+
const metadata::PinholeImageMetadata& metadata);
341354

342355
bool init(RobotStateInterface& state, CommandInterface& command) override;
343356
bool read(OnnxRuntime& runtime, RobotStateInterface& state, CommandInterface& command) override;
344357

345358
private:
346-
std::string key_; ///< ONNX input tensor name.
347-
metadata::DepthImageMetadata metadata_; ///< Depth image configuration.
359+
std::string key_; ///< ONNX input tensor base name.
360+
std::string sensor_name_; ///< Pinhole image sensor name.
361+
std::unordered_set<std::string> channel_names_; ///< Channel names to read.
362+
metadata::PinholeImageMetadata metadata_; ///< Pinhole image configuration.
348363
};
349364

350365
/**

control/controller.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,6 @@ OnnxRLController::OnnxRLController(RobotStateInterface& state, CommandInterface&
2323
context_.registerMatcher(std::make_unique<SE2VelocityMatcher>());
2424
context_.registerMatcher(std::make_unique<IMUAngularVelocityMatcher>());
2525
context_.registerMatcher(std::make_unique<IMUOrientationMatcher>());
26-
context_.registerMatcher(std::make_unique<RangeImageMatcher>());
27-
context_.registerMatcher(std::make_unique<DepthImageMatcher>());
2826
context_.registerMatcher(std::make_unique<BodyPositionMatcher>());
2927
context_.registerMatcher(std::make_unique<BodyOrientationMatcher>());
3028
context_.registerMatcher(std::make_unique<CommandSE3PoseMatcher>());
@@ -36,6 +34,8 @@ OnnxRLController::OnnxRLController(RobotStateInterface& state, CommandInterface&
3634
context_.registerGroupMatcher(std::make_unique<JointMatcher>());
3735
context_.registerGroupMatcher(std::make_unique<JointTargetMatcher>());
3836
context_.registerGroupMatcher(std::make_unique<HeightScanMatcher>());
37+
context_.registerGroupMatcher(std::make_unique<SphericalImageMatcher>());
38+
context_.registerGroupMatcher(std::make_unique<PinholeImageMatcher>());
3939
context_.registerGroupMatcher(std::make_unique<MemoryMatcher>());
4040
}
4141

control/interfaces.hpp

Lines changed: 22 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,10 @@
55
#include <Eigen/Geometry>
66

77
#include <optional>
8+
#include <span>
89
#include <string>
910
#include <string_view>
11+
#include <unordered_map>
1012
#include <vector>
1113

1214
/**
@@ -101,12 +103,28 @@ struct SE2VelocityRanges {
101103
*/
102104
struct HeightScan {
103105
/**
104-
* @brief Map of layer names to their data vectors.
106+
* @brief Map of layer names to their data spans.
105107
*
106-
* Each entry maps a layer name (e.g., "height", "color") to a vector of
107-
* double values representing the flattened grid data for that layer.
108+
* Each entry maps a layer name (e.g., "height", "color") to a span of
109+
* float values representing the flattened grid data for that layer.
108110
*/
109-
std::unordered_map<std::string, std::vector<double>> layers;
111+
std::unordered_map<std::string, std::span<const float>> float_layers;
112+
};
113+
114+
/**
115+
* @brief A flattened multi-channel image.
116+
*
117+
* Each channel represents a different data type (e.g., depth, range, risk).
118+
* All channels share the same projection and must have the same length.
119+
*/
120+
struct MultiChannelImage {
121+
/**
122+
* @brief Map of channel names to their data spans.
123+
*
124+
* Each entry maps a channel name to a span of float values representing the flattened image data
125+
* for that channel.
126+
*/
127+
std::unordered_map<std::string, std::span<const float>> float_channels;
110128
};
111129

112130
} // namespace exploy::control

0 commit comments

Comments
 (0)