Skip to content

Commit 1953373

Browse files
committed
First shot at deduplicating CV code
1 parent 4e64116 commit 1953373

File tree

18 files changed

+650
-372
lines changed

18 files changed

+650
-372
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#include "CVProcessing.h"
2+
#include <algorithm>
3+
#include <cmath>
4+
#include <rnexecutorch/Error.h>
5+
#include <rnexecutorch/ErrorCodes.h>
6+
#include <rnexecutorch/Log.h>
7+
8+
namespace rnexecutorch::cv_processing {
9+
10+
float computeIoU(const BBox &a, const BBox &b) {
11+
float x1 = std::max(a.x1, b.x1);
12+
float y1 = std::max(a.y1, b.y1);
13+
float x2 = std::min(a.x2, b.x2);
14+
float y2 = std::min(a.y2, b.y2);
15+
16+
float intersectionArea = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1);
17+
float areaA = a.area();
18+
float areaB = b.area();
19+
float unionArea = areaA + areaB - intersectionArea;
20+
21+
return (unionArea > 0.0f) ? (intersectionArea / unionArea) : 0.0f;
22+
}
23+
24+
std::optional<cv::Scalar> validateNormParam(const std::vector<float> &values,
25+
const char *paramName) {
26+
if (values.size() == 3) {
27+
return cv::Scalar(values[0], values[1], values[2]);
28+
} else if (!values.empty()) {
29+
log(LOG_LEVEL::Warn,
30+
std::string(paramName) +
31+
" must have 3 elements — ignoring provided value.");
32+
}
33+
return std::nullopt;
34+
}
35+
36+
/**
 * @brief Collect class indices into an ordered, duplicate-free set.
 *
 * @param classIndices Class indices to allow; may be empty or contain repeats.
 * @return Set holding each distinct index once; empty when the input is empty.
 */
std::set<int32_t>
prepareAllowedClasses(const std::vector<int32_t> &classIndices) {
  // The range constructor already yields an empty set for an empty range.
  return std::set<int32_t>(classIndices.cbegin(), classIndices.cend());
}
44+
45+
void validateThresholds(double confidenceThreshold, double iouThreshold) {
46+
if (confidenceThreshold < 0.0 || confidenceThreshold > 1.0) {
47+
throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig,
48+
"Confidence threshold must be in range [0, 1].");
49+
}
50+
51+
if (iouThreshold < 0.0 || iouThreshold > 1.0) {
52+
throw RnExecutorchError(RnExecutorchErrorCode::InvalidConfig,
53+
"IoU threshold must be in range [0, 1].");
54+
}
55+
}
56+
57+
} // namespace rnexecutorch::cv_processing
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
#pragma once
2+
3+
#include "CVTypes.h"
4+
#include <algorithm>
5+
#include <opencv2/opencv.hpp>
6+
#include <optional>
7+
#include <set>
8+
#include <vector>
9+
10+
namespace rnexecutorch::cv_processing {
11+
12+
/**
13+
* @brief Compute Intersection over Union (IoU) between two bounding boxes
14+
* @param a First bounding box
15+
* @param b Second bounding box
16+
* @return IoU in [0.0, 1.0] for well-formed boxes; 0.0 when the union area is not positive
17+
*
18+
* Moved from utils/computer_vision/Processing.h for consolidation.
19+
*/
20+
float computeIoU(const BBox &a, const BBox &b);
21+
22+
/**
23+
* @brief Non-Maximum Suppression for detection/segmentation results
24+
* @tparam T Type that has bbox and score fields (satisfies HasBBoxAndScore)
25+
* @param items Vector of items to filter
26+
* @param iouThreshold IoU threshold for suppression (typically 0.5)
27+
* @return Filtered vector with overlapping detections removed
28+
*
29+
* Moved from utils/computer_vision/Processing.h for consolidation.
30+
* Handles both class-aware and class-agnostic NMS automatically.
31+
*/
32+
template <HasBBoxAndScore T>
33+
std::vector<T> nonMaxSuppression(std::vector<T> items, double iouThreshold) {
34+
if (items.empty()) {
35+
return {};
36+
}
37+
38+
// Sort by score in descending order
39+
std::ranges::sort(items,
40+
[](const T &a, const T &b) { return a.score > b.score; });
41+
42+
std::vector<T> result;
43+
std::vector<bool> suppressed(items.size(), false);
44+
45+
for (size_t i = 0; i < items.size(); ++i) {
46+
if (suppressed[i]) {
47+
continue;
48+
}
49+
50+
result.push_back(items[i]);
51+
52+
// Suppress overlapping boxes
53+
for (size_t j = i + 1; j < items.size(); ++j) {
54+
if (suppressed[j]) {
55+
continue;
56+
}
57+
58+
// If type has classIndex, only suppress boxes of same class
59+
if constexpr (requires(T t) { t.classIndex; }) {
60+
if (items[i].classIndex != items[j].classIndex) {
61+
continue;
62+
}
63+
}
64+
65+
float iou = computeIoU(items[i].bbox, items[j].bbox);
66+
if (iou > iouThreshold) {
67+
suppressed[j] = true;
68+
}
69+
}
70+
}
71+
72+
return result;
73+
}
74+
75+
/**
76+
* @brief Validate and convert normalization parameter vector to cv::Scalar
77+
* @param values Vector of normalization values (should have 3 elements for RGB)
78+
* @param paramName Parameter name for logging (e.g., "normMean", "normStd")
79+
* @return Optional cv::Scalar if valid (3 elements), nullopt otherwise
80+
*
81+
* Replaces duplicate validation logic across ObjectDetection,
82+
* BaseInstanceSegmentation, and BaseSemanticSegmentation.
83+
*/
84+
std::optional<cv::Scalar> validateNormParam(const std::vector<float> &values,
85+
const char *paramName);
86+
87+
/**
88+
* @brief Convert class indices vector to a set for efficient filtering
89+
* @param classIndices Vector of class indices to allow
90+
* @return Set of allowed class indices (empty set = allow all classes)
91+
*
92+
* Used by detection and segmentation models to filter results by class.
93+
*/
94+
std::set<int32_t>
95+
prepareAllowedClasses(const std::vector<int32_t> &classIndices);
96+
97+
/**
98+
* @brief Validate confidence and IoU thresholds are in valid range [0, 1]
99+
* @param confidenceThreshold Detection confidence threshold
100+
* @param iouThreshold Non-maximum suppression IoU threshold
101+
* @throws RnExecutorchError if either threshold is out of range
102+
*
103+
* Used by detection and segmentation models to validate user input.
104+
*/
105+
void validateThresholds(double confidenceThreshold, double iouThreshold);
106+
107+
} // namespace rnexecutorch::cv_processing
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#pragma once
2+
3+
#include <concepts>
4+
#include <opencv2/opencv.hpp>
5+
6+
namespace rnexecutorch::cv_processing {
7+
8+
/**
 * @brief Box described by two corners, (x1, y1) and (x2, y2).
 *
 * Plain aggregate: initialise with `BBox{x1, y1, x2, y2}`.
 * Moved from utils/computer_vision/Types.h for consolidation.
 */
struct BBox {
  float x1;
  float y1;
  float x2;
  float y2;

  /// Extent along x; negative when x2 < x1.
  float width() const { return x2 - x1; }

  /// Extent along y; negative when y2 < y1.
  float height() const { return y2 - y1; }

  /// Product of the two extents.
  float area() const { return (x2 - x1) * (y2 - y1); }

  /// True when both extents are strictly positive and (x1, y1) is
  /// non-negative on both axes.
  bool isValid() const {
    const bool positiveExtent = x2 > x1 && y2 > y1;
    const bool nonNegativeCorner = x1 >= 0.0f && y1 >= 0.0f;
    return positiveExtent && nonNegativeCorner;
  }

  /// Copy with x coordinates multiplied by widthRatio and y coordinates
  /// multiplied by heightRatio.
  BBox scale(float widthRatio, float heightRatio) const {
    BBox scaled{};
    scaled.x1 = x1 * widthRatio;
    scaled.y1 = y1 * heightRatio;
    scaled.x2 = x2 * widthRatio;
    scaled.y2 = y2 * heightRatio;
    return scaled;
  }
};
31+
32+
/**
33+
* @brief Concept for types that have a bounding box and confidence score
34+
*
35+
* Used for NMS and other detection/segmentation operations.
36+
*/
37+
template <typename T>
38+
concept HasBBoxAndScore = requires(T t) {
39+
{ t.bbox } -> std::convertible_to<BBox>;
40+
{ t.score } -> std::convertible_to<float>;
41+
};
42+
43+
/**
44+
* @brief Scale ratios for mapping between original and model input dimensions
45+
*
46+
* Replaces duplicate scale ratio calculation code across multiple models.
47+
*/
48+
struct ScaleRatios {
49+
float widthRatio;
50+
float heightRatio;
51+
52+
/**
53+
* @brief Compute scale ratios from original size to model input size
54+
* @param original Original image dimensions
55+
* @param model Model input dimensions
56+
* @return ScaleRatios struct containing width and height ratios
57+
*/
58+
static ScaleRatios compute(cv::Size original, cv::Size model) {
59+
return {static_cast<float>(original.width) / model.width,
60+
static_cast<float>(original.height) / model.height};
61+
}
62+
};
63+
64+
} // namespace rnexecutorch::cv_processing

packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <rnexecutorch/host_objects/JSTensorViewOut.h>
1616
#include <rnexecutorch/jsi/OwningArrayBuffer.h>
1717

18+
#include <rnexecutorch/data_processing/CVTypes.h>
1819
#include <rnexecutorch/metaprogramming/TypeConcepts.h>
1920
#include <rnexecutorch/models/instance_segmentation/Types.h>
2021
#include <rnexecutorch/models/object_detection/Constants.h>
@@ -25,7 +26,6 @@
2526
#include <rnexecutorch/models/speech_to_text/common/types/TranscriptionResult.h>
2627
#include <rnexecutorch/models/style_transfer/Types.h>
2728
#include <rnexecutorch/models/voice_activity_detection/Types.h>
28-
#include <rnexecutorch/utils/computer_vision/Types.h>
2929

3030
using namespace rnexecutorch::models::speech_to_text;
3131

@@ -433,7 +433,7 @@ getJsiValue(const std::unordered_map<std::string_view, float> &map,
433433
return mapObj;
434434
}
435435

436-
inline jsi::Value getJsiValue(const utils::computer_vision::BBox &bbox,
436+
inline jsi::Value getJsiValue(const cv_processing::BBox &bbox,
437437
jsi::Runtime &runtime) {
438438
jsi::Object obj(runtime);
439439
obj.setProperty(runtime, "x1", bbox.x1);

packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "VisionModel.h"
22
#include <rnexecutorch/Error.h>
33
#include <rnexecutorch/ErrorCodes.h>
4+
#include <rnexecutorch/data_processing/CVProcessing.h>
45
#include <rnexecutorch/utils/FrameProcessor.h>
56
#include <rnexecutorch/utils/FrameTransform.h>
67

@@ -18,6 +19,18 @@ void VisionModel::unload() noexcept {
1819
}
1920

2021
cv::Size VisionModel::modelInputSize() const {
22+
// For multi-method models, query the currently loaded method's input shape
23+
if (!currentlyLoadedMethod_.empty()) {
24+
auto inputShapes = getAllInputShapes(currentlyLoadedMethod_);
25+
if (!inputShapes.empty() && !inputShapes[0].empty() &&
26+
inputShapes[0].size() >= 2) {
27+
const auto &shape = inputShapes[0];
28+
return {static_cast<int>(shape[shape.size() - 2]),
29+
static_cast<int>(shape[shape.size() - 1])};
30+
}
31+
}
32+
33+
// Default: use cached modelInputShape_ from single-method models
2134
if (modelInputShape_.size() < 2) {
2235
return {0, 0};
2336
}
@@ -51,4 +64,42 @@ cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const {
5164
return ::rnexecutorch::utils::pixelsToMat(tensorView);
5265
}
5366

67+
void VisionModel::ensureMethodLoaded(const std::string &methodName) {
68+
if (methodName.empty()) {
69+
throw RnExecutorchError(
70+
RnExecutorchErrorCode::InvalidConfig,
71+
"Method name cannot be empty. Use 'forward' for single-method models "
72+
"or 'forward_{inputSize}' for multi-method models.");
73+
}
74+
75+
if (currentlyLoadedMethod_ == methodName) {
76+
return;
77+
}
78+
79+
if (!module_) {
80+
throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded,
81+
"Model not loaded. Cannot load method '" +
82+
methodName + "'.");
83+
}
84+
85+
if (!currentlyLoadedMethod_.empty()) {
86+
module_->unload_method(currentlyLoadedMethod_);
87+
}
88+
89+
auto loadResult = module_->load_method(methodName);
90+
if (loadResult != executorch::runtime::Error::Ok) {
91+
throw RnExecutorchError(
92+
loadResult, "Failed to load method '" + methodName +
93+
"'. Ensure the method exists in the exported model.");
94+
}
95+
96+
currentlyLoadedMethod_ = methodName;
97+
}
98+
99+
void VisionModel::initializeNormalization(const std::vector<float> &normMean,
100+
const std::vector<float> &normStd) {
101+
normMean_ = cv_processing::validateNormParam(normMean, "normMean");
102+
normStd_ = cv_processing::validateNormParam(normStd, "normStd");
103+
}
104+
54105
} // namespace rnexecutorch::models

packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <jsi/jsi.h>
44
#include <mutex>
55
#include <opencv2/opencv.hpp>
6+
#include <optional>
67
#include <rnexecutorch/metaprogramming/ConstructorHelpers.h>
78
#include <rnexecutorch/models/BaseModel.h>
89
#include <rnexecutorch/utils/FrameTransform.h>
@@ -78,6 +79,42 @@ class VisionModel : public BaseModel {
7879
*/
7980
mutable std::mutex inference_mutex_;
8081

82+
/// Name of the currently loaded method (for multi-method models).
83+
/// Empty until ensureMethodLoaded() stores a name; it does so for the default "forward" as well.
84+
std::string currentlyLoadedMethod_;
85+
86+
/// Optional per-channel mean for input normalisation.
87+
std::optional<cv::Scalar> normMean_;
88+
89+
/// Optional per-channel standard deviation for input normalisation.
90+
std::optional<cv::Scalar> normStd_;
91+
92+
/**
93+
* @brief Ensures the specified method is loaded, unloading any previous
94+
* method if necessary.
95+
*
96+
* For single-method models, pass "forward" (the default).
97+
* For multi-method models, pass the specific method name (e.g.,
98+
* "forward_384").
99+
*
100+
* @param methodName Name of the method to load. Defaults to "forward".
101+
* @throws RnExecutorchError if the method cannot be loaded.
102+
*/
103+
void ensureMethodLoaded(const std::string &methodName = "forward");
104+
105+
/**
106+
* @brief Initializes normalization parameters from vectors.
107+
*
108+
* Uses cv_processing::validateNormParam() for validation.
109+
*
110+
* @param normMean Per-channel mean values (must be exactly 3 elements, or
111+
* empty to skip).
112+
* @param normStd Per-channel std dev values (must be exactly 3 elements, or
113+
* empty to skip).
114+
*/
115+
void initializeNormalization(const std::vector<float> &normMean,
116+
const std::vector<float> &normStd);
117+
81118
/**
82119
* @brief Resize an RGB image to the model's expected input size
83120
*

0 commit comments

Comments
 (0)