Skip to content

Commit 2f29c67

Browse files
refactor: apply code review fixes for vision camera integration
- visionHostFunction: preserve RnExecutorchError code in catch block
- OCR/VerticalOCR generateFromFrame: add 90° CW rotation for landscape frames
- VisionModel: lift preprocessFrame and modelImageSize from 5 subclasses into base class

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 751f50c commit 2f29c67

File tree

15 files changed

+68
-174
lines changed

15 files changed

+68
-174
lines changed

packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,8 +298,24 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
298298

299299
return jsi_conversion::getJsiValue(std::move(result), runtime);
300300
}
301+
} catch (const RnExecutorchError &e) {
302+
jsi::Object errorData(runtime);
303+
errorData.setProperty(runtime, "code", e.getNumericCode());
304+
errorData.setProperty(runtime, "message",
305+
jsi::String::createFromUtf8(runtime, e.what()));
306+
throw jsi::JSError(runtime, jsi::Value(runtime, std::move(errorData)));
307+
} catch (const std::runtime_error &e) {
308+
// This catch should be merged with the next one
309+
// (std::runtime_error inherits from std::exception) HOWEVER react
310+
// native has broken RTTI which breaks proper exception type
311+
// checking. Remove when the following change is present in our
312+
// version:
313+
// https://github.com/facebook/react-native/commit/3132cc88dd46f95898a756456bebeeb6c248f20e
314+
throw jsi::JSError(runtime, e.what());
301315
} catch (const std::exception &e) {
302316
throw jsi::JSError(runtime, e.what());
317+
} catch (...) {
318+
throw jsi::JSError(runtime, "Unknown error in vision function");
303319
}
304320
}
305321

packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,34 @@ cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime,
2323
return frame;
2424
}
2525

26+
cv::Mat VisionModel::preprocessFrame(const cv::Mat &frame) const {
27+
cv::Mat rgb;
28+
29+
if (frame.channels() == 4) {
30+
#ifdef __APPLE__
31+
cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB);
32+
#else
33+
cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB);
34+
#endif
35+
} else if (frame.channels() == 3) {
36+
rgb = frame;
37+
} else {
38+
char errorMessage[100];
39+
std::snprintf(errorMessage, sizeof(errorMessage),
40+
"Unsupported frame format: %d channels", frame.channels());
41+
throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
42+
errorMessage);
43+
}
44+
45+
if (rgb.size() != modelImageSize) {
46+
cv::Mat resized;
47+
cv::resize(rgb, resized, modelImageSize);
48+
return resized;
49+
}
50+
51+
return rgb;
52+
}
53+
2654
cv::Mat VisionModel::extractFromPixels(const JSTensorViewIn &tensorView) const {
2755
if (tensorView.sizes.size() != 3) {
2856
char errorMessage[100];

packages/react-native-executorch/common/rnexecutorch/models/VisionModel.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,9 @@ class VisionModel : public BaseModel {
8686
/**
8787
* @brief Preprocess a camera frame for model input
8888
*
89-
* This method should implement model-specific preprocessing such as:
90-
* - Resizing to the model's expected input size
91-
* - Color space conversion (e.g., BGR to RGB)
92-
* - Normalization
93-
* - Any other model-specific transformations
89+
* Converts 4-channel frames (BGRA on iOS, RGBA on Android) to RGB and
90+
* resizes to modelImageSize if needed. Subclasses may override for
91+
* model-specific preprocessing (e.g., normalisation).
9492
*
9593
* @param frame Input frame from camera (already extracted and rotated by
9694
* FrameExtractor)
@@ -99,7 +97,11 @@ class VisionModel : public BaseModel {
9997
* @note The input frame is already in RGB format and rotated 90° clockwise
10098
* @note This method is called under mutex protection in generateFromFrame()
10199
*/
102-
virtual cv::Mat preprocessFrame(const cv::Mat &frame) const = 0;
100+
virtual cv::Mat preprocessFrame(const cv::Mat &frame) const;
101+
102+
/// Expected input image dimensions derived from the model's input shape.
103+
/// Set by subclass constructors after loading the model.
104+
cv::Size modelImageSize{0, 0};
103105

104106
/**
105107
* @brief Extract and preprocess frame from VisionCamera in one call

packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.cpp

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ Classification::Classification(const std::string &modelSource,
2222
if (modelInputShape.size() < 2) {
2323
char errorMessage[100];
2424
std::snprintf(errorMessage, sizeof(errorMessage),
25-
"Unexpected model input size, expected at least 2 dimentions "
25+
"Unexpected model input size, expected at least 2 dimensions "
2626
"but got: %zu.",
2727
modelInputShape.size());
2828
throw RnExecutorchError(RnExecutorchErrorCode::WrongDimensions,
@@ -32,34 +32,6 @@ Classification::Classification(const std::string &modelSource,
3232
modelInputShape[modelInputShape.size() - 2]);
3333
}
3434

35-
cv::Mat Classification::preprocessFrame(const cv::Mat &frame) const {
36-
cv::Mat rgb;
37-
38-
if (frame.channels() == 4) {
39-
#ifdef __APPLE__
40-
cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB);
41-
#else
42-
cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB);
43-
#endif
44-
} else if (frame.channels() == 3) {
45-
rgb = frame;
46-
} else {
47-
char errorMessage[100];
48-
std::snprintf(errorMessage, sizeof(errorMessage),
49-
"Unsupported frame format: %d channels", frame.channels());
50-
throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
51-
errorMessage);
52-
}
53-
54-
if (rgb.size() != modelImageSize) {
55-
cv::Mat resized;
56-
cv::resize(rgb, resized, modelImageSize);
57-
return resized;
58-
}
59-
60-
return rgb;
61-
}
62-
6335
std::unordered_map<std::string_view, float>
6436
Classification::runInference(cv::Mat image) {
6537
std::scoped_lock lock(inference_mutex_);

packages/react-native-executorch/common/rnexecutorch/models/classification/Classification.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,10 @@ class Classification : public VisionModel {
3131
std::string_view, float>
3232
generateFromPixels(JSTensorViewIn pixelData);
3333

34-
protected:
35-
cv::Mat preprocessFrame(const cv::Mat &frame) const override;
36-
3734
private:
3835
std::unordered_map<std::string_view, float> runInference(cv::Mat image);
3936

4037
std::unordered_map<std::string_view, float> postprocess(const Tensor &tensor);
41-
42-
cv::Size modelImageSize{0, 0};
4338
};
4439
} // namespace models::classification
4540

packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.cpp

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ ImageEmbeddings::ImageEmbeddings(
2222
if (modelInputShape.size() < 2) {
2323
char errorMessage[100];
2424
std::snprintf(errorMessage, sizeof(errorMessage),
25-
"Unexpected model input size, expected at least 2 dimentions "
25+
"Unexpected model input size, expected at least 2 dimensions "
2626
"but got: %zu.",
2727
modelInputShape.size());
2828
throw RnExecutorchError(RnExecutorchErrorCode::WrongDimensions,
@@ -32,34 +32,6 @@ ImageEmbeddings::ImageEmbeddings(
3232
modelInputShape[modelInputShape.size() - 2]);
3333
}
3434

35-
cv::Mat ImageEmbeddings::preprocessFrame(const cv::Mat &frame) const {
36-
cv::Mat rgb;
37-
38-
if (frame.channels() == 4) {
39-
#ifdef __APPLE__
40-
cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB);
41-
#else
42-
cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB);
43-
#endif
44-
} else if (frame.channels() == 3) {
45-
rgb = frame;
46-
} else {
47-
char errorMessage[100];
48-
std::snprintf(errorMessage, sizeof(errorMessage),
49-
"Unsupported frame format: %d channels", frame.channels());
50-
throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
51-
errorMessage);
52-
}
53-
54-
if (rgb.size() != modelImageSize) {
55-
cv::Mat resized;
56-
cv::resize(rgb, resized, modelImageSize);
57-
return resized;
58-
}
59-
60-
return rgb;
61-
}
62-
6335
std::shared_ptr<OwningArrayBuffer>
6436
ImageEmbeddings::runInference(cv::Mat image) {
6537
std::scoped_lock lock(inference_mutex_);

packages/react-native-executorch/common/rnexecutorch/models/embeddings/image/ImageEmbeddings.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,8 @@ class ImageEmbeddings final : public VisionModel {
3131
"Registered non-void function")]] std::shared_ptr<OwningArrayBuffer>
3232
generateFromPixels(JSTensorViewIn pixelData);
3333

34-
protected:
35-
cv::Mat preprocessFrame(const cv::Mat &frame) const override;
36-
3734
private:
3835
std::shared_ptr<OwningArrayBuffer> runInference(cv::Mat image);
39-
40-
cv::Size modelImageSize{0, 0};
4136
};
4237
} // namespace models::embeddings
4338

packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ ObjectDetection::ObjectDetection(
2323
if (modelInputShape.size() < 2) {
2424
char errorMessage[100];
2525
std::snprintf(errorMessage, sizeof(errorMessage),
26-
"Unexpected model input size, expected at least 2 dimentions "
26+
"Unexpected model input size, expected at least 2 dimensions "
2727
"but got: %zu.",
2828
modelInputShape.size());
2929
throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs,
@@ -45,39 +45,6 @@ ObjectDetection::ObjectDetection(
4545
}
4646
}
4747

48-
cv::Mat ObjectDetection::preprocessFrame(const cv::Mat &frame) const {
49-
const std::vector<int32_t> tensorDims = getAllInputShapes()[0];
50-
cv::Size tensorSize = cv::Size(tensorDims[tensorDims.size() - 1],
51-
tensorDims[tensorDims.size() - 2]);
52-
53-
cv::Mat rgb;
54-
55-
if (frame.channels() == 4) {
56-
#ifdef __APPLE__
57-
cv::cvtColor(frame, rgb, cv::COLOR_BGRA2RGB);
58-
#else
59-
cv::cvtColor(frame, rgb, cv::COLOR_RGBA2RGB);
60-
#endif
61-
} else if (frame.channels() == 3) {
62-
rgb = frame;
63-
} else {
64-
char errorMessage[100];
65-
std::snprintf(errorMessage, sizeof(errorMessage),
66-
"Unsupported frame format: %d channels", frame.channels());
67-
throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
68-
errorMessage);
69-
}
70-
71-
// Only resize if dimensions don't match
72-
if (rgb.size() != tensorSize) {
73-
cv::Mat resized;
74-
cv::resize(rgb, resized, tensorSize);
75-
return resized;
76-
}
77-
78-
return rgb;
79-
}
80-
8148
std::vector<types::Detection>
8249
ObjectDetection::postprocess(const std::vector<EValue> &tensors,
8350
cv::Size originalSize, double detectionThreshold) {

packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ class ObjectDetection : public VisionModel {
7777
protected:
7878
std::vector<types::Detection> runInference(cv::Mat image,
7979
double detectionThreshold);
80-
cv::Mat preprocessFrame(const cv::Mat &frame) const override;
8180

8281
private:
8382
/**
@@ -100,9 +99,6 @@ class ObjectDetection : public VisionModel {
10099
cv::Size originalSize,
101100
double detectionThreshold);
102101

103-
/// Expected input image dimensions derived from the model's input shape.
104-
cv::Size modelImageSize{0, 0};
105-
106102
/// Optional per-channel mean for input normalisation (set in constructor).
107103
std::optional<cv::Scalar> normMean_;
108104

packages/react-native-executorch/common/rnexecutorch/models/ocr/OCR.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@ std::vector<types::OCRDetection>
5353
OCR::generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData) {
5454
auto frameObj = frameData.asObject(runtime);
5555
cv::Mat frame = ::rnexecutorch::utils::extractFrame(runtime, frameObj);
56+
// Camera sensors deliver landscape frames; rotate to portrait orientation.
57+
if (frame.cols > frame.rows) {
58+
cv::Mat upright;
59+
cv::rotate(frame, upright, cv::ROTATE_90_CLOCKWISE);
60+
frame = std::move(upright);
61+
}
5662
// extractFrame returns RGB; convert to BGR for consistency with readImage
5763
cv::cvtColor(frame, frame, cv::COLOR_RGB2BGR);
5864
return runInference(frame);

0 commit comments

Comments (0)