Skip to content

Commit f552710

Browse files
refactor: errors, logs, unnecessary comments, use existing TensorPtr
1 parent 3527d01 commit f552710

12 files changed

Lines changed: 256 additions & 229 deletions

File tree

apps/computer-vision/app/object_detection/index.tsx

Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,26 @@ import ScreenWrapper from '../../ScreenWrapper';
1313
import ColorPalette from '../../colors';
1414
import { Images } from 'react-native-nitro-image';
1515

16-
// Helper function to convert image URI to raw pixel data using NitroImage
16+
// Helper function to convert BGRA to RGB
17+
/**
 * Repacks a tightly packed BGRA pixel buffer into a tightly packed RGB buffer.
 *
 * The alpha byte of every pixel is dropped and the blue/red bytes are swapped.
 *
 * @param buffer Source pixels, 4 bytes per pixel in B, G, R, A order.
 * @param width  Image width in pixels.
 * @param height Image height in pixels.
 * @returns A new ArrayBuffer of `width * height * 3` bytes in R, G, B order.
 * @throws RangeError when `buffer` holds fewer than `width * height * 4` bytes.
 */
function convertBGRAtoRGB(
  buffer: ArrayBuffer,
  width: number,
  height: number
): ArrayBuffer {
  const BYTES_PER_BGRA_PIXEL = 4;
  const BYTES_PER_RGB_PIXEL = 3;

  const pixelCount = width * height;
  const requiredBytes = pixelCount * BYTES_PER_BGRA_PIXEL;
  const source = new Uint8Array(buffer);

  // Reading past the end of a typed array yields `undefined`, which is stored
  // as 0. Without this guard a short buffer silently becomes black pixels.
  if (source.length < requiredBytes) {
    throw new RangeError(
      `BGRA buffer too small: need ${requiredBytes} bytes for ` +
        `${width}x${height}, got ${source.length}`
    );
  }

  const rgb = new Uint8Array(pixelCount * BYTES_PER_RGB_PIXEL);

  let src = 0;
  let dst = 0;
  while (dst < rgb.length) {
    rgb[dst] = source[src + 2]; // R
    rgb[dst + 1] = source[src + 1]; // G
    rgb[dst + 2] = source[src]; // B
    src += BYTES_PER_BGRA_PIXEL;
    dst += BYTES_PER_RGB_PIXEL;
  }

  return rgb.buffer;
}
34+
35+
// Helper function to convert image URI to raw RGB pixel data
1736
async function imageUriToPixelData(
1837
uri: string,
1938
targetWidth: number,
@@ -29,32 +48,19 @@ async function imageUriToPixelData(
2948
const image = await Images.loadFromFileAsync(uri);
3049
const resized = image.resize(targetWidth, targetHeight);
3150

32-
// Get pixel data as ArrayBuffer (RGBA format)
33-
const pixelData = resized.toRawPixelData();
51+
// Get pixel data as ArrayBuffer (BGRA format from NitroImage)
52+
const rawPixelData = resized.toRawPixelData();
3453
const buffer =
35-
pixelData instanceof ArrayBuffer ? pixelData : pixelData.buffer;
36-
37-
// Calculate actual buffer dimensions (accounts for device pixel ratio)
38-
const bufferSize = buffer?.byteLength || 0;
39-
const totalPixels = bufferSize / 4; // RGBA = 4 bytes per pixel
40-
const aspectRatio = targetWidth / targetHeight;
41-
const actualHeight = Math.sqrt(totalPixels / aspectRatio);
42-
const actualWidth = totalPixels / actualHeight;
54+
rawPixelData instanceof ArrayBuffer ? rawPixelData : rawPixelData.buffer;
4355

44-
console.log('Requested:', targetWidth, 'x', targetHeight);
45-
console.log('Buffer size:', bufferSize);
46-
console.log(
47-
'Actual dimensions:',
48-
Math.round(actualWidth),
49-
'x',
50-
Math.round(actualHeight)
51-
);
56+
// Convert BGRA to RGB as required by the native API
57+
const rgbBuffer = convertBGRAtoRGB(buffer, targetWidth, targetHeight);
5258

5359
return {
54-
data: buffer,
55-
width: Math.round(actualWidth),
56-
height: Math.round(actualHeight),
57-
channels: 4, // RGBA
60+
data: rgbBuffer,
61+
width: targetWidth,
62+
height: targetHeight,
63+
channels: 3, // RGB
5864
};
5965
} catch (error) {
6066
console.error('Error loading image with NitroImage:', error);
@@ -106,12 +112,11 @@ export default function ObjectDetectionScreen() {
106112
if (imageUri && imageDimensions) {
107113
try {
108114
console.log('Converting image to pixel data...');
109-
// Resize to 640x640 to avoid memory issues
110-
const intermediateSize = 640;
115+
// Use original dimensions - let the model resize internally
111116
const pixelData = await imageUriToPixelData(
112117
imageUri,
113-
intermediateSize,
114-
intermediateSize
118+
imageDimensions.width,
119+
imageDimensions.height
115120
);
116121

117122
console.log('Running forward with pixel data...', {
@@ -122,7 +127,7 @@ export default function ObjectDetectionScreen() {
122127
});
123128

124129
// Run inference using unified forward() API
125-
const output = await ssdLite.forward(pixelData, 0.5);
130+
const output = await ssdLite.forward(pixelData, 0.3);
126131
console.log('Pixel data result:', output.length, 'detections');
127132
setResults(output);
128133
} catch (e) {

apps/computer-vision/package.json

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"@react-navigation/native": "^7.1.6",
1818
"@shopify/react-native-skia": "2.2.12",
1919
"expo": "^54.0.27",
20+
"expo-build-properties": "~1.0.10",
2021
"expo-constants": "~18.0.11",
2122
"expo-font": "~14.0.10",
2223
"expo-linking": "~8.0.10",
@@ -30,17 +31,19 @@
3031
"react-native-gesture-handler": "~2.28.0",
3132
"react-native-image-picker": "^7.2.2",
3233
"react-native-loading-spinner-overlay": "^3.0.1",
33-
"react-native-reanimated": "~4.1.1",
34+
"react-native-nitro-image": "0.10.2",
35+
"react-native-nitro-modules": "0.33.4",
36+
"react-native-reanimated": "~4.2.1",
3437
"react-native-safe-area-context": "~5.6.0",
3538
"react-native-screens": "~4.16.0",
3639
"react-native-svg": "15.12.1",
3740
"react-native-svg-transformer": "^1.5.0",
38-
"react-native-worklets": "0.5.1"
41+
"react-native-worklets": "^0.7.2"
3942
},
4043
"devDependencies": {
4144
"@babel/core": "^7.25.2",
4245
"@types/pngjs": "^6.0.5",
43-
"@types/react": "~19.1.10"
46+
"@types/react": "~19.2.0"
4447
},
4548
"private": true
4649
}

packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,25 @@ inline jsi::Value getJsiValue(uint64_t val, jsi::Runtime &runtime) {
360360
return {runtime, bigInt};
361361
}
362362

363+
/// Converts a vector of unsigned 64-bit integers into a JS array of numbers.
/// Every element is cast to double, so values above 2^53 lose precision.
inline jsi::Value getJsiValue(const std::vector<uint64_t> &vec,
                              jsi::Runtime &runtime) {
  const size_t length = vec.size();
  jsi::Array array(runtime, length);

  size_t index = 0;
  for (const uint64_t element : vec) {
    const auto asNumber = static_cast<double>(element);
    array.setValueAtIndex(runtime, index, jsi::Value(asNumber));
    ++index;
  }

  return {runtime, array};
}
372+
373+
/// Converts a vector of signed 64-bit integers into a JS array of numbers.
/// Every element is cast to double before being stored.
inline jsi::Value getJsiValue(const std::vector<int64_t> &vec,
                              jsi::Runtime &runtime) {
  const size_t length = vec.size();
  jsi::Array array(runtime, length);

  size_t index = 0;
  for (const int64_t element : vec) {
    const auto asNumber = static_cast<double>(element);
    array.setValueAtIndex(runtime, index, jsi::Value(asNumber));
    ++index;
  }

  return {runtime, array};
}
381+
363382
inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) {
364383
return {runtime, val};
365384
}

packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,14 +158,12 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
158158
"stream"));
159159
}
160160

161-
// Register generateFromFrame for all VisionModel subclasses
162161
if constexpr (meta::DerivedFromOrSameAs<Model, models::VisionModel>) {
163162
addFunctions(JSI_EXPORT_FUNCTION(
164163
ModelHostObject<Model>, synchronousHostFunction<&Model::streamStop>,
165164
"streamStop"));
166165
}
167166

168-
// Register generateFromPixels for models that support it
169167
if constexpr (meta::HasGenerateFromPixels<Model>) {
170168
addFunctions(
171169
JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
@@ -221,6 +219,66 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
221219
}
222220
}
223221

222+
// Synchronous host function for vision entry points.
//
// FnPtr is invoked as (runtime, args[0], tail...): the runtime is injected
// here, the first JS argument (the pixel data) is forwarded untouched as a
// jsi::Value, and the remaining JS arguments are converted to the C++
// parameter types described by meta::TailSignature<decltype(FnPtr)>.
//
// Throws jsi::JSError when the JS argument count does not match, or when
// argument conversion or the model call throws a std::exception.
// Returns undefined for void functions, otherwise the converted result.
template <auto FnPtr> JSI_HOST_FUNCTION(visionHostFunction) {
  using Traits = meta::FunctionTraits<decltype(FnPtr)>;
  using ReturnType = typename Traits::return_type;

  constexpr std::size_t cppArgCount = Traits::arity;

  // The call below always supplies the runtime and the pixel-data value, so a
  // target with fewer than two parameters is a registration error. Catching it
  // at compile time also keeps the subtraction below from wrapping around.
  static_assert(cppArgCount >= 2,
                "vision functions must accept (runtime, pixelData, ...)");

  // Only the runtime is injected on the C++ side; the pixel data is still a
  // JS argument. Hence JS args = C++ args - 1.
  constexpr std::size_t expectedJsArgs = cppArgCount - 1;
  if (count != expectedJsArgs) {
    throw jsi::JSError(runtime, "Argument count mismatch in vision function");
  }

  try {
    // TailSignature exposes a dummy function whose parameters are the ones
    // that follow the pixel data, so the existing tuple helper can be reused
    // to parse just those. args + 1 skips the pixel data, which is forwarded
    // unparsed.
    auto dummyFuncPtr = &meta::TailSignature<decltype(FnPtr)>::dummy;
    auto tailArgsTuple =
        meta::createArgsTupleFromJsi(dummyFuncPtr, args + 1, runtime);

    // Shared by the void and non-void paths.
    auto invokeModel = [&](auto &&...tailArgs) {
      return (model.get()->*FnPtr)(
          runtime, args[0], std::forward<decltype(tailArgs)>(tailArgs)...);
    };

    if constexpr (std::is_void_v<ReturnType>) {
      std::apply(invokeModel, std::move(tailArgsTuple));
      return jsi::Value::undefined();
    } else {
      auto result = std::apply(invokeModel, std::move(tailArgsTuple));
      return jsi_conversion::getJsiValue(std::move(result), runtime);
    }
  } catch (const std::exception &e) {
    throw jsi::JSError(runtime, e.what());
  }
}
281+
224282
// A generic host function that resolves a promise with a result of a
225283
// function. JSI arguments are converted to the types provided in the function
226284
// signature, and the return value is converted back to JSI before resolving.

packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp

Lines changed: 38 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
#include "VisionModel.h"
2+
#include <rnexecutorch/Error.h>
3+
#include <rnexecutorch/ErrorCodes.h>
4+
#include <rnexecutorch/Log.h>
5+
#include <rnexecutorch/host_objects/JSTensorViewIn.h>
6+
#include <rnexecutorch/host_objects/JsiConversions.h>
27
#include <rnexecutorch/utils/FrameProcessor.h>
38

49
namespace rnexecutorch {
@@ -18,45 +23,47 @@ cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime,
1823

1924
// Builds an owned RGB cv::Mat from a JS pixel-data object.
//
// The object is converted to a JSTensorViewIn and must describe a
// [height, width, 3] tensor of ScalarType::Byte. The returned Mat is a deep
// copy and does not alias the JS-side buffer.
//
// Throws RnExecutorchError (InvalidUserInput) when sizes does not have three
// elements, the channel count is not 3, the scalar type is not Byte, height or
// width is not positive, or the data pointer is null.
cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime,
                                       const jsi::Object &pixelData) const {
  // PixelData follows the TensorPtr structure (dataPtr, sizes, scalarType).
  auto tensorView = jsi::fromHostObject<JSTensorViewIn>(runtime, pixelData);

  // sizes must be [height, width, channels].
  if (tensorView.sizes.size() != 3) {
    char errorMessage[100];
    std::snprintf(errorMessage, sizeof(errorMessage),
                  "Invalid pixel data: sizes must have 3 elements "
                  "[height, width, channels], got %zu",
                  tensorView.sizes.size());
    throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
                            errorMessage);
  }

  const int height = static_cast<int>(tensorView.sizes[0]);
  const int width = static_cast<int>(tensorView.sizes[1]);
  const int channels = static_cast<int>(tensorView.sizes[2]);

  // Only packed RGB is accepted.
  if (channels != 3) {
    char errorMessage[100];
    std::snprintf(errorMessage, sizeof(errorMessage),
                  "Invalid pixel data: expected 3 channels (RGB), got %d",
                  channels);
    throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
                            errorMessage);
  }

  if (tensorView.scalarType != ScalarType::Byte) {
    throw RnExecutorchError(
        RnExecutorchErrorCode::InvalidUserInput,
        "Invalid pixel data: scalarType must be BYTE (Uint8Array)");
  }

  // Reject degenerate shapes before a Mat is wrapped around the raw pointer.
  if (height <= 0 || width <= 0) {
    char errorMessage[100];
    std::snprintf(errorMessage, sizeof(errorMessage),
                  "Invalid pixel data: height and width must be positive, "
                  "got %dx%d",
                  height, width);
    throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
                            errorMessage);
  }

  uint8_t *dataPtr = static_cast<uint8_t *>(tensorView.dataPtr);
  if (dataPtr == nullptr) {
    throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
                            "Invalid pixel data: data buffer is null");
  }

  // NOTE(review): nothing here verifies that the buffer behind dataPtr holds
  // height * width * 3 bytes; confirm that the JSTensorViewIn conversion
  // guarantees it, otherwise the clone below can read out of bounds.

  // Wrap the JS memory without copying, then clone so the result owns its
  // pixels and stays valid after the JS buffer is garbage-collected.
  cv::Mat image(height, width, CV_8UC3, dataPtr);
  return image.clone();
}
6168

6269
} // namespace models

0 commit comments

Comments
 (0)