Skip to content

Commit 14f4604

Browse files
refactor: errors, logs, unnecessary comments, use existing TensorPtr
1 parent 3ed6abd commit 14f4604

15 files changed

Lines changed: 626 additions & 567 deletions

File tree

apps/computer-vision/app/camera_object_detection/index.tsx

Lines changed: 384 additions & 265 deletions
Large diffs are not rendered by default.

apps/computer-vision/app/index.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,12 @@ export default function Home() {
5353
>
5454
<Text style={styles.buttonText}>Image Generation</Text>
5555
</TouchableOpacity>
56-
<TouchableOpacity
56+
{/* <TouchableOpacity
5757
style={[styles.button, styles.cameraButton]}
5858
onPress={() => router.navigate('camera_object_detection/')}
5959
>
6060
<Text style={styles.buttonText}>🎥 Camera Object Detection</Text>
61-
</TouchableOpacity>
61+
</TouchableOpacity> */}
6262
</View>
6363
</View>
6464
);

apps/computer-vision/app/object_detection/index.tsx

Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,26 @@ import ScreenWrapper from '../../ScreenWrapper';
1313
import ColorPalette from '../../colors';
1414
import { Images } from 'react-native-nitro-image';
1515

16-
// Helper function to convert image URI to raw pixel data using NitroImage
16+
// Helper function to convert BGRA to RGB
17+
/**
 * Converts a BGRA pixel buffer into a newly allocated RGB pixel buffer.
 *
 * Source pixels are read as consecutive 4-byte groups ([B, G, R, A]) with no
 * row padding; output pixels are written as consecutive 3-byte groups
 * ([R, G, B]). The alpha byte is dropped.
 *
 * @param buffer Source pixels; must hold at least `width * height * 4` bytes.
 * @param width Image width in pixels.
 * @param height Image height in pixels.
 * @returns A new ArrayBuffer of `width * height * 3` bytes in RGB order.
 * @throws RangeError if `buffer` is too small for the given dimensions.
 */
function convertBGRAtoRGB(
  buffer: ArrayBuffer,
  width: number,
  height: number
): ArrayBuffer {
  const pixelCount = width * height;
  const source = new Uint8Array(buffer);
  const requiredBytes = pixelCount * 4;

  // Reading past the end of a typed array yields `undefined`, which is stored
  // as 0 in the destination. Fail loudly instead of silently producing black
  // pixels when the buffer does not match the stated dimensions.
  if (source.length < requiredBytes) {
    throw new RangeError(
      `BGRA buffer too small for ${width}x${height}: ` +
        `expected at least ${requiredBytes} bytes, got ${source.length}`
    );
  }

  const rgb = new Uint8Array(pixelCount * 3);

  for (let i = 0; i < pixelCount; i++) {
    const src = i * 4;
    const dst = i * 3;
    // BGRA format: [B, G, R, A] → RGB: [R, G, B]
    rgb[dst] = source[src + 2]; // R
    rgb[dst + 1] = source[src + 1]; // G
    rgb[dst + 2] = source[src]; // B
  }

  return rgb.buffer;
}
34+
35+
// Helper function to convert image URI to raw RGB pixel data
1736
async function imageUriToPixelData(
1837
uri: string,
1938
targetWidth: number,
@@ -29,32 +48,19 @@ async function imageUriToPixelData(
2948
const image = await Images.loadFromFileAsync(uri);
3049
const resized = image.resize(targetWidth, targetHeight);
3150

32-
// Get pixel data as ArrayBuffer (RGBA format)
33-
const pixelData = resized.toRawPixelData();
51+
// Get pixel data as ArrayBuffer (BGRA format from NitroImage)
52+
const rawPixelData = resized.toRawPixelData();
3453
const buffer =
35-
pixelData instanceof ArrayBuffer ? pixelData : pixelData.buffer;
36-
37-
// Calculate actual buffer dimensions (accounts for device pixel ratio)
38-
const bufferSize = buffer?.byteLength || 0;
39-
const totalPixels = bufferSize / 4; // RGBA = 4 bytes per pixel
40-
const aspectRatio = targetWidth / targetHeight;
41-
const actualHeight = Math.sqrt(totalPixels / aspectRatio);
42-
const actualWidth = totalPixels / actualHeight;
54+
rawPixelData instanceof ArrayBuffer ? rawPixelData : rawPixelData.buffer;
4355

44-
console.log('Requested:', targetWidth, 'x', targetHeight);
45-
console.log('Buffer size:', bufferSize);
46-
console.log(
47-
'Actual dimensions:',
48-
Math.round(actualWidth),
49-
'x',
50-
Math.round(actualHeight)
51-
);
56+
// Convert BGRA to RGB as required by the native API
57+
const rgbBuffer = convertBGRAtoRGB(buffer, targetWidth, targetHeight);
5258

5359
return {
54-
data: buffer,
55-
width: Math.round(actualWidth),
56-
height: Math.round(actualHeight),
57-
channels: 4, // RGBA
60+
data: rgbBuffer,
61+
width: targetWidth,
62+
height: targetHeight,
63+
channels: 3, // RGB
5864
};
5965
} catch (error) {
6066
console.error('Error loading image with NitroImage:', error);
@@ -106,12 +112,11 @@ export default function ObjectDetectionScreen() {
106112
if (imageUri && imageDimensions) {
107113
try {
108114
console.log('Converting image to pixel data...');
109-
// Resize to 640x640 to avoid memory issues
110-
const intermediateSize = 640;
115+
// Use original dimensions - let the model resize internally
111116
const pixelData = await imageUriToPixelData(
112117
imageUri,
113-
intermediateSize,
114-
intermediateSize
118+
imageDimensions.width,
119+
imageDimensions.height
115120
);
116121

117122
console.log('Running forward with pixel data...', {
@@ -122,7 +127,7 @@ export default function ObjectDetectionScreen() {
122127
});
123128

124129
// Run inference using unified forward() API
125-
const output = await ssdLite.forward(pixelData, 0.5);
130+
const output = await ssdLite.forward(pixelData, 0.3);
126131
console.log('Pixel data result:', output.length, 'detections');
127132
setResults(output);
128133
} catch (e) {

apps/computer-vision/package.json

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,14 @@
3131
"react-native-gesture-handler": "~2.28.0",
3232
"react-native-image-picker": "^7.2.2",
3333
"react-native-loading-spinner-overlay": "^3.0.1",
34+
"react-native-nitro-image": "0.10.2",
35+
"react-native-nitro-modules": "0.33.4",
3436
"react-native-reanimated": "~4.2.1",
3537
"react-native-safe-area-context": "~5.6.0",
3638
"react-native-screens": "~4.16.0",
3739
"react-native-svg": "15.12.1",
3840
"react-native-svg-transformer": "^1.5.0",
39-
"react-native-vision-camera": "4.7.3",
40-
"react-native-worklets": "^0.7.2",
41-
"react-native-worklets-core": "^1.6.2",
42-
"vision-camera-resize-plugin": "^3.2.0"
41+
"react-native-worklets": "^0.7.2"
4342
},
4443
"devDependencies": {
4544
"@babel/core": "^7.25.2",

packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,6 @@ getValue<std::vector<int64_t>>(const jsi::Value &val, jsi::Runtime &runtime) {
223223
return getArrayAsVector<int64_t>(val, runtime);
224224
}
225225

226-
// ✅ Fix: Add support for uint64_t vectors (fixes Undefined Symbol error)
227226
template <>
228227
inline std::vector<uint64_t>
229228
getValue<std::vector<uint64_t>>(const jsi::Value &val, jsi::Runtime &runtime) {
@@ -285,7 +284,6 @@ inline std::span<int64_t> getValue<std::span<int64_t>>(const jsi::Value &val,
285284
return getTypedArrayAsSpan<int64_t>(val, runtime);
286285
}
287286

288-
// ✅ Fix: Add support for uint64_t spans (fixes Undefined Symbol error)
289287
template <>
290288
inline std::span<uint64_t>
291289
getValue<std::span<uint64_t>>(const jsi::Value &val, jsi::Runtime &runtime) {
@@ -328,8 +326,6 @@ inline jsi::Value getJsiValue(const std::vector<char> &vec,
328326
return {runtime, array};
329327
}
330328

331-
// ✅ Fix: Add support for uint64_t (unsigned long long) vectors
332-
// This fixes the error in TokenizerModule::encode/decode
333329
inline jsi::Value getJsiValue(const std::vector<uint64_t> &vec,
334330
jsi::Runtime &runtime) {
335331
jsi::Array array(runtime, vec.size());
@@ -340,7 +336,6 @@ inline jsi::Value getJsiValue(const std::vector<uint64_t> &vec,
340336
return {runtime, array};
341337
}
342338

343-
// ✅ Fix: Add support for int64_t vectors
344339
inline jsi::Value getJsiValue(const std::vector<int64_t> &vec,
345340
jsi::Runtime &runtime) {
346341
jsi::Array array(runtime, vec.size());

packages/react-native-executorch/common/rnexecutorch/host_objects/ModelHostObject.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,14 +158,12 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
158158
// "setFixedModel"));
159159
}
160160

161-
// Register generateFromFrame for all VisionModel subclasses
162161
if constexpr (meta::DerivedFromOrSameAs<Model, models::VisionModel>) {
163162
addFunctions(JSI_EXPORT_FUNCTION(
164163
ModelHostObject<Model>, visionHostFunction<&Model::generateFromFrame>,
165164
"generateFromFrame"));
166165
}
167166

168-
// Register generateFromPixels for models that support it
169167
if constexpr (meta::HasGenerateFromPixels<Model>) {
170168
addFunctions(
171169
JSI_EXPORT_FUNCTION(ModelHostObject<Model>,
@@ -235,7 +233,7 @@ template <typename Model> class ModelHostObject : public JsiHostObject {
235233
}
236234

237235
try {
238-
// 2. The Magic Trick 🪄
236+
// 2. The Magic Trick
239237
// We get a pointer to a dummy function: void dummy(Rest...) {}
240238
// This function has exactly the signature of the arguments we want to
241239
// parse.

packages/react-native-executorch/common/rnexecutorch/models/VisionModel.cpp

Lines changed: 38 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
#include "VisionModel.h"
2+
#include <rnexecutorch/Error.h>
3+
#include <rnexecutorch/ErrorCodes.h>
4+
#include <rnexecutorch/Log.h>
5+
#include <rnexecutorch/host_objects/JSTensorViewIn.h>
6+
#include <rnexecutorch/host_objects/JsiConversions.h>
27
#include <rnexecutorch/utils/FrameProcessor.h>
38

49
namespace rnexecutorch {
@@ -18,45 +23,47 @@ cv::Mat VisionModel::extractFromFrame(jsi::Runtime &runtime,
1823

1924
cv::Mat VisionModel::extractFromPixels(jsi::Runtime &runtime,
2025
const jsi::Object &pixelData) const {
21-
// Extract width, height, and channels
22-
if (!pixelData.hasProperty(runtime, "width") ||
23-
!pixelData.hasProperty(runtime, "height") ||
24-
!pixelData.hasProperty(runtime, "channels") ||
25-
!pixelData.hasProperty(runtime, "data")) {
26-
throw std::runtime_error(
27-
"Invalid pixel data: must contain width, height, channels, and data");
28-
}
29-
30-
int width = pixelData.getProperty(runtime, "width").asNumber();
31-
int height = pixelData.getProperty(runtime, "height").asNumber();
32-
int channels = pixelData.getProperty(runtime, "channels").asNumber();
26+
// PixelData follows TensorPtr structure (dataPtr, sizes, scalarType)
27+
// Use JSI conversion helper to extract the data
28+
auto tensorView = jsi::fromHostObject<JSTensorViewIn>(runtime, pixelData);
3329

34-
// Get the ArrayBuffer
35-
auto dataValue = pixelData.getProperty(runtime, "data");
36-
if (!dataValue.isObject() ||
37-
!dataValue.asObject(runtime).isArrayBuffer(runtime)) {
38-
throw std::runtime_error(
39-
"pixel data 'data' property must be an ArrayBuffer");
30+
// Validate dimensions: sizes must be [height, width, channels]
31+
if (tensorView.sizes.size() != 3) {
32+
char errorMessage[100];
33+
std::snprintf(errorMessage, sizeof(errorMessage),
34+
"Invalid pixel data: sizes must have 3 elements "
35+
"[height, width, channels], got %zu",
36+
tensorView.sizes.size());
37+
throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
38+
errorMessage);
4039
}
4140

42-
auto arrayBuffer = dataValue.asObject(runtime).getArrayBuffer(runtime);
43-
size_t expectedSize = width * height * channels;
41+
int height = tensorView.sizes[0];
42+
int width = tensorView.sizes[1];
43+
int channels = tensorView.sizes[2];
4444

45-
if (arrayBuffer.size(runtime) != expectedSize) {
46-
throw std::runtime_error(
47-
"ArrayBuffer size does not match width * height * channels");
45+
// Pixel data must be RGB (3 channels) and BYTE type
46+
if (channels != 3) {
47+
char errorMessage[100];
48+
std::snprintf(errorMessage, sizeof(errorMessage),
49+
"Invalid pixel data: expected 3 channels (RGB), got %d",
50+
channels);
51+
throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput,
52+
errorMessage);
4853
}
4954

50-
// Create cv::Mat and copy the data
51-
// OpenCV uses BGR/BGRA format internally, but we'll create as-is and let
52-
// preprocessFrame handle conversion
53-
int cvType = (channels == 3) ? CV_8UC3 : CV_8UC4;
54-
cv::Mat image(height, width, cvType);
55+
if (tensorView.scalarType != ScalarType::Byte) {
56+
throw RnExecutorchError(
57+
RnExecutorchErrorCode::InvalidUserInput,
58+
"Invalid pixel data: scalarType must be BYTE (Uint8Array)");
59+
}
5560

56-
// Copy data from ArrayBuffer to cv::Mat
57-
std::memcpy(image.data, arrayBuffer.data(runtime), expectedSize);
61+
// Create cv::Mat directly from dataPtr (zero-copy view)
62+
uint8_t *dataPtr = static_cast<uint8_t *>(tensorView.dataPtr);
63+
cv::Mat image(height, width, CV_8UC3, dataPtr);
5864

59-
return image;
65+
// Clone to own the data, since JS memory may be GC'd
66+
return image.clone();
6067
}
6168

6269
} // namespace models

0 commit comments

Comments
 (0)